# Last modified: Aug 26, 2020, 05:20 PM
1#importing the modules that will be used in the script
2from gtts import gTTS #The TTS Module will be converting the output string to text to speech.
3import playsound #This will be importing the playsound module, which will give out the output of the fianl product.
4import speech_recognition as sr #The speech recognition module will try to get the speech use Google's
5import wolframalpha #imports the wolframalpha module, which will be used for calculations and queries
6import os #imports the possibility to access local files and use them on the system scale
7import cv2 #imports the opencv model to access the camera instances
8import matplotlib.pyplot as plt #plotting the information from camera into graphzs for the other modules to interpret
9import cvlib as cv #computer vision library to detect what the camemra sees
10from cvlib.object_detection import draw_bbox #object detection class from the computer vision library
11import easyocr #easyocr to detect text from the image, uses machine learning in pytorch.
12
13number = 1 #describes the default user to see if it matches,
14#and takes it to retrive earlier information capture (still in beta)
15def call_to_assistant(output): #defining the statement to call to assistant
16
17 global number #calling the number in the global class, refer to line 13
18
19 # this will be renaming every new name that it gets using the number and then adding an integer to create a new file name
20 # this is in hope to remove any ambiguity between users and in order to organize the file numbers
21 number += 1 #refer to annotation on line 14
22 print("ABV Bot : ", output) #prints out the transcript of what is being said
23
24 toSpeak = gTTS(text = output, lang = 'en', slow = False) #Using the Google API to translate the string to TTS
25
26 filename = "answer.mp3" #save it in the following file and extension (simplified to answer.mp3 for demo purposes)
27 toSpeak.save(filename) #Saving the TTS file in the filename that was specified, its recommended to use an mp3 file to save the TTS
28
29 #playsound module used to play the aforementioned file, note the os module can also be used to open the file
30 #but this method proves faster times.
31 playsound.playsound(filename, True)
32 os.remove(filename) #after the file is done being used, remove the file in order to not take up a lot of space
33
34
35
36def get_audio(): #defining the function to gather the audio and to use the sr module
37
38 rObject = sr.Recognizer() #calling the speech recognition module to provide the output
39 audio = ''
40
41 with sr.Microphone() as source: #defining the source of the mic, this will use the internal mic as a default.
42 print("Start Speaking...") #tells the user when to start speaking
43
44 # recording the audio using speech recognition
45 audio = rObject.listen(source, phrase_time_limit = 5)
46 print("Stop Speaking.") #puts the limit at 5 seconds, this can be changed by changing the 5 to the relative seconds in an integer number.
47
48 try:
49
50 text = rObject.recognize_google(audio, language ='en_US') #parsing the audio through the Google API
51 print("Human : ", text) #furthering the transcript, this time displaying the input that is being taken
52 return text
53
54 except:
55
56 call_to_assistant("There was a problem parsing the audio. Please try again!") #if no audio is detected then this message is displayed.
57 return 0
58
59def process_text(input): #this will define the processing of the text, i.e: the commands which will be used
60 try:
61 #defining the 'who are you' commands, so that the bot can introduce iteself to the user in question.
62 if "who are you" in input or "what do you do" in input or "what is your name" in input:
63 speak = '''Hi, I am A B V bot, I was made as an initiative to ease the process of giving
64 answers to queries for the blind. You can read more about me in the portfolio of Nivyan Lakhani.'''
65 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
66 return
67
68 #defining the 'who made you' commands, so that the author can be credited
69 elif "who made you" in input or "created you" in input or "who were you created by" in input:
70 speak = "I have been created by Nivyan Lakhani."
71 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
72 return
73
74 #defining the name of the project if the user does not know what the project is
75 elif "a blind vision" in input:
76 speak = "A blind vision is an initiative to create a voice synthesized bot to aid the visually blind whilst being extrememely cost friendly."
77 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
78 return
79
80 #incase the person forgets a specific info, this will remind them of it, for this example I'll be using a phone number variable.
81 elif "phone" in input:
82 speak = "your phone number is +254 711 878443"
83 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
84 return
85
86 #detects for the word scan or image, this will be using a deep learning model to detect what is inside of an image, and currently runs natively on the system.
87 elif "scan" in input or "image" in input:
88 cam = cv2.VideoCapture(0) #specifying the camera path which is to be used, set the camera used as default if it does not detect
89 cv2.namedWindow("Object Detection") #names the computational window the string that is inside of the quotes.
90
91 img_counter = 0 #stands for the fact that if no image is detected then run the code below
92
93 while True: #
94 ret, frame = cam.read() #calls out the function to set a variable on if the camera can read the current frame
95 if not ret: #if the above is not met, then the script terminates
96 print("no frame was found") #outputs this message if nothing is detected
97 break #breaks the script
98 cv2.imshow("Object Detection", frame)
99
100 k = cv2.waitKey(1)
101 if k%256 == 32: #waiting for the space key to be pressed, can be configured to match any small button like the one of the glasses.
102 img_name = "image.jpg".format(img_counter) #saves the image as "image.jpg"
103 cv2.imwrite(img_name, frame) #writes the image on disk natively
104 print("{} written!".format(img_name)) #types out written and then the image name on the console
105 img_counter += 1 #if an image has been taken then it breaks the script
106 break
107
108 cam.release() #stops the camera module
109
110 cv2.destroyAllWindows() #closes the preview window
111
112 im = cv2.imread('image.jpg') #uses the cv2 module to read the image
113 bbox, label, conf = cv.detect_common_objects(im) #uses the cvlib object detection library
114 output_image = draw_bbox(im, bbox, label, conf) #labels the text, draws a boundarybox around the object that is detected, and states the confidence of the label.
115 speak = (label) #the label is set as the speak variable
116 call_to_assistant(str(speak)) #speak variable is passed through TTS
117 print(speak) #completes the transcript
118
119 elif "reader" in input or "read" in input: #when the word reader or read is called the following happens
120 cam = cv2.VideoCapture(0) #calls out the function to set a variable on if the camera can read the current frame
121 cv2.namedWindow("test") #names the window to whatever is in the string
122
123 img_counter = 0 #refer to lines 91-110
124
125 while True: #refer to lines 91-110
126 ret, frame = cam.read() #refer to lines 91-110
127 if not ret: #refer to lines 91-110
128 print("failed to grab frame") #refer to lines 91-110
129 break #refer to lines 91-110
130 cv2.imshow("test", frame) #refer to lines 91-110
131
132 k = cv2.waitKey(1) #refer to lines 91-110
133 if k%256 == 32: #refer to lines 91-110
134 img_name = "read.jpg".format(img_counter) #refer to lines 91-110
135 cv2.imwrite(img_name, frame) #refer to lines 91-110
136 print("{} written!".format(img_name)) #refer to lines 91-110
137 img_counter += 1 #refer to lines 91-110
138 break #refer to lines 91-110
139
140 cam.release() #refer to lines 91-110
141
142 cv2.destroyAllWindows() #refer to lines 91-110
143
144 reader = easyocr.Reader(['en']) #sets the language of the OCR reader, use the 2 letter denomination for the language.
145 result = reader.readtext('read.jpg', detail = 0) #links to the file that is being read so the algorithm can scan and extract the text.
146 call_to_assistant(str(result)) #calls to the assistant and reads the output (which is converted into a string.)
147 print(result) #continues the transcript
148
149 #defining the calculation module to get the calculation query from the wolfram API.
150 elif "calculate" in input.lower():
151
152 app_id = "HX7QQ2-EA4XE27UXX" #Enter your unique ID, I will be using my personal ID for this demo
153 client = wolframalpha.Client(app_id) #describes the client variable to specify the query towards the wolfram API
154
155 indx = input.lower().split().index('calculate') #seperates the word calculate from the input
156 query = input.split()[indx + 1:] #puts the query as the indx, refer to line 155
157 res = client.query(' '.join(query)) #joins the query and informs the client refer to like 157
158 answer = next(res.results).text #gets the result in the form of a text file
159 call_to_assistant("The answer is " + answer) #parses the text file through the TTS API and reads it out loud.
160 return
161
162 else:
163
164 app_id = "HX7QQ2-EA4XE27UXX" #incase nothing works, it uses the wolfram engline to find the answer to the query.
165 client = wolframalpha.Client(app_id)
166 my_input = str(input) #puts the query as the indx, refer to line 165
167 res = client.query(my_input) #joins the query and informs the client
168 answerfinal = next(res.results).text #exports the result in a text file.
169 call_to_assistant("The answer is " + answerfinal) #parses the text file through the gTTS API and raeds itout loud.
170 return
171
172
173 except :
174
175 call_to_assistant("I don't understand, please try repeating or saying 'calculate' before the query.") #if nothing is detected then this is what is said.
176 ans = get_audio #gives the user another chance to try and repeat the querie.
177
178# Driver Code
179if __name__ == "__main__":
180 call_to_assistant("What's your name, Human?") #optional in order to make the assistant seem more personal
181 name ='Human' #the default name that is set
182 name = get_audio() #using the get audio functionk
183 call_to_assistant("Hello, " + name + '.') #greeting by attaching the string of hello and then the name.
184
185 while(1):
186
187 call_to_assistant("What can i do for you?") #starts to take in the input after this string is said.
188 text = get_audio().lower() #getting the audio from the speech recognition module.
189
190 if text == 0: #loops back this if nothing is detected.
191 continue
192
193 if "exit" in str(text) or "bye" in str(text) or "sleep" in str(text): #when "exit", "sleep", or "bye" is detected, the program breaks itself and then exits.
194 call_to_assistant("Ok bye, "+ name+'.') #tells the user goodbye and then proceeds to break the script.
195 break
196
197 process_text(text)