# Last modified: Aug 26, 2020, 05:20 PM
1#importing the modules that will be used in the script
2from gtts import gTTS #The TTS Module will be converting the output string to text to speech.
3import playsound #This will be importing the playsound module, which will give out the output of the fianl product.
4import speech_recognition as sr #The speech recognition module will try to get the speech use Google's
5import wolframalpha #imports the wolframalpha module, which will be used for calculations and queries
6import os #imports the possibility to access local files and use them on the system scale
7import cv2 #imports the opencv model to access the camera instances
8import matplotlib.pyplot as plt #plotting the information from camera into graphzs for the other modules to interpret
9import cvlib as cv #computer vision library to detect what the camemra sees
10from cvlib.object_detection import draw_bbox #object detection class from the computer vision library
11import easyocr #easyocr to detect text from the image, uses machine learning in pytorch.
12
13number = 1 #describes the default user to see if it matches,
14#and takes it to retrive earlier information capture (still in beta)
15def call_to_assistant(output): #defining the statement to call to assistant
16
17 global number #calling the number in the global class, refer to line 13
18
19 # this will be renaming every new name that it gets using the number and then adding an integer to create a new file name
20 # this is in hope to remove any ambiguity between users and in order to organize the file numbers
21 number += 1 #refer to annotation on line 14
22 print("ABV Bot : ", output) #prints out the transcript of what is being said
23
24 toSpeak = gTTS(text = output, lang = 'en', slow = False) #Using the Google API to translate the string to TTS
25
26 filename = "answer.mp3" #save it in the following file and extension (simplified to answer.mp3 for demo purposes)
27 toSpeak.save(filename) #Saving the TTS file in the filename that was specified, its recommended to use an mp3 file to save the TTS
28
29 #playsound module used to play the aforementioned file, note the os module can also be used to open the file
30 #but this method proves faster times.
31 playsound.playsound(filename, True)
32 os.remove(filename) #after the file is done being used, remove the file in order to not take up a lot of space
33
34
35
36def get_audio(): #defining the function to gather the audio and to use the sr module
37
38 rObject = sr.Recognizer() #calling the speech recognition module to provide the output
39 audio = ''
40
41 with sr.Microphone() as source: #defining the source of the mic, this will use the internal mic as a default.
42 print("Start Speaking...") #tells the user when to start speaking
43
44 # recording the audio using speech recognition
45 audio = rObject.listen(source, phrase_time_limit = 5)
46 print("Stop Speaking.") #puts the limit at 5 seconds, this can be changed by changing the 5 to the relative seconds in an integer number.
47
48 try:
49
50 text = rObject.recognize_google(audio, language ='en_US') #parsing the audio through the Google API
51 print("Human : ", text) #furthering the transcript, this time displaying the input that is being taken
52 return text
53
54 except:
55
56 call_to_assistant("There was a problem parsing the audio. Please try again!") #if no audio is detected then this message is displayed.
57 return 0
58
59def process_text(input): #this will define the processing of the text, i.e: the commands which will be used
60 try:
61 #defining the 'who are you' commands, so that the bot can introduce iteself to the user in question.
62 if "who are you" in input or "what do you do" in input or "what is your name" in input:
63 speak = '''Hi, I am A B V bot, I was made as an initiative to ease the process of giving
64 answers to queries for the blind. You can read more about me in the portfolio of Nivyan Lakhani.'''
65 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
66 return
67
68 #defining the 'who made you' commands, so that the author can be credited
69 elif "who made you" in input or "created you" in input or "who were you created by" in input:
70 speak = "I have been created by Nivyan Lakhani."
71 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
72 return
73
74 #defining the name of the project if the user does not know what the project is
75 elif "a blind vision" in input:
76 speak = "A blind vision is an initiative to create a voice synthesized bot to aid the visually blind whilst being extrememely cost friendly."
77 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
78 return
79
80 #incase the person forgets a specific info, this will remind them of it, for this example I'll be using a phone number variable.
81 elif "phone" in input:
82 speak = "your phone number is +254 711 878443"
83 call_to_assistant(speak) # <-- calling the function to convert the text to TTS
84 return
85
86 #detects for the word scan or image, this will be using a deep learning model to detect what is inside of an image, and currently runs natively on the system.
87 elif "scan" in input or "image" in input:
88 cam = cv2.VideoCapture(0) #specifying the camera path which is to be used, set the camera used as default if it does not detect
89 cv2.namedWindow("Object Detection") #names the computational window the string that is inside of the quotes.
90
91 img_counter = 0 #stands for the fact that if no image is detected then run the code below
92
93 while True: #
94 ret, frame = cam.read() #calls out the function to set a variable on if the camera can read the current frame
95 if not ret: #if the above is not met, then the script terminates
96 print("no frame was found") #outputs this message if nothing is detected
97 break #breaks the script
98 cv2.imshow("Object Detection", frame)
99
100 k = cv2.waitKey(1)
101 if k%256 == 32: #waiting for the space key to be pressed, can be configured to match any small button like the one of the glasses.
102 img_name = "image.jpg".format(img_counter) #saves the image as "image.jpg"
103 cv2.imwrite(img_name, frame) #writes the image on disk natively
104 print("{} written!".format(img_name)) #types out written and then the image name on the console
105 img_counter += 1 #if an image has been taken then it breaks the script
106 break
107
108 cam.release() #stops the camera module
109
110 cv2.destroyAllWindows() #closes the preview window
111
112 im = cv2.imread('image.jpg') #uses the cv2 module to read the image
113 bbox, label, conf = cv.detect_common_objects(im) #uses the cvlib object detection library
114 output_image = draw_bbox(im, bbox, label, conf) #labels the text, draws a boundarybox around the object that is detected, and states the confidence of the label.
115 speak = (label) #the label is set as the speak variable
116 call_to_assistant(str(speak)) #speak variable is passed through TTS
117 print(speak) #completes the transcript
118
119 elif "reader" in input or "read" in input: #when the word reader or read is called the following happens
120 cam = cv2.VideoCapture(0) #calls out the function to set a variable on if the camera can read the current frame
121 cv2.namedWindow("test") #names the window to whatever is in the string
122
123 img_counter = 0 #refer to lines 91-110
124
125 while True: #refer to lines 91-110
126 ret, frame = cam.read() #refer to lines 91-110
127 if not ret: #refer to lines 91-110
128 print("failed to grab frame") #refer to lines 91-110
129 break #refer to lines 91-110
130 cv2.imshow("test", frame) #refer to lines 91-110
131
132 k = cv2.waitKey(1) #refer to lines 91-110
133 if k%256 == 32: #refer to lines 91-110
134 img_name = "read.jpg".format(img_counter) #refer to lines 91-110
135 cv2.imwrite(img_name, frame) #refer to lines 91-110
136 print("{} written!".format(img_name)) #refer to lines 91-110
137 img_counter += 1 #refer to lines 91-110
138 break #refer to lines 91-110
139
140 cam.release() #refer to lines 91-110
141
142 cv2.destroyAllWindows() #refer to lines 91-110
143
144 reader = easyocr.Reader(['en']) #sets the language of the OCR reader, use the 2 letter denomination for the language.
145 result = reader.readtext('read.jpg', detail = 0) #links to the file that is being read so the algorithm can scan and extract the text.
146 call_to_assistant(str(result)) #calls to the assistant and reads the output (which is converted into a string.)
147 print(result) #continues the transcript
148
149 #defining the calculation module to get the calculation query from the wolfram API.
150 elif "calculate" in input.lower():
151
152 app_id = "HX7QQ2-EA4XE27UXX" #Enter your unique ID, I will be using my personal ID for this demo
153 client = wolframalpha.Client(app_id) #describes the client variable to specify the query towards the wolfram API
154
155 indx = input.lower().split().index('calculate') #seperates the word calculate from the input
156 query = input.split()[indx + 1:] #puts the query as the indx, refer to line 155
157 res = client.query(' '.join(query)) #joins the query and informs the client refer to like 157
158 answer = next(res.results).text #gets the result in the form of a text file
159 call_to_assistant("The answer is " + answer) #parses the text file through the TTS API and reads it out loud.
160 return
161
162 else:
163
164 app_id = "HX7QQ2-EA4XE27UXX" #incase nothing works, it uses the wolfram engline to find the answer to the query.
165 client = wolframalpha.Client(app_id)
166 my_input = str(input) #puts the query as the indx, refer to line 165
167 res = client.query(my_input) #joins the query and informs the client
168 answerfinal = next(res.results).text #exports the result in a text file.
169 call_to_assistant("The answer is " + answerfinal) #parses the text file through the gTTS API and raeds itout loud.
170 return
171
172
173 except :
174
175 call_to_assistant("I don't understand, please try repeating or saying 'calculate' before the query.") #if nothing is detected then this is what is said.
176 ans = get_audio #gives the user another chance to try and repeat the querie.
177
178# Driver Code
179if __name__ == "__main__":
180 call_to_assistant("What's your name, Human?") #optional in order to make the assistant seem more personal
181 name ='Human' #the default name that is set
182 name = get_audio() #using the get audio functionk
183 call_to_assistant("Hello, " + name + '.') #greeting by attaching the string of hello and then the name.
184
185 while(1):
186
187 call_to_assistant("What can i do for you?") #starts to take in the input after this string is said.
188 text = get_audio().lower() #getting the audio from the speech recognition module.
189
190 if text == 0: #loops back this if nothing is detected.
191 continue
192
193 if "exit" in str(text) or "bye" in str(text) or "sleep" in str(text): #when "exit", "sleep", or "bye" is detected, the program breaks itself and then exits.
194 call_to_assistant("Ok bye, "+ name+'.') #tells the user goodbye and then proceeds to break the script.
195 break
196
197 process_text(text)