# Paste metadata: 6 years ago · Jun 21, 2019, 07:20 PM
1from synthetic_data import HOTWORD_LIST, NEGATIVES_LIST, SENTENCE_LIST, \
2 DATA_PATH
3from os.path import join, isdir, exists
4from os import makedirs
5import subprocess
6
7
def convert(path):
    """Transcode an audio file to 16 kHz, mono, signed 16-bit PCM WAV.

    ffmpeg is invoked with ``-y``, so an existing output file is overwritten.
    The output is written next to the input, with the input's extension
    swapped for ``.wav``.

    :param path: path of the audio file to convert (an ``.mp3`` in this file)
    :return: ffmpeg's exit status, or ``None`` when nothing was done because
        ``path`` already is the ``.wav`` target
    """
    from os.path import splitext

    # Swap only the real extension: str.replace() would also rewrite ".mp3"
    # occurring inside directory names, and would leave input == output for
    # any file whose extension is not literally ".mp3".
    wav_path = splitext(path)[0] + ".wav"
    if wav_path == path:
        print("skipping conversion, already a wav file:", path)
        return None

    print("converting", path)
    args = ["ffmpeg", "-i", path, "-acodec", "pcm_s16le", "-ar", "16000",
            "-ac", "1", "-f", "wav", wav_path, "-y"]
    return subprocess.call(args)
13
14
def delete(path):
    """Remove the file at ``path``, reporting (not raising) on failure.

    Uses ``os.remove`` rather than spawning ``rm``, so it works on platforms
    without ``rm`` and cannot misread a path starting with "-" as an option.
    Failures are printed and swallowed, so a missing file still does not
    interrupt the caller.

    :param path: path of the file to remove
    """
    from os import remove

    print("deleting", path)
    try:
        remove(path)
    except OSError as err:
        print("could not delete", path, "-", err)
19
20
21import boto3
22
23
class Polly:
    """Namespace around a shared Amazon Polly client.

    The client is built once, when the class body executes, from the
    credentials below and is reused by every ``get_tts`` call.
    """

    # NOTE(review): these look like placeholders -- presumably real
    # credentials are filled in locally; confirm before running.
    key_id = "xxx"
    secret_key = "xxx"
    region = 'us-east-1'
    # Despite its name this attribute holds the Polly client, not the Session.
    session = boto3.Session(aws_access_key_id=key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region).client('polly')

    @staticmethod
    def get_tts(sentence, wav_file, voice, text_type="text", overwrite=False):
        """Synthesize ``sentence`` with Polly and hand the MP3 to ``convert``.

        The MP3 returned by Polly is written to ``wav_file``, passed to
        ``convert`` and then removed with ``delete``. The result of
        ``convert`` is not checked before the MP3 is deleted.

        :param sentence: plain text or SSML, sent as ``Text``
        :param wav_file: path the intermediate MP3 is written to; the same
            path with ".mp3" replaced by ".wav" is the file whose existence
            is checked
        :param voice: Polly ``VoiceId``
        :param text_type: Polly ``TextType`` ("text" by default; callers in
            this file also pass "ssml")
        :param overwrite: when False, return without calling Polly if the
            ".wav" counterpart already exists
        """
        from contextlib import closing

        if not overwrite and exists(wav_file.replace(".mp3", ".wav")):
            return
        response = Polly.session.synthesize_speech(
            OutputFormat="mp3",
            Text=sentence,
            TextType=text_type,
            VoiceId=voice)

        # Close the streaming body once it has been read so the underlying
        # connection is released.
        with closing(response['AudioStream']) as stream:
            audio = stream.read()

        with open(wav_file, 'wb') as f:
            f.write(audio)
        # Convert only after the file is closed, so all bytes are on disk.
        convert(wav_file)
        delete(wav_file)
46
47
def create_responsive_voice():
    """Render every hotword, negative and test sentence with ResponsiveVoice.

    One MP3 is written per (text, voice configuration) pair, into a per-text
    directory under DATA_PATH. Hotwords and negatives are filed under the
    text itself; test sentences are filed under their 1-based position in
    SENTENCE_LIST. Spaces in the resulting paths are replaced by "_".

    NOTE(review): commented-out DATA_PATH overrides in the original suggest
    positives / negatives / test once went to separate directories; all
    three groups now share DATA_PATH.
    """
    from responsive_voice import ResponsiveVoice

    engine_name = "responsive_voice"

    # Nine base voices; each is rendered with no pitch setting, then with
    # pitch 0.4, then with pitch 0.6 (27 configurations, in that order).
    base_voices = [
        {"gender": "male"},
        {"gender": "female"},
        {"gender": "male", "rate": 0.4},
        {"gender": "female", "rate": 0.4},
        {"gender": "male", "rate": 0.6},
        {"gender": "female", "rate": 0.6},
        {"gender": "female", "lang": "en-GB"},
        {"gender": "female", "rate": 0.4, "lang": "en-GB"},
        {"gender": "female", "rate": 0.6, "lang": "en-GB"},
    ]
    params = []
    for pitch in (None, 0.4, 0.6):
        for base in base_voices:
            config = dict(base)
            if pitch is not None:
                config["pitch"] = pitch  # appended last, after rate / lang
            params.append(config)

    def synthesize(text, label):
        # Speak `text` with every configuration into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        for p in params:
            name = label
            # NOTE(review): the engine name is appended once per parameter,
            # not once per file -- kept as-is so file names stay unchanged.
            for key in p:
                name += "-" + key + "-" + str(p[key]) + "-" + engine_name
            mp3_path = join(target_dir, name + ".mp3").replace(" ", "_")
            engine = ResponsiveVoice(**p)
            engine.get_mp3(text, mp3_path, **p)

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
120
121
def create_google():
    """Render every hotword, negative and test sentence with gTTS.

    One MP3 named "<label>-gtts.mp3" is saved per text, in a per-text
    directory under DATA_PATH. The label is the text itself for hotwords
    and negatives, and the 1-based position in SENTENCE_LIST for test
    sentences. Spaces in the resulting paths are replaced by "_".
    """
    from gtts import gTTS

    engine_name = "gtts"

    def synthesize(text, label):
        # Speak `text` and save it in the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        mp3_path = join(target_dir,
                        label + "-" + engine_name + ".mp3").replace(" ", "_")
        gTTS(text).save(mp3_path)

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
157
158
def create_mimic():
    """Render every hotword, negative and test sentence with the mimic CLI.

    For each text and each voice the mimic binary is run once, writing
    "<label>-<voice>-mimic.wav" into a per-text directory under DATA_PATH.
    The label is the text itself for hotwords and negatives, and the 1-based
    position in SENTENCE_LIST for test sentences. Spaces in the resulting
    paths are replaced by "_". The exit status of mimic is not checked.
    """
    # Machine-specific location of the mimic executable.
    MIMIC_BIN = "/home/user/PycharmProjects/mycroft-core/mimic/bin/mimic"
    engine_name = "mimic"
    voices = ["ap", "slt", "kal", "awb", "rms"]

    def synthesize(text, label):
        # Speak `text` with every voice into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        for voice in voices:
            wav_path = join(target_dir,
                            label + "-" + voice + "-" + engine_name +
                            ".wav").replace(" ", "_")
            subprocess.call([MIMIC_BIN, '-voice', voice,
                             '-t', text, '-o', wav_path])

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
199
200
def create_pico():
    """Render every hotword, negative and test sentence with pico2wave.

    For each text and each language, pico2wave is run once, writing
    "<label>-<language>-pico.wav" into a per-text directory under DATA_PATH.
    The label is the text itself for hotwords and negatives, and the 1-based
    position in SENTENCE_LIST for test sentences. Spaces in the resulting
    paths are replaced by "_". The exit status of pico2wave is not checked.
    """
    engine_name = "pico"
    voices = ["en-GB", "en-US"]

    def synthesize(text, label):
        # Speak `text` in every language into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        for voice in voices:
            wav_path = join(target_dir,
                            label + "-" + voice + "-" + engine_name +
                            ".wav").replace(" ", "_")
            subprocess.call(['pico2wave', '-l', voice, "-w", wav_path, text])

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
242
243
def create_mimic2():
    """Render every hotword, negative and test sentence via the Mimic2 API.

    Each text is requested from the "/synthesize" route and the response
    body is saved as "<label>-<voice>-mimic2.wav" in a per-text directory
    under DATA_PATH. The label is the text itself for hotwords and
    negatives, and the 1-based position in SENTENCE_LIST for test sentences.
    Spaces in the resulting paths are replaced by "_".

    The voice name is only used in the file name; it is not sent to the API.
    """
    import requests

    api_url = "https://mimic-api.mycroft.ai"
    engine_name = "mimic2"
    voices = ["kusal"]

    def fetch_tts(sentence, wav_file):
        # Let requests build the query string so characters such as "&",
        # "#" or "+" in the text are encoded instead of corrupting the URL.
        response = requests.get(api_url + "/synthesize",
                                params={"text": sentence})
        # Do not store an HTTP error page as if it were audio.
        if not response.ok:
            print("mimic2 request failed with status",
                  response.status_code, "for", sentence)
            return
        with open(wav_file, 'wb') as f:
            f.write(response.content)

    def synthesize(text, label):
        # Fetch `text` once per voice label into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        for voice in voices:
            wav_path = join(target_dir,
                            label + "-" + voice + "-" + engine_name +
                            ".wav").replace(" ", "_")
            fetch_tts(text, wav_path)

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
294
295
def create_polly():
    """Render every hotword, negative and test sentence with Amazon Polly.

    For each text and each voice, ``Polly.get_tts`` is called with the target
    "<label>-<voice>-polly.mp3" inside a per-text directory under DATA_PATH.
    The label is the text itself for hotwords and negatives, and the 1-based
    position in SENTENCE_LIST for test sentences. Spaces in the resulting
    paths are replaced by "_".
    """
    engine_name = "polly"
    voices = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian", "Geraint",
              "Joanna", "Kendra", "Kimberly", "Salli", "Joey", "Matthew",
              "Justin"]

    def synthesize(text, label):
        # Speak `text` with every voice into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        for voice in voices:
            mp3_path = join(target_dir,
                            label + "-" + voice + "-" + engine_name +
                            ".mp3").replace(" ", "_")
            Polly.get_tts(text, mp3_path, voice)

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
334
335
def create_polly_ssml():
    """Render every text with Polly, once per voice and per SSML effect.

    Each text is wrapped as "<speak>" + effect + text + "</speak>" and sent
    through ``Polly.get_tts`` with text type "ssml". The target is
    "<label>-<voice>-<effect>-polly.mp3" in a per-text directory under
    DATA_PATH. The label is the text itself for hotwords and negatives, and
    the 1-based position in SENTENCE_LIST for test sentences. Spaces in the
    resulting paths are replaced by "_".
    """
    from xml.sax.saxutils import escape

    engine_name = "polly"
    voices = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian", "Geraint",
              "Joanna", "Kendra", "Kimberly", "Salli", "Joey", "Matthew",
              "Justin"]
    # (opening tag, closing tag, label used in the file name)
    effects = [
        ('<amazon:effect vocal-tract-length="+20%">',
         '</amazon:effect>',
         'strong'),
        ('<amazon:effect vocal-tract-length="-20%">',
         '</amazon:effect>',
         'weak'),
        # disabled: ("<amazon:effect name=\"whispered\">",
        #            "</amazon:effect>", "whispered"),
        ("<prosody rate='0.7'>",
         "</prosody>",
         "slow"),
        ("<prosody rate='1.25'>",
         "</prosody>",
         "fast"),
        ('<amazon:effect phonation="soft">',
         '</amazon:effect>',
         "softly"),
        # disabled: ("<amazon:auto-breaths>",
        #            "</amazon:auto-breaths>", "auto_breaths"),
        ("<prosody pitch='-10%'>",
         "</prosody>",
         "low_pitch"),
        ("<prosody pitch='+10%'>",
         "</prosody>",
         "high_pitch"),
    ]

    def synthesize(text, label):
        # Speak `text` with every voice/effect pair into the `label` dir.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        # Escape &, < and > so the text cannot break the SSML markup.
        spoken = escape(text)
        for voice in voices:
            for opening, closing_tag, effect_label in effects:
                mp3_path = join(target_dir,
                                label + "-" + voice + "-" + effect_label +
                                "-" + engine_name + ".mp3").replace(" ", "_")
                ssml = "<speak>" + opening + spoken + closing_tag + "</speak>"
                Polly.get_tts(ssml, mp3_path, voice, "ssml")

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
414
415
def create_polly_mixed_ssml():
    """Render every text with Polly using pairs of nested SSML effects.

    For each text, voice and effect pair, the text is wrapped as
    "<speak>" + outer + inner + text + "</inner>" + "</outer>" + "</speak>"
    and sent through ``Polly.get_tts`` with text type "ssml". The target is
    "<label>-<voice>-<inner>_<outer>-polly.mp3" in a per-text directory under
    DATA_PATH. The label is the text itself for hotwords and negatives, and
    the 1-based position in SENTENCE_LIST for test sentences. Spaces in the
    resulting paths are replaced by "_".
    """
    from xml.sax.saxutils import escape

    engine_name = "polly"
    voices = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian", "Geraint",
              "Joanna", "Kendra", "Kimberly", "Salli", "Joey", "Matthew",
              "Justin"]
    # Effect label -> (opening tag, closing tag). Keyed by label rather than
    # list position so enabling or disabling an effect cannot silently shift
    # which effects a mix refers to.
    effects = {
        "strong": ('<amazon:effect vocal-tract-length="+20%">',
                   '</amazon:effect>'),
        "weak": ('<amazon:effect vocal-tract-length="-20%">',
                 '</amazon:effect>'),
        # disabled: "whispered": ("<amazon:effect name=\"whispered\">",
        #                         "</amazon:effect>"),
        "slow": ("<prosody rate='0.7'>", "</prosody>"),
        "fast": ("<prosody rate='1.25'>", "</prosody>"),
        # "softly" is defined but not part of any mix below.
        "softly": ('<amazon:effect phonation="soft">', '</amazon:effect>'),
        # disabled: "auto_breaths": ("<amazon:auto-breaths>",
        #                            "</amazon:auto-breaths>"),
        "low_pitch": ("<prosody pitch='-10%'>", "</prosody>"),
        "high_pitch": ("<prosody pitch='+10%'>", "</prosody>"),
    }
    # (inner effect, outer effect); the file-name label is "<inner>_<outer>".
    mixes = [
        ("strong", "slow"),
        ("strong", "fast"),
        ("strong", "low_pitch"),
        ("strong", "high_pitch"),
        ("weak", "slow"),
        ("weak", "fast"),
        ("weak", "low_pitch"),
        ("weak", "high_pitch"),
        ("slow", "low_pitch"),
        ("slow", "high_pitch"),
        ("fast", "low_pitch"),
        ("fast", "high_pitch"),
    ]

    def synthesize(text, label):
        # Speak `text` with every voice/mix pair into the `label` directory.
        target_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(target_dir):
            makedirs(target_dir)
        # Escape &, < and > so the text cannot break the SSML markup.
        spoken = escape(text)
        for voice in voices:
            for inner, outer in mixes:
                inner_open, inner_close = effects[inner]
                outer_open, outer_close = effects[outer]
                mix_label = inner + "_" + outer
                mp3_path = join(target_dir,
                                label + "-" + voice + "-" + mix_label + "-" +
                                engine_name + ".mp3").replace(" ", "_")
                ssml = "<speak>" + outer_open + inner_open + spoken + \
                       inner_close + outer_close + "</speak>"
                Polly.get_tts(ssml, mp3_path, voice, "ssml")

    for word in HOTWORD_LIST:
        synthesize(word, word)
    for word in NEGATIVES_LIST:
        synthesize(word, word)
    for number, sentence in enumerate(SENTENCE_LIST, 1):
        synthesize(sentence, str(number))
517
518
if __name__ == "__main__":
    # Generators run in the order listed. Only the Polly variants are
    # enabled; the other engines are kept commented out so they can be
    # switched back on.
    enabled_generators = (
        create_polly_mixed_ssml,
        create_polly_ssml,
        create_polly,
        # create_mimic2,
        # create_pico,
        # create_mimic,
        # create_google,
        # create_responsive_voice,
    )
    for generate in enabled_generators:
        generate()