Trq220s5

· 6 years ago · Jun 21, 2019, 07:20 PM
1from synthetic_data import HOTWORD_LIST, NEGATIVES_LIST, SENTENCE_LIST, \
2    DATA_PATH
3from os.path import join, isdir, exists
4from os import makedirs
5import subprocess
6
7
def convert(path):
    """Transcode an audio file to 16 kHz, mono, 16-bit PCM WAV with ffmpeg.

    The result is written next to the input: a trailing ``.mp3`` extension
    is swapped for ``.wav`` (any other path simply gets ``.wav`` appended).
    An existing output file is overwritten.

    :param path: path of the source audio file.
    """
    # Local import so the block does not rely on names the module's
    # top-level imports do not provide.
    from os.path import splitext

    print("converting", path)
    # Only touch the trailing extension. A plain
    # path.replace(".mp3", ".wav") would also rewrite ".mp3" inside a
    # directory name and would leave a non-mp3 path unchanged, making the
    # output identical to the input.
    root, ext = splitext(path)
    wav_path = (root if ext.lower() == ".mp3" else path) + ".wav"
    # "-y" (overwrite without asking) is a global ffmpeg option, so it is
    # given before the input/output files rather than trailing the output.
    args = ["ffmpeg", "-y", "-i", path, "-acodec", "pcm_s16le",
            "-ar", "16000", "-ac", "1", "-f", "wav", wav_path]
    # Best effort: the ffmpeg exit status is not inspected.
    subprocess.call(args)
13
14
def delete(path):
    """Remove the file at ``path``.

    Removal is best effort: a failure (for example a missing file) is
    reported on stdout and otherwise ignored, so a failed clean-up does
    not abort a batch run.

    :param path: path of the file to remove.
    """
    # Local import: the module only imports ``makedirs`` from ``os``.
    import os

    print("deleting", path)
    try:
        # os.remove avoids spawning an external ``rm`` process, works on
        # every platform and cannot misread a path starting with "-" as a
        # command-line option.
        os.remove(path)
    except OSError as err:
        print("could not delete", path, err)
19
20
21import boto3
22
23
class Polly:
    """Namespace around one shared Amazon Polly client.

    The client is built while the class body executes, i.e. as soon as this
    module is imported.
    """
    # NOTE(review): the credentials below are placeholders; real keys
    # should not be committed to source control.
    key_id = "xxx"
    secret_key = "xxx"
    region = 'us-east-1'
    # Despite its name this attribute holds the Polly *client* obtained
    # from the boto3 session, not the session itself.
    session = boto3.Session(aws_access_key_id=key_id,
                            aws_secret_access_key=secret_key,
                            region_name=region).client('polly')

    @staticmethod
    def get_tts(sentence, wav_file, voice, text_type="text", overwrite=False):
        """Synthesize ``sentence`` with Polly and store it as a WAV file.

        The audio is requested as MP3, written to ``wav_file``, converted
        with ``convert`` and the intermediate MP3 is then removed with
        ``delete``.

        :param sentence: text (or SSML markup) to synthesize.
        :param wav_file: path of the intermediate MP3 file (callers pass a
            ``.mp3`` path); the final WAV sits next to it with the
            extension swapped.
        :param voice: Polly voice id, passed as ``VoiceId``.
        :param text_type: passed as ``TextType`` (``"text"`` by default;
            callers in this file also pass ``"ssml"``).
        :param overwrite: when false, nothing is done if the WAV file
            already exists.
        """
        # Local imports so the class does not rely on names the module's
        # top-level imports do not provide.
        from contextlib import closing
        from os.path import splitext

        # Derive the WAV name from the trailing extension only, so ".mp3"
        # appearing elsewhere in the path is left alone.
        root, ext = splitext(wav_file)
        wav_target = (root if ext.lower() == ".mp3" else wav_file) + ".wav"
        if exists(wav_target) and not overwrite:
            return
        response = Polly.session.synthesize_speech(
            OutputFormat="mp3",
            Text=sentence,
            TextType=text_type,
            VoiceId=voice)

        # Close the streaming body once it has been read so the underlying
        # connection is released; reading before opening the output file
        # also avoids leaving an empty file behind if the download fails.
        with closing(response['AudioStream']) as stream:
            audio = stream.read()
        with open(wav_file, 'wb') as f:
            f.write(audio)
        convert(wav_file)
        delete(wav_file)
46
47
def create_responsive_voice():
    """Render every hotword, negative and test sentence with ResponsiveVoice.

    One MP3 is produced per text and per voice setting. Files for a
    hotword/negative go to ``DATA_PATH/<word>/``, files for the n-th
    sentence (1-based) go to ``DATA_PATH/<n>/``; spaces in the resulting
    paths are replaced by underscores.
    """
    from responsive_voice import ResponsiveVoice
    backend = "responsive_voice"

    # Nine base settings; each one is then repeated with pitch 0.4 and with
    # pitch 0.6 appended as the last key. Key order matters because the
    # file name is built by walking the keys in order.
    base_settings = [
        {"gender": "male"},
        {"gender": "female"},
        {"gender": "male", "rate": 0.4},
        {"gender": "female", "rate": 0.4},
        {"gender": "male", "rate": 0.6},
        {"gender": "female", "rate": 0.6},
        {"gender": "female", "lang": "en-GB"},
        {"gender": "female", "rate": 0.4, "lang": "en-GB"},
        {"gender": "female", "rate": 0.6, "lang": "en-GB"},
    ]
    settings = list(base_settings)
    for pitch in (0.4, 0.6):
        settings.extend(dict(cfg, pitch=pitch) for cfg in base_settings)

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        for cfg in settings:
            # The engine name is appended after every key/value pair.
            tag = "".join("-" + key + "-" + str(value) + "-" + backend
                          for key, value in cfg.items())
            mp3_path = join(out_dir, label + tag + ".mp3").replace(" ", "_")
            ResponsiveVoice(**cfg).get_mp3(text, mp3_path, **cfg)
120
121
def create_google():
    """Render every hotword, negative and test sentence with gTTS.

    One ``<label>-gtts.mp3`` is saved per text, where the label is the word
    itself or the 1-based sentence number; files go to
    ``DATA_PATH/<label>/`` with spaces in the path replaced by underscores.
    """
    from gtts import gTTS

    backend = "gtts"

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        file_name = "-".join((label, backend)) + ".mp3"
        mp3_path = join(out_dir, file_name).replace(" ", "_")
        gTTS(text).save(mp3_path)
157
158
def create_mimic(
        mimic_bin="/home/user/PycharmProjects/mycroft-core/mimic/bin/mimic"):
    """Render every hotword, negative and test sentence with the mimic CLI.

    One ``<label>-<voice>-mimic.wav`` is produced per text and voice, where
    the label is the word itself or the 1-based sentence number; files go
    to ``DATA_PATH/<label>/`` with spaces in the path replaced by
    underscores.

    :param mimic_bin: path of the mimic executable. Defaults to the
        location that used to be hard-coded, so existing callers are
        unaffected while other machines can pass their own path.
    """
    engine_name = "mimic"
    voices = ["ap", "slt", "kal", "awb", "rms"]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        for voice in voices:
            file_name = "-".join((label, voice, engine_name)) + ".wav"
            wav_path = join(out_dir, file_name).replace(" ", "_")
            # Best effort: the exit status of mimic is not inspected.
            subprocess.call([mimic_bin, '-voice', voice,
                             '-t', text, '-o', wav_path])
199
200
def create_pico():
    """Render every hotword, negative and test sentence with pico2wave.

    One ``<label>-<lang>-pico.wav`` is produced per text and language,
    where the label is the word itself or the 1-based sentence number;
    files go to ``DATA_PATH/<label>/`` with spaces in the path replaced by
    underscores.
    """
    backend = "pico"
    languages = ["en-GB", "en-US"]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        for lang in languages:
            file_name = "-".join((label, lang, backend)) + ".wav"
            wav_path = join(out_dir, file_name).replace(" ", "_")
            command = ['pico2wave', '-l', lang, "-w", wav_path, text]
            subprocess.call(command)
242
243
def create_mimic2():
    """Render every hotword, negative and test sentence via the Mimic2 API.

    One ``<label>-<voice>-mimic2.wav`` is produced per text and voice name,
    where the label is the word itself or the 1-based sentence number;
    files go to ``DATA_PATH/<label>/`` with spaces in the path replaced by
    underscores. The voice name only appears in the file name; it is not
    sent to the service.
    """
    import requests
    from urllib.parse import quote

    class Mimic2():
        """Minimal HTTP client for the ``/synthesize`` endpoint."""
        url = "https://mimic-api.mycroft.ai"

        @staticmethod
        def get_tts(sentence, wav_file):
            """Fetch the audio for ``sentence`` and write it to ``wav_file``.

            Nothing is written when the service answers with an error
            status; the failure is reported on stdout instead.
            """
            # Percent-encode the text: characters such as "&", "#", "?" or
            # "+" would otherwise be parsed as URL syntax and truncate or
            # alter the sentence the server receives.
            req_route = (Mimic2.url + "/synthesize?text=" +
                         quote(sentence, safe=""))
            response = requests.get(req_route)
            if not response.ok:
                # Do not store an error page as if it were audio data.
                print("mimic2 request failed", response.status_code,
                      sentence)
                return
            with open(wav_file, 'wb') as f:
                f.write(response.content)

    engine_name = "mimic2"
    voices = ["kusal"]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        for voice in voices:
            file_name = "-".join((label, voice, engine_name)) + ".wav"
            wav_path = join(out_dir, file_name).replace(" ", "_")
            Mimic2.get_tts(text, wav_path)
294
295
def create_polly():
    """Render every hotword, negative and test sentence with Amazon Polly.

    For each text and voice, ``Polly.get_tts`` is asked to produce
    ``<label>-<voice>-polly.mp3``, where the label is the word itself or
    the 1-based sentence number; files go to ``DATA_PATH/<label>/`` with
    spaces in the path replaced by underscores.
    """
    backend = "polly"
    speakers = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian",
                "Geraint", "Joanna", "Kendra", "Kimberly", "Salli", "Joey",
                "Matthew", "Justin"]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        for speaker in speakers:
            file_name = "-".join((label, speaker, backend)) + ".mp3"
            mp3_path = join(out_dir, file_name).replace(" ", "_")
            Polly.get_tts(text, mp3_path, speaker)
334
335
def create_polly_ssml():
    """Render every text with Amazon Polly, once per voice and SSML effect.

    For each text, voice and effect, ``Polly.get_tts`` is asked to produce
    ``<label>-<voice>-<effect>-polly.mp3``, where the label is the word
    itself or the 1-based sentence number; files go to
    ``DATA_PATH/<label>/`` with spaces in the path replaced by underscores.
    """
    # Local import so the block does not rely on names the module's
    # top-level imports do not provide.
    from xml.sax.saxutils import escape

    engine_name = "polly"
    voices = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian", "Geraint",
              "Joanna", "Kendra", "Kimberly", "Salli", "Joey", "Matthew",
              "Justin"]
    # (opening tag, closing tag, label used in the file name).
    # The "whispered" and "auto-breaths" effects are intentionally left out.
    effects = [
        ('<amazon:effect vocal-tract-length="+20%">',
         '</amazon:effect>',
         'strong'),
        ('<amazon:effect vocal-tract-length="-20%">',
         '</amazon:effect>',
         'weak'),
        ("<prosody rate='0.7'>",
         "</prosody>",
         "slow"),
        ("<prosody rate='1.25'>",
         "</prosody>",
         "fast"),
        ('<amazon:effect phonation="soft">',
         '</amazon:effect>',
         "softly"),
        ("<prosody pitch='-10%'>",
         "</prosody>",
         "low_pitch"),
        ("<prosody pitch='+10%'>",
         "</prosody>",
         "high_pitch"),
    ]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        # The text becomes XML character data, so "&", "<" and ">" must be
        # escaped or the document is no longer well-formed SSML. Text
        # without those characters is passed through unchanged.
        safe_text = escape(text)
        for voice in voices:
            for open_tag, close_tag, effect_name in effects:
                file_name = "-".join(
                    (label, voice, effect_name, engine_name)) + ".mp3"
                mp3_path = join(out_dir, file_name).replace(" ", "_")
                ssml = ("<speak>" + open_tag + safe_text + close_tag +
                        "</speak>")
                Polly.get_tts(ssml, mp3_path, voice, "ssml")
414
415
def create_polly_mixed_ssml():
    """Render every text with Amazon Polly using pairs of SSML effects.

    For each text, voice and effect pair, ``Polly.get_tts`` is asked to
    produce ``<label>-<voice>-<inner>_<outer>-polly.mp3``, where the label
    is the word itself or the 1-based sentence number; files go to
    ``DATA_PATH/<label>/`` with spaces in the path replaced by underscores.
    """
    # Local import so the block does not rely on names the module's
    # top-level imports do not provide.
    from xml.sax.saxutils import escape

    engine_name = "polly"
    voices = ["Ivy", "Amy", "Emma", "Nicole", "Russell", "Brian", "Geraint",
              "Joanna", "Kendra", "Kimberly", "Salli", "Joey", "Matthew",
              "Justin"]
    # effect name -> (opening tag, closing tag).
    # The "whispered" and "auto-breaths" effects are intentionally left out.
    effects = {
        "strong": ('<amazon:effect vocal-tract-length="+20%">',
                   '</amazon:effect>'),
        "weak": ('<amazon:effect vocal-tract-length="-20%">',
                 '</amazon:effect>'),
        "slow": ("<prosody rate='0.7'>", "</prosody>"),
        "fast": ("<prosody rate='1.25'>", "</prosody>"),
        "softly": ('<amazon:effect phonation="soft">', '</amazon:effect>'),
        "low_pitch": ("<prosody pitch='-10%'>", "</prosody>"),
        "high_pitch": ("<prosody pitch='+10%'>", "</prosody>"),
    }
    # (inner effect, outer effect): the outer effect's tags wrap the inner
    # effect's tags, which in turn wrap the text.
    mixes = [
        ("strong", "slow"),
        ("strong", "fast"),
        ("strong", "low_pitch"),
        ("strong", "high_pitch"),
        ("weak", "slow"),
        ("weak", "fast"),
        ("weak", "low_pitch"),
        ("weak", "high_pitch"),
        ("slow", "low_pitch"),
        ("slow", "high_pitch"),
        ("fast", "low_pitch"),
        ("fast", "high_pitch"),
    ]

    # (text to speak, label used for the directory and file name)
    jobs = [(word, word) for word in HOTWORD_LIST]
    jobs += [(word, word) for word in NEGATIVES_LIST]
    jobs += [(sentence, str(number))
             for number, sentence in enumerate(SENTENCE_LIST, 1)]

    for text, label in jobs:
        out_dir = join(DATA_PATH, label).replace(" ", "_")
        if not isdir(out_dir):
            makedirs(out_dir)
        # The text becomes XML character data, so "&", "<" and ">" must be
        # escaped or the document is no longer well-formed SSML. Text
        # without those characters is passed through unchanged.
        safe_text = escape(text)
        for voice in voices:
            for inner, outer in mixes:
                inner_open, inner_close = effects[inner]
                outer_open, outer_close = effects[outer]
                mix_name = inner + "_" + outer
                file_name = "-".join(
                    (label, voice, mix_name, engine_name)) + ".mp3"
                mp3_path = join(out_dir, file_name).replace(" ", "_")
                ssml = ("<speak>" + outer_open + inner_open + safe_text +
                        inner_close + outer_close + "</speak>")
                Polly.get_tts(ssml, mp3_path, voice, "ssml")
517
518
if __name__ == "__main__":
    # Only the Amazon Polly generators run; the other engines
    # (create_mimic2, create_pico, create_mimic, create_google,
    # create_responsive_voice) are currently switched off.
    for generate in (create_polly_mixed_ssml, create_polly_ssml,
                     create_polly):
        generate()