# 5 years ago · Nov 16, 2020, 02:18 PM
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 17:14:57 2020

@author: Alina Shcherbinina
"""
import io
import os
from xml.sax.saxutils import escape
from xml.sax.saxutils import quoteattr

import requests
from langdetect import detect
from pydub import AudioSegment


def json_extract(obj, key) -> list:
    """Collect every scalar value stored under ``key`` in nested JSON data.

    The structure is walked depth-first in document order. Dicts and lists
    are always descended into; a value is collected only when it sits
    directly under ``key`` in a dict and is itself neither a dict nor a
    list. A dict or list stored under ``key`` is therefore searched, but
    not returned as a value.

    Args:
        obj: Decoded JSON data (any mix of dicts, lists and scalars).
        key: The dict key whose values should be gathered.

    Returns:
        A list of the matching values in the order they were encountered;
        empty when nothing matches or ``obj`` is not a dict or list.
    """
    # Marks entries that did not come from a dict (the root and list items),
    # so they can never be mistaken for a key match.
    no_key = object()
    found = []

    # An explicit stack of iterators replaces recursion, so arbitrarily deep
    # nesting cannot hit the interpreter's recursion limit while the visiting
    # order stays the same as a recursive depth-first walk.
    pending = [iter([(no_key, obj)])]
    while pending:
        try:
            k, v = next(pending[-1])
        except StopIteration:
            pending.pop()
            continue

        if isinstance(v, dict):
            pending.append(iter(v.items()))
        elif isinstance(v, list):
            pending.append((no_key, item) for item in v)
        elif k is not no_key and k == key:
            found.append(v)

    return found


# my vars

# Upper bound (seconds) for each HTTP call so a stalled connection cannot
# hang the script forever.
REQUEST_TIMEOUT = 30

# ask a name of a text file to read

print(" ~ Text to speech! ~ ")

user_file = input('Choose file to read (example: text) : ')
user_file1 = user_file + ".txt"
try:
    # The context manager closes the file even if reading fails part-way.
    with open(user_file1, "r", encoding="utf-8") as file_in:
        full_text = file_in.read()
except (OSError, UnicodeDecodeError) as err:
    # Without any text there is nothing to detect or synthesize, so stop
    # here instead of failing later with a less helpful error.
    print("Something went wrong with the file")
    raise SystemExit("Could not read {}: {}".format(user_file1, err))
print(full_text)

if not full_text.strip():
    raise SystemExit("The file is empty, nothing to read aloud")

# get token

auth_url = "https://francecentral.api.cognitive.microsoft.com/sts/v1.0/issueToken"
# NOTE(security): a subscription key committed to source is readable by
# anyone who can see this file. Prefer supplying it through the
# AZURE_SPEECH_KEY environment variable; the literal below is kept only as a
# fallback so existing usage keeps working, and should be rotated/removed.
key = os.environ.get("AZURE_SPEECH_KEY", "be5f4c47488e4d349dbb06b527492c7c")

auth_headers = {
    "Ocp-Apim-Subscription-Key": key,
    "Content-Length": "0",
    "Content-type": "application/x-www-form-urlencoded"
    }

auth_response = requests.post(auth_url, headers=auth_headers,
                              timeout=REQUEST_TIMEOUT)
# Fail loudly on a rejected key; otherwise the error body would be used as
# the bearer token below.
auth_response.raise_for_status()
token = auth_response.text  # the response body is the bearer token


# get voice list

lang_url = "https://francecentral.tts.speech.microsoft.com/cognitiveservices/voices/list"

lang_headers = {
    "Authorization": "Bearer " + token,
    }

lang_response = requests.get(lang_url, headers=lang_headers,
                             timeout=REQUEST_TIMEOUT)
lang_response.raise_for_status()
json_langs = lang_response.json()

# choose a voice whose locale matches the detected language

# Detect once and reuse the result, so the printed language is always the
# one that is actually used for the voice lookup.
lang_of_text = detect(full_text)
print(lang_of_text)

# Compare case-insensitively: either the whole locale equals the detected
# code (e.g. "zh-cn" vs "zh-CN") or its primary language subtag does
# (e.g. "en" vs "en-US"). A plain substring test would also let "fi" match
# "fil-PH".
wanted = lang_of_text.lower()
index = None
for position, voice in enumerate(json_langs):
    locale = ''.join(json_extract(voice, 'Locale')).lower()
    if locale == wanted or locale.split('-')[0] == wanted:
        # No break: the last matching voice in the list wins.
        index = position

if index is None:
    raise SystemExit("No voice available for language: " + lang_of_text)
print(json_langs[index])

needed_voice = json_langs[index]

api_url = "https://francecentral.tts.speech.microsoft.com/cognitiveservices/v1"

api_headers = {
    "Authorization": "Bearer " + token,
    "X-Microsoft-OutputFormat": "raw-16khz-16bit-mono-pcm",
    "Content-Type": "application/ssml+xml"
    }

# describe the chosen voice and the text as SSML

lang = json_extract(needed_voice, 'Locale')
string_l = ''.join(lang)
gender = json_extract(needed_voice, 'Gender')
string_g = ''.join(gender)
name = json_extract(needed_voice, 'ShortName')
string_n = ''.join(name)

# quoteattr() returns the value already quoted and escaped for use as an XML
# attribute; escape() protects the text content, so "&" or "<" in the input
# file cannot produce malformed SSML.
final_xml = (
    "<speak version='1.0' xml:lang=" + quoteattr(string_l) + ">"
    "<voice xml:lang=" + quoteattr(string_l)
    + " xml:gender=" + quoteattr(string_g)
    + " name=" + quoteattr(string_n) + ">"
    + escape(full_text)
    + "</voice></speak>"
)
print(final_xml)

# make a request to api. the response body is raw PCM audio in the format
# requested via X-Microsoft-OutputFormat (16 kHz, 16-bit, mono)

# Encode explicitly: a str body containing characters outside Latin-1 may
# fail to be sent, and the text file was read as UTF-8.
request = requests.post(api_url, headers=api_headers,
                        data=final_xml.encode("utf-8"),
                        timeout=REQUEST_TIMEOUT)
# Do not write an error page into the .wav file.
request.raise_for_status()
response = request.content

wav_file = user_file + ".wav"

s = io.BytesIO(response)
# The parameters mirror the raw format requested in api_headers.
audio = AudioSegment.from_raw(s, sample_width=2, frame_rate=16000, channels=1)
# export() hands back the output file object; close it so the WAV data is
# flushed and the handle is not leaked.
audio.export(wav_file, format='wav').close()

# additional work coming soon
