k5ewn40s

· 5 years ago · Nov 16, 2020, 02:18 PM
1# -*- coding: utf-8 -*-
2"""
3Created on Mon Nov 16 17:14:57 2020
4
5@author: Alina Shcherbinina
6"""
import io
import os
from xml.sax.saxutils import escape

import requests
from langdetect import detect
from pydub import AudioSegment
11
12
def json_extract(obj, key):
    """Recursively collect every value stored under ``key`` in nested JSON.

    Args:
        obj: Decoded JSON data: dicts, lists and scalars, nested to any depth.
            Anything that is neither a dict nor a list yields no values.
        key: Dictionary key to look for at every nesting level.

    Returns:
        A list of the values found, in depth-first traversal order. A value
        that is itself a dict or list is included as-is and is then searched
        for further occurrences of ``key``.
    """
    arr = []

    def extract(node):
        """Append matches found in ``node`` to ``arr`` and descend into it."""
        if isinstance(node, dict):
            for k, v in node.items():
                # Test the key before looking at the value type, so a match
                # whose value is a container is not silently skipped.
                if k == key:
                    arr.append(v)
                if isinstance(v, (dict, list)):
                    extract(v)
        elif isinstance(node, list):
            for item in node:
                extract(item)

    extract(obj)
    return arr
32
33
# my vars

full_text = ""
test = "voice.wav"

# ask for the name of a text file to read

print(" ~ Text to speech! ~ ")

user_file = input('Choose file to read (example: text) : ')
user_file1 = user_file + ".txt"
try:
    # The context manager closes the file even when reading fails part-way.
    with open(user_file1, "r", encoding="utf-8") as file_in:
        full_text = file_in.read()
except (OSError, UnicodeDecodeError) as err:
    # Every later step needs the text, so stop here and say what failed
    # instead of carrying on with an empty string.
    raise SystemExit("Something went wrong with the file: {}".format(err))

if not full_text.strip():
    # There is nothing to detect a language from or to read aloud.
    raise SystemExit("Something went wrong with the file: it is empty")

print(full_text)
52
# get token

auth_url = "https://francecentral.api.cognitive.microsoft.com/sts/v1.0/issueToken"
# Prefer a key supplied through the SPEECH_KEY environment variable so the
# credential does not have to live in the source file.
# NOTE(review): the literal fallback is kept only so existing runs keep
# working; it is a secret committed in plain text and should be rotated and
# then removed from this file.
key = os.environ.get("SPEECH_KEY", "be5f4c47488e4d349dbb06b527492c7c")

auth_headers = {
    "Ocp-Apim-Subscription-Key": key,
    "Content-Length": "0",
    "Content-type": "application/x-www-form-urlencoded"
    }

# The timeout keeps an unreachable service from hanging the script forever.
auth_response = requests.post(auth_url, headers=auth_headers, timeout=30)
# Fail here on a rejected key; otherwise the error body would be used as
# the token in every later request.
auth_response.raise_for_status()
token = auth_response.text  # the response body is the bearer token itself
66
67
# get voice list

lang_url = "https://francecentral.tts.speech.microsoft.com/cognitiveservices/voices/list"

lang_headers = {
    "Authorization": "Bearer " + token,
    }

# The timeout keeps an unreachable service from hanging the script forever.
lang_response = requests.get(lang_url, headers=lang_headers, timeout=30)
# Report an HTTP error as such instead of trying to parse its body as the
# voice list.
lang_response.raise_for_status()
json_langs = lang_response.json()
78
# choose a voice whose locale belongs to the detected language

# Detect once, so the value printed is the value actually used below.
lang_of_text = detect(full_text)
print(lang_of_text)

# Compare case-insensitively and on whole language tags: a locale matches
# when it equals the detected code ("zh-cn" vs "zh-CN") or starts with it
# followed by "-" ("en" vs "en-US"). A plain substring test would also
# accept unrelated locales, e.g. "fi" inside "fil-PH".
wanted = lang_of_text.lower()
index = None
for position, voice in enumerate(json_langs):
    locale = json_extract(voice, 'Locale')
    string_l = ''.join(locale).lower()
    if string_l == wanted or string_l.startswith(wanted + "-"):
        # No break: as before, the last matching voice in the list wins.
        index = position

if index is None:
    raise SystemExit("No voice found for detected language: " + lang_of_text)

print(json_langs[index])

needed_voice = json_langs[index]
91
api_url = "https://francecentral.tts.speech.microsoft.com/cognitiveservices/v1"

api_headers = {
    "Authorization": "Bearer " + token,
    "X-Microsoft-OutputFormat": "raw-16khz-16bit-mono-pcm",
    "Content-Type": "application/ssml+xml"
    }

# describe the chosen voice and the text as an SSML document

lang = json_extract(needed_voice, 'Locale')
string_l = ''.join(lang)
gender = json_extract(needed_voice, 'Gender')
string_g = ''.join(gender)
name = json_extract(needed_voice, 'ShortName')
string_n = ''.join(name)

# The file holds plain text, not markup: escape &, < and > so that such
# characters cannot turn the document into malformed XML.
ssml_text = escape(full_text)

final_xml = (
    "<speak version='1.0' xml:lang='{locale}'>"
    "<voice xml:lang='{locale}' xml:gender='{gender}' name='{voice}'>"
    "{text}"
    "</voice></speak>"
).format(locale=string_l, gender=string_g, voice=string_n, text=ssml_text)
print(final_xml)
112
# make a request to the API; the response body is raw PCM audio in the
# format requested through the X-Microsoft-OutputFormat header

# Encode the SSML explicitly: depending on the installed library versions a
# str body may be encoded as Latin-1 by the HTTP layer, which fails for text
# outside that character set (e.g. Cyrillic).
request = requests.post(
    api_url,
    headers=api_headers,
    data=final_xml.encode("utf-8"),
    timeout=60,
)
# Without this check an error response would be written out as "audio".
request.raise_for_status()
response = request.content

wav_file = user_file + ".wav"

s = io.BytesIO(response)
# These parameters must agree with the requested output format:
# 16-bit samples (2 bytes), 16 kHz, one channel.
audio = AudioSegment.from_raw(s, sample_width=2, frame_rate=16000, channels=1)
# export() hands back the open output file; close it so the WAV is flushed
# and the handle is released.
audio.export(wav_file, format='wav').close()
122
123# additional work coming soon 
124