· 5 years ago · Aug 12, 2020, 11:26 AM
1import xml.etree.ElementTree as XmlElementTree
2import httplib2
3import uuid
4from config import ***
5
# Host name of the speech-recognition API; passed to the HTTP connection
# opened by speech_to_text (identifier and value are redacted in this paste).
***_HOST = '***'
# Request path of the XML recognition endpoint; speech_to_text appends the
# query string (uuid, key, topic, lang) to it.
***_PATH = '/***_xml'
# Size in bytes (1 MiB) handed to read_chunks when splitting audio for upload.
CHUNK_SIZE = 1024 ** 2
9
10
11def speech_to_text(filename=None, bytes=None, request_id=uuid.uuid4().hex, topic='notes', lang='ru-RU',
12 key=***_API_KEY):
13 '''Translate speech from file to text
14
15 Keyword arguments:
16 filename -- audio file with speech
17 bytes -- speech audio in bytes in pcm 16000 GHz
18 request_id -- uuid for connection to converting api
19 topic -- speech topic
20 lang -- speech language
21
22 Raised exceptions:
23 Exception -- raised when neither file name nor bytes provided
24 SpeechException -- raised when error in audio to text converting
25 '''
26 if filename:
27 with open(filename, 'br') as file:
28 bytes = file.read()
29 if not bytes:
30 raise Exception('Neither file name nor bytes provided.')
31 bytes = convert_to_pcm16b16000r(in_bytes=bytes)
32 url = ***_PATH + '?uuid=%s&key=%s&topic=%s&lang=%s' % (
33 request_id,
34 key,
35 topic,
36 lang
37 )
38 chunks = read_chunks(CHUNK_SIZE, bytes) # splitting bytes to processing chunks
39
40 # openning connection to api for sending chunks to converting api
41 connection = httplib2.HTTPConnectionWithTimeout(***_HOST)
42 connection.connect()
43 connection.putrequest('POST', url)
44 connection.putheader('Transfer-Encoding', 'chunked')
45 connection.putheader('Content-Type', 'audio/x-pcm;bit=16;rate=16000')
46 connection.endheaders()
47
48 # sending chunks to api
49 for chunk in chunks:
50 connection.send(('%s\r\n' % hex(len(chunk))[2:]).encode())
51 connection.send(chunk)
52 connection.send('\r\n'.encode())
53 connection.send('0\r\n\r\n'.encode())
54
55 response = connection.getresponse()
56 if response.code == 200:
57 response_text = response.read()
58 xml = XmlElementTree.fromstring(response_text)
59
60 if int(xml.attrib['success']) == 1:
61 max_confidence = - float("inf")
62 text = ''
63
64 # searching text with largest confidence
65 for child in xml:
66 if float(child.attrib['confidence']) > max_confidence:
67 text = child.text
68 max_confidence = float(child.attrib['confidence'])
69
70 # if no text in response raise SpeechException
71 if max_confidence != - float("inf"):
72 return text
73 else:
74 raise SpeechException('No text found.\n\nResponse:\n%s' % (response_text))
75 else:
76 raise SpeechException('No text found.\n\nResponse:\n%s' % (response_text))
77 else:
78 # raise SpeechException because of api internal error
79 raise SpeechException('Unknown error.\nCode: %s\n\n%s' % (response.code, response.read()))
80
class SpeechException(Exception):
    '''Raised by speech_to_text when the api call fails or returns no text.'''
83
84
85