· 4 years ago · May 16, 2021, 08:20 PM
1#! /usr/bin/env python
2
3# v. 2.10
4# Interactive subtitles for `mpv` for language learners.
5
6import os, subprocess, sys
7import random, re, time
8import requests
9import threading, queue
10import calendar, math, base64
11import numpy
12import ast
13
14from bs4 import BeautifulSoup
15
16from urllib.parse import quote
17from json import loads
18
19import warnings
20from six.moves import urllib
21
22from PyQt5.QtCore import Qt, QThread, QObject, pyqtSignal, pyqtSlot, QSize
23from PyQt5.QtWidgets import QApplication, QFrame, QVBoxLayout, QHBoxLayout, QLabel, QSizePolicy, QWidget
24from PyQt5.QtGui import QPalette, QPaintEvent, QPainter, QPainterPath, QFont, QFontMetrics, QColor, QPen, QBrush
25
# The translators below read and write their cache through relative paths
# ('urls/...'), so the process works from the mpv scripts directory.
# NOTE(review): presumably the chdir is also what makes interSubs2_config
# importable - confirm.
pth = os.path.expanduser('~/.config/mpv/scripts/')
os.chdir(pth)
import interSubs2_config as config

# Two-language codes checked by pons() and listen(): when lang_from + lang_to
# is listed here it is sent as the `l=` query value, otherwise the reversed
# pair is sent.
pons_combos = (
    'enes enfr deen enpl ensl defr dees deru depl desl deit dept detr deel '
    'dela espl frpl itpl plru essl frsl itsl enit enpt enru espt esfr delb '
    'dezh enzh eszh frzh denl arde aren dade csde dehu deno desv dede dedx'
).split()
31
# pons.com
# Returns ([[word, translation], ...], [morphology, gender]); both morphology and gender default to ''.
def pons(word):
    """Translate `word` with pons.com, using the on-disk cache in `urls/`.

    The parsed result of every lookup is stored in a file named after the
    request URL; later lookups for the same URL are answered from that file.

    Returns:
        (pairs, [morphology, gender]) where `pairs` is a list of
        [source, translation] lists, `morphology` is the headword description
        with the gender stripped, and `gender` is 'm', 'f', 'nt' or ''.
    """
    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    # Send lang_from + lang_to as the `l=` value when that pair is listed in
    # pons_combos, otherwise send the reversed pair.
    if config.lang_from + config.lang_to in pons_combos:
        url = 'http://en.pons.com/translate?q=%s&l=%s%s&in=%s' % (quote(word), config.lang_from, config.lang_to, config.lang_from)
    else:
        url = 'http://en.pons.com/translate?q=%s&l=%s%s&in=%s' % (quote(word), config.lang_to, config.lang_from, config.lang_from)

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        word_descr = parts[1].strip() if len(parts) > 1 else ''

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry (missing, unreadable or malformed): start from
        # a clean list so half-parsed cache rows never mix with fresh results.
        pairs = []
        page = requests.get(url, headers={'User-Agent': user_agent}).text
        soup = BeautifulSoup(page, "lxml")

        limit = config.number_of_translations_to_save
        for entry in soup.find_all('dl'):
            try:
                source = entry.find('dt').find('div', class_="source").get_text()
                target = entry.find('dd').find('div', class_="target").get_text()
            except AttributeError:
                # find() returned None: this <dl> is not a translation row.
                continue

            source = re.sub(' +', ' ', re.sub('\n|\r|\t', ' ', source)).strip() or '-'
            target = re.sub(' +', ' ', re.sub('\n|\r|\t', ' ', target)).strip() or '-'
            pairs.append([source, target])

            if limit and len(pairs) > limit:
                break

        headings = soup.find_all('h2', class_='')
        if headings and '<i class="icon-bolt">' not in str(headings[0]):
            word_descr = re.sub('\n|\r|\t', ' ', headings[0].get_text())
            # NOTE(review): the first two replaces are no-ops as written;
            # possibly HTML-entity literals were lost in transit - confirm.
            word_descr = re.sub(' +', ' ', word_descr).replace('<', '<').replace('>', '>').replace(' · ', '·').replace(' , ', ', ').strip()
        else:
            word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    # A trailing ' m' / ' f' / ' nt' token on the description is the gender.
    gender = word_descr.split(' ')[-1]
    if gender in ('m', 'f', 'nt'):
        word_descr_gen = [word_descr[:-(len(gender) + 1)], gender]
    else:
        word_descr_gen = [word_descr, '']

    return pairs, word_descr_gen
109
# https://github.com/ssut/py-googletrans
class TokenAcquirer_DISABLED:
    """Google Translate request-token (`tk`) generator (py-googletrans port).

    The token is derived from a seed (`tkk`) and from the text to translate.
    `do(text)` refreshes the seed when needed (one extra HTTP request to
    `host`) and returns the token as a string such as '950629.577246'.

    Example usage::

        acquirer = TokenAcquirer_DISABLED()
        tk = acquirer.do('test')
    """

    RE_TKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)
    RE_RAWTKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)

    def __init__(self, client=None, tkk='0', host='translate.googleapis.com'):
        """Store the HTTP client, the initial seed and the host URL.

        `client` must offer `get(url)` returning an object with `.text`; it
        defaults to the `httpx` module, imported lazily so that merely
        defining this class does not require httpx to be installed.
        """
        if client is None:
            import httpx
            client = httpx
        self.client = client
        self.tkk = tkk
        self.host = host if 'http' in host else 'http://' + host

    def rshift(self, val, n):
        """Python port of JavaScript's '>>>' (unsigned 32-bit right shift)."""
        return (val % 0x100000000) >> n

    @staticmethod
    def _is_number(node):
        """Return True for an AST constant holding a number (not a bool)."""
        return (isinstance(node, ast.Constant)
                and isinstance(node.value, (int, float, complex))
                and not isinstance(node.value, bool))

    def _update(self):
        """Refresh `self.tkk` from `self.host` unless it is still valid.

        Raises:
            Exception: when no TKK value can be found in the response.
        """
        # The seed's integer part is the hour number since the epoch; no need
        # to refetch while it matches the current hour.
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        r = self.client.get(self.host)

        raw_tkk = self.RE_TKK.search(r.text)
        if raw_tkk:
            self.tkk = raw_tkk.group(1)
            return

        # NOTE(review): this fallback searches with the same pattern that just
        # failed on the same text, so as written it always raises - confirm
        # which pattern was intended.
        try:
            # this will be the same as python code after stripping out a reserved word 'var'
            code = self.RE_TKK.search(r.text).group(1).replace('var ', '')
            # unescape special ascii characters such like a \x3d(=)
            code = code.encode().decode('unicode-escape')
        except AttributeError:
            raise Exception('Could not find TKK token for this request.\nSee https://github.com/ssut/py-googletrans/issues/234 for more details.')

        if code:
            tree = ast.parse(code)
            visit_return = False
            operator = '+'
            n, keys = 0, dict(a=0, b=0)
            for node in ast.walk(tree):
                if isinstance(node, ast.Assign):
                    name = node.targets[0].id
                    if name in keys:
                        if self._is_number(node.value):
                            keys[name] = node.value.value
                        # the value can sometimes be negative
                        elif isinstance(node.value, ast.UnaryOp) and \
                                isinstance(node.value.op, ast.USub):  # pragma: nocover
                            keys[name] = -node.value.operand.value
                elif isinstance(node, ast.Return):
                    # parameters should be set after this point
                    visit_return = True
                elif visit_return and self._is_number(node):
                    n = node.value
                elif visit_return and n > 0:
                    # the default operator is '+' but implement some more for
                    # all possible scenarios
                    if isinstance(node, ast.Add):  # pragma: nocover
                        pass
                    elif isinstance(node, ast.Sub):  # pragma: nocover
                        operator = '-'
                    elif isinstance(node, ast.Mult):  # pragma: nocover
                        operator = '*'
                    elif isinstance(node, ast.Pow):  # pragma: nocover
                        operator = '**'
                    elif isinstance(node, ast.BitXor):  # pragma: nocover
                        operator = '^'
            # Only numbers taken from the AST and an operator from the fixed
            # set above reach eval(); builtins are disabled on top of that.
            clause = compile('{1}{0}{2}'.format(
                operator, keys['a'], keys['b']), '', 'eval')
            value = eval(clause, dict(__builtins__={}))
            result = '{}.{}'.format(n, value)

            self.tkk = result

    def _lazy(self, value):
        """Return a zero-argument function that returns `value`.

        Port of this helper from the original JavaScript::

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Mix `a` using the operation string `b`, read three chars at a time.

        In each triple the third char is a shift amount (digit, or letter
        meaning ord(letter) - 87), the second selects unsigned right shift
        ('+') or left shift, and the first selects 32-bit addition ('+') or
        XOR.
        """
        size_b = len(b)
        c = 0
        while c < size_b - 2:
            d = b[c + 2]
            d = ord(d[0]) - 87 if 'a' <= d else int(d)
            d = self.rshift(a, d) if '+' == b[c + 1] else a << d
            a = a + d & 4294967295 if '+' == b[c] else a ^ d

            c += 3
        return a

    def acquire(self, text):
        """Compute the token string for `text` from the current `self.tkk`."""
        a = []
        # Convert text to ints
        for i in text:
            val = ord(i)
            if val < 0x10000:
                a += [val]
            else:
                # Python doesn't natively use Unicode surrogates, so account for those
                a += [
                    math.floor((val - 0x10000) / 0x400 + 0xD800),
                    math.floor((val - 0x10000) % 0x400 + 0xDC00)
                ]

        b = self.tkk if self.tkk != '0' else ''
        d = b.split('.')
        b = int(d[0]) if len(d) > 1 else 0

        # e collects the UTF-8 byte values of the code units in a
        e = []
        g = 0
        size = len(a)
        while g < size:
            l = a[g]
            # just append if l is less than 128(ascii: DEL)
            if l < 128:
                e.append(l)
            else:
                # two-byte sequence if l is less than 2048
                if l < 2048:
                    e.append(l >> 6 | 192)
                else:
                    # a high surrogate followed by a low one is one code point
                    if (l & 64512) == 55296 and g + 1 < size and \
                            a[g + 1] & 64512 == 56320:
                        g += 1
                        l = 65536 + ((l & 1023) << 10) + (a[g] & 1023)  # This bracket is important
                        e.append(l >> 18 | 240)
                        e.append(l >> 12 & 63 | 128)
                    else:
                        e.append(l >> 12 | 224)
                    e.append(l >> 6 & 63 | 128)
                e.append(l & 63 | 128)
            g += 1
        a = b
        for value in e:
            a += value
            a = self._xr(a, '+-a^+6')
        a = self._xr(a, '+-3^+b+-f')
        a ^= int(d[1]) if len(d) > 1 else 0
        if a < 0:  # pragma: nocover
            a = (a & 2147483647) + 2147483648
        a %= 1000000  # int(1E6)

        return '{}.{}'.format(a, a ^ b)

    def do(self, text):
        """Refresh the seed if necessary and return the token for `text`."""
        self._update()
        tk = self.acquire(text)
        return tk
306
# https://github.com/Saravananslb/py-googletranslation
class TokenAcquirer:
    """Google Translate request-token (`tk`) generator (py-googletranslation port).

    Constructing an instance performs one HTTP request to fetch the seed
    (`tkk`); `do(text)` then combines that seed with the text into a token
    string such as '464393.115905'.

    Example usage::

        acquirer = TokenAcquirer()
        tk = acquirer.do('test')
    """

    def __init__(self, tkk='0', tkk_url='https://translate.google.com/translate_a/element.js', proxies=None):
        """Download `tkk_url` and extract the seed from it.

        `self.tkk` falls back to '0' when the response status is not 200 or
        no seed is found. The `tkk` argument is accepted but not used.
        """
        self.proxies = proxies
        self.tkk = '0'

        response = requests.get(tkk_url, proxies=self.proxies)
        if response.status_code == 200:
            found = re.search("(?<=tkk=\\')[0-9.]{0,}", str(response.content.decode("utf-8")))
            if found:
                self.tkk = found.group(0)

    def rshift(self, val, n):
        """Python port of JavaScript's '>>>' (unsigned 32-bit right shift)."""
        return (val % 0x100000000) >> n

    def _xr(self, a, b):
        """Mix `a` using the operation string `b`, read three chars at a time.

        In each triple the third char is a shift amount (digit, or letter
        meaning ord(letter) - 87), the second selects unsigned right shift
        ('+') or left shift, and the first selects 32-bit addition ('+') or
        XOR.
        """
        for start in range(0, len(b) - 2, 3):
            combine, direction, amount = b[start], b[start + 1], b[start + 2]
            if 'a' <= amount:
                shift = ord(amount[0]) - 87
            else:
                shift = int(amount)
            if direction == '+':
                shifted = self.rshift(a, shift)
            else:
                shifted = a << shift
            if combine == '+':
                a = a + shifted & 4294967295
            else:
                a = a ^ shifted
        return a

    def acquire(self, text):
        """Compute the token string for `text` from `self.tkk`."""
        # Step 1: UTF-16 code units (code points above 0xFFFF become a
        # surrogate pair, as in JavaScript strings).
        units = []
        for char in text:
            code_point = ord(char)
            if code_point < 0x10000:
                units.append(code_point)
            else:
                units.append(math.floor((code_point - 0x10000) / 0x400 + 0xD800))
                units.append(math.floor((code_point - 0x10000) % 0x400 + 0xDC00))

        seed_parts = self.tkk.split('.')
        has_both_parts = len(seed_parts) > 1
        seed_head = int(seed_parts[0]) if has_both_parts else 0

        # Step 2: UTF-8 byte values of those code units.
        encoded = []
        total = len(units)
        pos = 0
        while pos < total:
            unit = units[pos]
            if unit < 128:
                encoded.append(unit)
            elif unit < 2048:
                encoded.append(unit >> 6 | 192)
                encoded.append(unit & 63 | 128)
            else:
                is_pair = ((unit & 64512) == 55296 and pos + 1 < total
                           and units[pos + 1] & 64512 == 56320)
                if is_pair:
                    pos += 1
                    # The parentheses around the shift are important.
                    unit = 65536 + ((unit & 1023) << 10) + (units[pos] & 1023)
                    encoded.append(unit >> 18 | 240)
                    encoded.append(unit >> 12 & 63 | 128)
                else:
                    encoded.append(unit >> 12 | 224)
                encoded.append(unit >> 6 & 63 | 128)
                encoded.append(unit & 63 | 128)
            pos += 1

        # Step 3: fold the bytes into the seed.
        acc = seed_head
        for byte in encoded:
            acc = self._xr(acc + byte, '+-a^+6')
        acc = self._xr(acc, '+-3^+b+-f')
        acc ^= int(seed_parts[1]) if has_both_parts else 0
        if acc < 0:  # pragma: nocover
            acc = (acc & 2147483647) + 2147483648
        acc %= 1000000  # int(1E6)
        return '{}.{}'.format(acc, acc ^ seed_head)

    def do(self, text):
        """Return the token for `text`."""
        return self.acquire(text)
429
# translate.google.com
def google(word):
    """Translate `word` (or a phrase) with translate.google.com.

    Single words are cached on disk under `urls/`; input containing a space
    is always fetched and never cached.

    Returns:
        (pairs, ['', '']) where `pairs` is a list of [source, translation]
        lists.
    """
    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    word = word.replace('\n', ' ').strip()
    url = 'https://translate.google.com/translate_a/single?client=t&sl={lang_from}&tl={lang_to}&hl={lang_to}&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&otf=1&pc=1&ssel=3&tsel=3&kc=2&q={word}'.format(lang_from = config.lang_from, lang_to = config.lang_to, word = quote(word))

    fname = 'urls/' + url.replace('/', "-")
    cacheable = ' ' not in word

    pairs = None
    if cacheable:
        try:
            with open(fname) as cache:
                parts = cache.read().split(separator)
            # An empty or malformed entry raises IndexError here and is
            # treated as a cache miss.
            cached = []
            for chunk in parts[0].strip().split('\n\n'):
                lines = chunk.split('\n')
                cached.append([lines[0], lines[1]])
            pairs = cached
        except (OSError, ValueError, IndexError):
            pairs = None

    if pairs is None:
        pairs = []
        tk = TokenAcquirer().do(word)

        reply = requests.get('{url}&tk={tk}'.format(url = url, tk = tk), headers={'User-Agent': user_agent}).text
        data = loads(reply)

        # Main translation, when the response has one.
        try:
            pairs.append([data[0][0][0], data[0][0][1]])
        except (IndexError, KeyError, TypeError):
            pass

        # Per-entry dictionary translations.
        if data[1] is not None:
            for translations in data[1]:
                for translation in translations[2]:
                    try:
                        t1 = translation[5] + ' ' + translation[0]
                    except (IndexError, TypeError):
                        # No usable prefix at index 5: use the bare word.
                        t1 = translation[0]

                    t2 = ', '.join(translation[1])

                    pairs.append([t1 or '-', t2 or '-'])

        word_descr = ''
        if cacheable:
            # Exclusive creation ('x') makes "write only if nobody else did"
            # atomic, guarding against double-writing from rogue threads.
            try:
                with open(fname, 'x') as cache:
                    cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                    cache.write('\n' + separator + '\n\n')
                    cache.write(word_descr + '\n')
            except FileExistsError:
                pass

    return pairs, ['', '']
488
# reverso.net
def reverso(word):
    """Look up `word` on context.reverso.net, with an on-disk cache in `urls/`.

    Returns:
        (pairs, ['', '']). `pairs` holds ['-', 'a :: b :: c :: d'] rows built
        from the translation elements (in groups of four) followed by one
        list per usage example. When either configured language is not
        supported, a single explanatory pair is returned instead.
    """
    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'
    reverso_combos = {'ar':'Arabic', 'de':'German', 'en':'English', 'es':'Spanish', 'fr':'French', 'he':'Hebrew', 'it':'Italian', 'nl':'Dutch', 'pl':'Polish', 'pt':'Portuguese', 'ro':'Romanian', 'ru':'Russian'}

    # Both codes are looked up below, so either one being unknown is an error.
    if config.lang_from not in reverso_combos or config.lang_to not in reverso_combos:
        return [['Language code is not correct.', '']], ['', '']

    url = 'http://context.reverso.net/translation/%s-%s/%s' % (reverso_combos[config.lang_from].lower(), reverso_combos[config.lang_to].lower(), quote(word))

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry: fetch and parse the page.
        pairs = []
        page = requests.get(url, headers={'User-Agent': user_agent}).text
        soup = BeautifulSoup(page, "lxml")

        # Translations are emitted four at a time; a trailing group of fewer
        # than four is dropped.
        group = []
        for tag in soup.find_all(class_ = re.compile('translation.*ltr.*')):
            group.append(tag.get_text().strip().replace('\n', ' '))
            if len(group) == 4:
                pairs.append(['-', ' :: '.join(group)])
                group = []

        for example in soup.find_all(class_ = 'example'):
            lines = [x.strip() for x in example.get_text().split('\n') if x.strip()]
            # A pair needs both sides; shorter rows would break the cache
            # writer below.
            if len(lines) >= 2:
                pairs.append(lines)

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
531
# linguee.com (unfinished; site blocks frequent requests)
def linguee(word):
    """Look up `word` on linguee.com (German to English only), with disk cache.

    Returns:
        (pairs, ['', '']). Each featured lemma contributes [lemma, '-']
        followed by ['-', translation] rows.
    """
    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    url = 'https://www.linguee.com/german-english/search?source=german&query=%s' % quote(word)

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        # An empty or malformed entry raises IndexError here and is treated
        # as a cache miss.
        pairs = []
        for chunk in parts[0].strip().split('\n\n'):
            lines = chunk.split('\n')
            pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        pairs = []
        page = requests.get(url, headers={'User-Agent': user_agent}).text
        soup = BeautifulSoup(page, "lxml")

        for lemma in soup.find_all('div', class_="lemma featured"):
            anchors = lemma.find_all('a')
            if not anchors:
                continue
            # The first link is the headword, the remaining ones translations.
            pairs.append([anchors[0].get_text(), '-'])
            for anchor in anchors[1:]:
                text = anchor.get_text()
                if text:
                    pairs.append(['-', text])

        word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
570
# dict.cc
def dict_cc(word):
    """Look up `word` on dict.cc for the configured languages, with disk cache.

    Returns:
        (pairs, ['', '']) where `pairs` is a list of [source, translation]
        lists.
    """
    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    url = 'https://%s-%s.dict.cc/?s=%s' % (config.lang_from, config.lang_to, quote(word))

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry: fetch and parse the page.
        pairs = []
        page = requests.get(url, headers={'User-Agent': user_agent}).text

        # Remove the numeric <div>/<span> decorations before parsing so the
        # digits do not end up in the extracted text.
        page = re.sub(r'<div style="float:right;color:#999">\d*</div>', '', page)
        page = re.sub(r'<span style="color:#666;font-size:10px;padding:0 2px;position:relative;top:-3px">\d*</span>', '', page)

        soup = BeautifulSoup(page, "lxml")

        limit = config.number_of_translations_to_save
        for row in soup.find_all('tr', id = re.compile(r'tr\d*')):
            cells = row.find_all('td', class_ = 'td7nl')
            if len(cells) < 2:
                # Not a translation row.
                continue
            # The second cell is stored as the source, the first as the
            # translation.
            pairs.append([cells[1].get_text(), cells[0].get_text()])

            if limit and len(pairs) > limit:
                break

        word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
613
# redensarten-index.de
def redensarten(word):
    """Search redensarten-index.de for `word`, with disk cache.

    Words shorter than three characters are not looked up.

    Returns:
        (pairs, ['', '']) where each pair is built from the first two cells
        of a result-table row.
    """
    if len(word) < 3:
        return [], ['', '']

    separator = '=====/////-----'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    url = 'https://www.redensarten-index.de/suche.php?suchbegriff=' + quote(word) + '&bool=relevanz&gawoe=an&suchspalte%5B%5D=rart_ou&suchspalte%5B%5D=rart_varianten_ou&suchspalte%5B%5D=erl_ou&suchspalte%5B%5D=erg_ou'

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry: fetch and parse the page.
        pairs = []
        response = requests.get(url, headers={'User-Agent': user_agent})
        response.encoding = 'utf-8'

        soup = BeautifulSoup(response.text, "lxml")

        # Remove the 'autosyn-icon' links so their text is not extracted.
        for icon in soup.find_all('a', class_ = 'autosyn-icon'):
            icon.decompose()

        # No result table simply means no pairs.
        table = soup.find('table', id = 'tabelle')
        if table is not None:
            # The first row is skipped.
            for row in table.find_all('tr')[1:]:
                cells = row.find_all('td')
                if len(cells) > 1:
                    pairs.append([
                        re.sub(' +', ' ', cells[0].get_text()).strip(),
                        re.sub(' +', ' ', cells[1].get_text()).strip(),
                    ])

        word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
664
# leo.org
def leo(word):
    """Look up `word` in the dict.leo.org German dictionary, with disk cache.

    The dictionary queried is `<language>de`, where `<language>` is
    config.lang_from unless that is 'de', in which case config.lang_to is
    used.

    Returns:
        (pairs, ['', '']) where each pair is [text in lang_from, other side].
    """
    separator = '=====/////-----'
    language = config.lang_from if config.lang_from != 'de' else config.lang_to

    # The word is percent-encoded like in the other translators, so special
    # characters cannot corrupt the query string.
    url = "https://dict.leo.org/dictQuery/m-vocab/%sde/query.xml?tolerMode=nof&rmWords=off&rmSearch=on&searchLoc=0&resultOrder=basic&multiwordShowSingle=on&lang=de&search=%s" % (language, quote(word))

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry: query the XML endpoint.
        pairs = []
        reply = requests.get(url)
        content = BeautifulSoup(reply.text, "xml")

        # A response without <sectionlist> yields no pairs.
        section_list = content.sectionlist
        sections = section_list.find_all('section') if section_list is not None else []
        for section in sections:
            if not int(section['sctCount']):
                continue
            for entry in section.find_all('entry'):
                side0 = entry.find('side', attrs = {'hc' : '0'})
                side1 = entry.find('side', attrs = {'hc' : '1'})
                if side0 is None or side1 is None:
                    continue

                line0 = re.sub(r'\s+', ' ', side0.repr.get_text()).rstrip('|').strip()
                line1 = re.sub(r'\s+', ' ', side1.repr.get_text()).rstrip('|').strip()

                # Put the side written in lang_from first.
                if side0.attrs['lang'] == config.lang_from:
                    pairs.append([line0, line1])
                else:
                    pairs.append([line1, line0])

        word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
713
# offline dictionary with word \t translation
def tab_divided_dict(word):
    """Look up `word` in the offline dictionary `offdict`.

    Returns:
        ([[translation, '-']], ['', '']) on a hit, ([], ['', '']) otherwise.
        Tags are stripped from the translation when
        config.tab_divided_dict_remove_tags_B is set, and the escaped
        sequences '\\n' and '\\~' are turned into a newline and '~'.
    """
    if word not in offdict:
        return [], ['', '']

    translation = offdict[word]
    if config.tab_divided_dict_remove_tags_B:
        translation = re.sub('<.*?>', '', translation)

    translation = translation.replace('\\n', '\n')
    translation = translation.replace('\\~', '~')
    return [[translation, '-']], ['', '']
722
# morfix.co.il
def morfix(word):
    """Look up `word` on morfix.co.il (Hebrew/English), with disk cache.

    Returns:
        (pairs, ['', '']) where each pair is [hebrew, english] when
        config.lang_from is 'he' and [english, hebrew] otherwise.
    """
    separator = '=====/////-----'
    url = "http://www.morfix.co.il/en/%s" % quote(word)

    fname = 'urls/' + url.replace('/', "-")
    try:
        with open(fname) as cache:
            parts = cache.read().split(separator)

        pairs = []
        cached = parts[0].strip()
        if cached:
            for chunk in cached.split('\n\n'):
                lines = chunk.split('\n')
                pairs.append([lines[0], lines[1]])
    except (OSError, ValueError, IndexError):
        # No usable cache entry: fetch and parse the page.
        pairs = []
        reply = requests.get(url)
        soup = BeautifulSoup(reply.text, "lxml")

        he_class = re.compile('translation_he')
        en_class = re.compile('translation_en')
        for div in soup.find_all('div', class_ = 'title_ph'):
            he_tag = div.find('div', class_ = he_class)
            en_tag = div.find('div', class_ = en_class)
            if he_tag is None or en_tag is None:
                # Entry without both language parts: nothing to pair up.
                continue

            he = re.sub(r'\s+', ' ', he_tag.get_text()).strip()
            en = re.sub(r'\s+', ' ', en_tag.get_text()).strip()

            if config.lang_from == 'he':
                pairs.append([he, en])
            else:
                pairs.append([en, he])

        word_descr = ''

        # Exclusive creation ('x') makes "write only if nobody else did"
        # atomic, guarding against double-writing from rogue threads.
        try:
            with open(fname, 'x') as cache:
                cache.write('\n\n'.join(e[0] + '\n' + e[1] for e in pairs) + '\n')
                cache.write('\n' + separator + '\n\n')
                cache.write(word_descr + '\n')
        except FileExistsError:
            pass

    return pairs, ['', '']
767
# deepl.com
# https://github.com/EmilioK97/pydeepl
def deepl(text):
    """Translate `text` through the DeepL JSON-RPC endpoint.

    The configured languages are sent upper-cased. Returns the translated
    sentence, or a human-readable message when the text exceeds 5000
    characters, the reply has no 'result', or no translation is present.
    """
    source_lang = config.lang_from.upper()
    target_lang = config.lang_to.upper()

    if len(text) > 5000:
        return 'Text too long (limited to 5000 characters).'

    job = {
        'kind':'default',
        'raw_en_sentence': text
    }
    languages = {
        'source_lang_user_selected': source_lang,
        'target_lang': target_lang
    }
    payload = {
        'jsonrpc': '2.0',
        'method': 'LMT_handle_jobs',
        'params': {
            'jobs': [job],
            'lang': languages
        }
    }

    reply = requests.post('https://www2.deepl.com/jsonrpc', json=payload).json()
    if 'result' not in reply:
        return 'DeepL call resulted in a unknown result.'

    translations = reply['result']['translations']
    if len(translations) == 0:
        return 'No translations found.'

    beams = translations[0]['beams']
    if beams is None:
        return 'No translations found.'

    # The first beam is the one returned.
    sentence = beams[0]['postprocessed_sentence']
    if sentence is None:
        return 'No translations found.'

    return sentence
807
def listen(word, type = 'gtts'):
    """Play the pronunciation of `word` in the background with mpv.

    Args:
        word: the text to pronounce.
        type: audio source - 'pons' (audio scraped from pons.com), 'gtts'
            (speech synthesised by the gTTS class and saved to /tmp) or
            'forvo' (up to two recordings from forvo.com). Any other value
            does nothing.

    Playback runs in a backgrounded shell, so the call returns immediately.
    For 'pons' and 'forvo', nothing is played when no audio can be found.
    """
    from shlex import quote as shell_quote

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36'

    if type == 'pons':
        if config.lang_from + config.lang_to in pons_combos:
            url = 'http://en.pons.com/translate?q=%s&l=%s%s&in=%s' % (quote(word), config.lang_from, config.lang_to, config.lang_from)
        else:
            url = 'http://en.pons.com/translate?q=%s&l=%s%s&in=%s' % (quote(word), config.lang_to, config.lang_from, config.lang_from)

        page = requests.get(url, headers={'User-Agent': user_agent}).text
        entries = re.findall('<dl id="([a-zA-Z0-9]*?)" class="dl-horizontal kne(.*?)</dl>', page, re.DOTALL)
        if not entries:
            return
        entry_id, entry_html = entries[0]
        audio_langs = re.findall('class="audio tts trackable trk-audio" data-pons-lang="(.*?)"', entry_html)

        # Use the first audio variant whose language tag contains lang_from.
        mp3 = None
        for audio_lang in audio_langs:
            if config.lang_from in audio_lang:
                mp3 = 'http://sounds.pons.com/audio_tts/%s/%s' % (audio_lang, entry_id)
                break
        if mp3 is None:
            return

        # Both values come from a scraped page, so quote them for the shell.
        remote = shell_quote(mp3)
        local = shell_quote(mp3.split('/')[-1])
        os.system('(cd /tmp; wget ' + remote + '; mpv --load-scripts=no --loop=1 --volume=40 --force-window=no ' + local + '; rm ' + local + ') &')
    elif type == 'gtts':
        gTTS(text = word, lang = config.lang_from, slow = False).save('/tmp/gtts_word.mp3')
        os.system('(mpv --load-scripts=no --loop=1 --volume=75 --force-window=no /tmp/gtts_word.mp3; rm /tmp/gtts_word.mp3) &')
    elif type == 'forvo':
        url = 'https://forvo.com/word/%s/%s/' % (config.lang_from, quote(word))

        # Best effort: any scraping or network failure just means no audio.
        try:
            data = requests.get(url, headers={'User-Agent': user_agent}).text

            soup = BeautifulSoup(data, "lxml")
            buttons = soup.find_all('article', class_ = 'pronunciations')[0].find_all('span', class_ = 'play')

            commands = []
            for button in buttons[:2]:
                # The fifth Play(...) argument is the base64-encoded mp3 path.
                args = re.findall(r'Play\((.*?)\)', button['onclick'])[0]
                path = base64.b64decode(args.split(',')[4].replace("'", '')).decode("utf-8")
                audio_url = 'https://audio00.forvo.com/audios/mp3/%s' % path
                commands.append('mpv --load-scripts=no --loop=1 --volume=111 --force-window=no ' + shell_quote(audio_url) + ' ; ')
        except Exception:
            return

        if commands:
            os.system('(%s) &' % ''.join(commands))
849
# https://github.com/Boudewijn26/gTTS-token
class Token:
    """Google Translate token (`tk`) generator.

    Fetches the current token key once, caches it, and derives request tokens
    from it. Python version of `token-script.js` from translate.google.com.
    """

    SALT_1 = "+-a^+6"
    SALT_2 = "+-3^+b+-f"

    def __init__(self):
        # Cached "<first>.<second>" token key; filled by _get_token_key().
        self.token_key = None

    def calculate_token(self, text, seed=None):
        """Calculate the request token (`tk`) of a string.

        :param text: str (encoded as UTF-8) or bytes-like text to sign
        :param seed: str "<first>.<second>" seed; fetched from
            translate.google.com (and cached) when omitted
        :return: str "<a>.<b>" token
        """
        if seed is None:
            seed = self._get_token_key()

        first_seed, second_seed = seed.split(".")

        # Bytes are used as they are; str is encoded as UTF-8.
        if isinstance(text, (bytes, bytearray)):
            data = bytearray(text)
        else:
            data = bytearray(text.encode('UTF-8'))

        a = int(first_seed)
        for value in data:
            a += value
            a = self._work_token(a, self.SALT_1)
        a = self._work_token(a, self.SALT_2)
        a ^= int(second_seed)
        if a < 0:
            a = (a & 2147483647) + 2147483648
        a %= 1000000
        return str(a) + "." + str(a ^ int(first_seed))

    def _get_token_key(self):
        """Return the token key, fetching it on first use.

        :raises ValueError: when the page contains no `tkk:` expression
        """
        if self.token_key is not None:
            return self.token_key

        response = requests.get("https://translate.google.com/")
        tkk_expr = re.search("(tkk:.*?),", response.text)
        if not tkk_expr:
            raise ValueError(
                "Unable to find token seed! Did https://translate.google.com change?"
            )

        tkk_expr = tkk_expr.group(1)
        ready_made = re.search(r"\d{6}\.[0-9]+", tkk_expr)
        if ready_made:
            # The page already carries the generated token key.
            result = ready_made.group(0)
        else:
            # Rebuild it: hours since the epoch, and the sum of the page's
            # `a` and `b` values.
            timestamp = calendar.timegm(time.gmtime())
            hours = int(math.floor(timestamp / 3600))
            a = re.search(r"a\\x3d(-?\d+);", tkk_expr).group(1)
            b = re.search(r"b\\x3d(-?\d+);", tkk_expr).group(1)

            result = str(hours) + "." + str(int(a) + int(b))

        self.token_key = result
        return result

    # Functions used by the token calculation algorithm

    def _rshift(self, val, n):
        """Right shift that treats a negative `val` as unsigned 32-bit."""
        return val >> n if val >= 0 else (val + 0x100000000) >> n

    def _work_token(self, a, seed):
        """Mix `a` using the salt `seed`, read three characters at a time.

        In each triple the third char is a shift amount (digit, or letter
        meaning ord(letter) - 87), the second selects right shift ('+') or
        left shift, and the first selects 32-bit addition ('+') or XOR.
        """
        for i in range(0, len(seed) - 2, 3):
            char = seed[i + 2]
            d = ord(char[0]) - 87 if char >= "a" else int(char)
            d = self._rshift(a, d) if seed[i + 1] == "+" else a << d
            a = a + d & 4294967295 if seed[i] == "+" else a ^ d
        return a
931
# https://github.com/pndurette/gTTS
class gTTS:
    """gTTS (Google Text to Speech): an interface to Google's Text to Speech API.

    The constructor validates the language, cuts `text` into parts of at most
    `MAX_CHARS` characters and creates the request-token helper. `save()` and
    `write_to_fp()` then fetch one response per part and write the received
    bytes out back to back.
    """

    # Google TTS API supports two read speeds
    # (speed <= 0.3: slow; speed > 0.3: normal; default: 1)
    class Speed:
        SLOW = 0.3
        NORMAL = 1

    GOOGLE_TTS_URL = 'https://translate.google.com/translate_tts'
    MAX_CHARS = 100  # Max characters the Google TTS API takes at a time
    LANGUAGES = {
        'af' : 'Afrikaans',
        'sq' : 'Albanian',
        'ar' : 'Arabic',
        'hy' : 'Armenian',
        'bn' : 'Bengali',
        'ca' : 'Catalan',
        'zh' : 'Chinese',
        'zh-cn' : 'Chinese (Mandarin/China)',
        'zh-tw' : 'Chinese (Mandarin/Taiwan)',
        'zh-yue' : 'Chinese (Cantonese)',
        'hr' : 'Croatian',
        'cs' : 'Czech',
        'da' : 'Danish',
        'nl' : 'Dutch',
        'en' : 'English',
        'en-au' : 'English (Australia)',
        'en-uk' : 'English (United Kingdom)',
        'en-us' : 'English (United States)',
        'eo' : 'Esperanto',
        'fi' : 'Finnish',
        'fr' : 'French',
        'de' : 'German',
        'el' : 'Greek',
        'hi' : 'Hindi',
        'hu' : 'Hungarian',
        'is' : 'Icelandic',
        'id' : 'Indonesian',
        'it' : 'Italian',
        'iw' : 'Hebrew',
        'ja' : 'Japanese',
        'km' : 'Khmer (Cambodian)',
        'ko' : 'Korean',
        'la' : 'Latin',
        'lv' : 'Latvian',
        'mk' : 'Macedonian',
        'no' : 'Norwegian',
        'pl' : 'Polish',
        'pt' : 'Portuguese',
        'ro' : 'Romanian',
        'ru' : 'Russian',
        'sr' : 'Serbian',
        'si' : 'Sinhala',
        'sk' : 'Slovak',
        'es' : 'Spanish',
        'es-es' : 'Spanish (Spain)',
        'es-us' : 'Spanish (United States)',
        'sw' : 'Swahili',
        'sv' : 'Swedish',
        'ta' : 'Tamil',
        'th' : 'Thai',
        'tr' : 'Turkish',
        'uk' : 'Ukrainian',
        'vi' : 'Vietnamese',
        'cy' : 'Welsh'
    }

    def __init__(self, text, lang = 'en', slow = False, debug = False):
        """Prepare `text` for synthesis.

        text  -- the text to speak; must be non-empty.
        lang  -- a key of `LANGUAGES` (matched case-insensitively).
        slow  -- use `Speed.SLOW` instead of `Speed.NORMAL`.
        debug -- print request details while downloading.

        Raises Exception for an unsupported language or for empty text.
        """
        self.debug = debug

        if lang.lower() not in self.LANGUAGES:
            raise Exception('Language not supported: %s' % lang)
        self.lang = lang.lower()

        if not text:
            raise Exception('No text to speak')
        self.text = text

        # Read speed
        self.speed = self.Speed.SLOW if slow else self.Speed.NORMAL

        # Split text in parts
        if self._len(text) <= self.MAX_CHARS:
            text_parts = [text]
        else:
            text_parts = self._tokenize(text, self.MAX_CHARS)

        # Clean: drop newlines and surrounding whitespace, then discard empty parts
        text_parts = [part.replace('\n', '').strip() for part in text_parts]
        self.text_parts = [part for part in text_parts if len(part) > 0]

        # Google Translate token (Token is defined elsewhere in this file)
        self.token = Token()

    def save(self, savefile):
        """ Do the Web request and save to `savefile` """
        with open(savefile, 'wb') as f:
            self.write_to_fp(f)

    def write_to_fp(self, fp):
        """Do the Web request and save to a file-like object.

        One GET request is sent per entry of `self.text_parts`; the response
        bodies are written to `fp` in order. Errors raised by `requests`
        (including the HTTPError from `raise_for_status()`) propagate.
        """
        for idx, part in enumerate(self.text_parts):
            payload = { 'ie' : 'UTF-8',
                        'q' : part,
                        'tl' : self.lang,
                        'ttsspeed' : self.speed,
                        'total' : len(self.text_parts),
                        'idx' : idx,
                        'client' : 'tw-ob',
                        'textlen' : self._len(part),
                        'tk' : self.token.calculate_token(part)}
            headers = {
                "Referer" : "http://translate.google.com/",
                "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36"
            }
            if self.debug:
                print(payload)

            # Disable requests' ssl verify to accommodate certain proxies and firewalls
            # Filter out urllib3's insecure warnings. We can live without ssl verify here
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.InsecureRequestWarning)
                r = requests.get(self.GOOGLE_TTS_URL,
                                 params=payload,
                                 headers=headers,
                                 proxies=urllib.request.getproxies(),
                                 verify=False)
            if self.debug:
                print("Headers: {}".format(r.request.headers))
                print("Request url: {}".format(r.request.url))
                print("Response: {}, Redirects: {}".format(r.status_code, r.history))
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=1024):
                fp.write(chunk)

    def _len(self, text):
        """ Get char len of `text`, after decoding if it is a byte string """
        try:
            # byte strings (Python 2 str / Python 3 bytes)
            return len(text.decode('utf8'))
        except AttributeError:
            # Python 3 str has no decode()
            return len(text)

    def _tokenize(self, text, max_size):
        """ Tokenizer on basic roman punctuation """

        punc = "¡!()[]¿?.,;:—«»\n"
        punc_list = [re.escape(c) for c in punc]
        pattern = '|'.join(punc_list)
        parts = re.split(pattern, text)

        min_parts = []
        for p in parts:
            min_parts += self._minimize(p, " ", max_size)
        return min_parts

    def _minimize(self, thestring, delim, max_size):
        """ Recursive function that splits `thestring` in chunks
        of maximum `max_size` chars delimited by `delim`. Returns list.

        A leading `delim` is dropped first: the remainder handed to the
        recursive call always starts with the delimiter it was cut on, and
        cutting on that same position again would never make progress.
        When no `delim` occurs within the first `max_size` chars, the string
        is cut at `max_size` so no chunk exceeds the limit. """

        if delim and thestring.startswith(delim):
            thestring = thestring[len(delim):]

        if self._len(thestring) > max_size:
            idx = thestring.rfind(delim, 0, max_size)
            if idx == -1:
                # no delimiter in range: cut arbitrarily at the size limit
                idx = max_size
            return [thestring[:idx]] + self._minimize(thestring[idx:], delim, max_size)
        else:
            return [thestring]
1108
def mpv_pause():
    """Pause playback: pipe a `set_property pause true` JSON command through socat to the mpv socket."""
    request = '{ "command": ["set_property", "pause", true] }'
    shell_line = 'echo \'%s\' | socat - "%s" > /dev/null' % (request, mpv_socket)
    os.system(shell_line)
1111
def mpv_resume():
    """Resume playback: pipe a `set_property pause false` JSON command through socat to the mpv socket."""
    request = '{ "command": ["set_property", "pause", false] }'
    shell_line = 'echo \'%s\' | socat - "%s" > /dev/null' % (request, mpv_socket)
    os.system(shell_line)
1114
def mpv_pause_status():
    """Return the value of mpv's `pause` property as reported over the IPC socket.

    The query is repeated until the reply parses as JSON and carries a `data`
    field. Retrying in a loop (rather than by calling itself) keeps a long
    run of failed replies from ending in a RecursionError, and only
    parse/lookup errors are retried so KeyboardInterrupt and SystemExit still
    propagate.
    """
    query = 'echo \'{ "command": ["get_property", "pause"] }\' | socat - "' + mpv_socket + '"'

    while True:
        stdoutdata = subprocess.getoutput(query)

        try:
            return loads(stdoutdata)['data']
        except (ValueError, KeyError, TypeError):
            # empty/garbled output, or a reply without 'data': ask again
            continue
1122
def mpv_fullscreen_status():
    """Return the value of mpv's `fullscreen` property as reported over the IPC socket.

    The query is repeated until the reply parses as JSON and carries a `data`
    field. Retrying in a loop (rather than by calling itself) keeps a long
    run of failed replies from ending in a RecursionError, and only
    parse/lookup errors are retried so KeyboardInterrupt and SystemExit still
    propagate.
    """
    query = 'echo \'{ "command": ["get_property", "fullscreen"] }\' | socat - "' + mpv_socket + '"'

    while True:
        stdoutdata = subprocess.getoutput(query)

        try:
            return loads(stdoutdata)['data']
        except (ValueError, KeyError, TypeError):
            # empty/garbled output, or a reply without 'data': ask again
            continue
1130
def mpv_message(message, timeout = 3000):
    """Show `message` on mpv's OSD via the `show-text` IPC command.

    message -- text to display; it is JSON-encoded, so quotes, backslashes
               and non-ASCII characters are passed through intact.
    timeout -- forwarded (as a string, like before) as the second argument
               of `show-text`; presumably the display duration in
               milliseconds -- confirm against mpv's documentation.

    The command is written to socat's stdin without going through a shell,
    so neither the message nor the socket path can break the command line.
    A missing/unrunnable socat is reported on stderr instead of raising.
    """
    import json  # local import: the file's header only imports `loads`

    payload = json.dumps({'command': ['show-text', str(message), str(timeout)]}, ensure_ascii = False)

    try:
        subprocess.run(
            ['socat', '-', mpv_socket],
            input = (payload + '\n').encode('utf-8'),
            stdout = subprocess.DEVNULL
        )
    except OSError as err:
        print('[py part] mpv_message: could not run socat: %s' % err, file = sys.stderr)
1133
def stripsd2(phrase):
    """Lower-case `phrase` and keep only spaces and non-digit alphanumeric characters.

    Surrounding whitespace is trimmed both before and after the filtering.
    """
    kept = []
    for symbol in phrase.strip().lower():
        if symbol == ' ':
            kept.append(symbol)
        elif symbol.isalnum() and not symbol.isdigit():
            kept.append(symbol)
    return ''.join(kept).strip()
1136
def r2l(l):
    """Move the punctuation around `l` to the opposite ends for right-to-left display.

    The result is: the trailing run of non-word characters (reversed),
    then `l` with its leading and trailing non-word runs removed, then the
    leading run of non-word characters (reversed). A trailing run is not
    allowed to start on a `%` character.

    Regexes are raw strings (the former '\\W' literals were invalid escape
    sequences) and a missing match is tested explicitly instead of being
    caught with a bare `except`; the returned value is unchanged.
    """
    trailing = re.search(r'(?!%)\W+$', l)
    leading = re.search(r'^\W+', l)
    core = re.sub(r'^\W+|(?!%)\W+$', '', l)

    tail = trailing.group(0)[::-1] if trailing else ''
    head = leading.group(0)[::-1] if leading else ''

    return tail + core + head
1153
def split_long_lines(line, chunks = 2, max_symbols_per_line = False):
    """Break `line` into several lines of roughly equal word count.

    line                 -- text to split; words are separated by single spaces.
    chunks               -- number of output lines when `max_symbols_per_line`
                            is not given.
    max_symbols_per_line -- when truthy, `chunks` is ignored and the smallest
                            number of lines is used for which no line is longer
                            than this many characters.

    Returns the lines joined with '\\n'.

    If even one word per line does not satisfy `max_symbols_per_line` (a
    single word is longer than the limit), the one-word-per-line split is
    returned; previously this case never terminated.
    """
    words = line.split(' ')

    def lines_for(count):
        # the word list is distributed once per attempt, not once per output line
        if count < 1:
            return []
        return [' '.join(part) for part in numpy.array_split(words, count)]

    if not max_symbols_per_line:
        return '\n'.join(lines_for(chunks))

    new_lines = []
    for count in range(1, len(words) + 1):
        new_lines = lines_for(count)
        if len(max(new_lines, key = len)) <= max_symbols_per_line:
            break

    return '\n'.join(new_lines)
1173
def dir2(name):
    """Debug helper: print the attribute names of `name`, one per line, then exit the program."""
    attribute_names = dir(name)
    listing = '\n'.join(attribute_names)
    print(listing)
    exit()
1177
class thread_subtitles(QObject):
    """Worker object that polls the subtitle file and signals when the text changes.

    `main()` never returns; it is meant to run in its own thread. It
    publishes the current subtitle text through the module-level global
    `subs` and asks the GUI to re-render via `update_subtitles(hide, redraw)`.
    """

    update_subtitles = pyqtSignal(bool, bool)

    @pyqtSlot()
    def main(self):
        """Poll `sub_file` every `config.update_time` seconds and react to changes."""
        global subs

        # NOTE(review): was_hidden and inc are not used anywhere in this method.
        was_hidden = 0
        inc = 0
        # 1 while playback is held paused by auto_pause mode 2 (see below)
        auto_pause_2_ind = 0
        last_updated = time.time()

        while 1:
            time.sleep(config.update_time)

            # the file may be missing/unreadable at the moment; just try again next tick
            try:
                tmp_file_subs = open(sub_file).read()
            except:
                continue

            # tmp hack
            # if config.R2L_from_B:
            # tmp_file_subs = r2l(tmp_file_subs.strip())

            # Keep the previous subtitle on screen while the file is empty:
            # indefinitely when no limit is configured, otherwise until
            # extend_subs_duration_limit_sec seconds have passed since last_updated.
            if config.extend_subs_duration2max_B and not len(tmp_file_subs):
                if not config.extend_subs_duration_limit_sec:
                    continue
                if config.extend_subs_duration_limit_sec > time.time() - last_updated:
                    continue

            last_updated = time.time()

            # automatically switch into Hebrew if it's detected
            if config.lang_from != 'he' and config.lang_from != 'iw' and any((c in set('קראטוןםפשדגכעיחלךףזסבהנמצתץ')) for c in tmp_file_subs):
                config.lang_from = 'he'

                # swap the font-family in the subtitle stylesheet for a random Hebrew font
                frf = random.choice(config.he_fonts)
                config.style_subs = re.sub('font-family: ".*?";', lambda ff: 'font-family: "%s";' % frf, config.style_subs, flags = re.I)

                config.R2L_from_B = True
                config.translation_function_names = config.translation_function_names_2
                config.listen_via = 'forvo'

                # desktop notifications announcing the switch and the chosen font
                os.system('notify-send -i none -t 1111 "He"')
                os.system('notify-send -i none -t 1111 "%s"' % str(frf))

                # redraw = True: re-apply the changed stylesheet
                self.update_subtitles.emit(False, True)

            # Used as an `if` with early exit: every path through the body ends in `break`.
            while tmp_file_subs != subs:
                if config.auto_pause == 2:
                    # Mode 2: before replacing a subtitle that has enough words,
                    # pause playback (unless it is paused already) ...
                    if not auto_pause_2_ind and len(re.sub(' +', ' ', stripsd2(subs.replace('\n', ' '))).split(' ')) > config.auto_pause_min_words - 1 and not mpv_pause_status():
                        mpv_pause()
                        auto_pause_2_ind = 1

                    # ... and keep the old subtitle until playback is resumed;
                    # `subs` is left untouched, so the next tick lands here again.
                    if auto_pause_2_ind and mpv_pause_status():
                        break

                    auto_pause_2_ind = 0

                subs = tmp_file_subs

                if config.auto_pause == 1:
                    # Mode 1: pause as soon as a new subtitle with enough words arrives.
                    if len(re.sub(' +', ' ', stripsd2(subs.replace('\n', ' '))).split(' ')) > config.auto_pause_min_words - 1:
                        mpv_pause()

                self.update_subtitles.emit(False, False)

                break
1246
class thread_translations(QObject):
    """Worker object that runs the translation functions for hovered words.

    `main()` never returns. It takes `(word, x position)` items from
    `config.queue_to_translate`, runs every function named in
    `config.translation_function_names` on the word in parallel threads,
    and emits `get_translations(word, x, False)` once they are done.
    """

    get_translations = pyqtSignal(str, int, bool)

    @pyqtSlot()
    def main(self):
        """Serve the translation queue forever."""
        while True:
            # non-blocking fetch; idle for one update interval when there is nothing to do
            try:
                word, globalX = config.queue_to_translate.get(False)
            except:
                time.sleep(config.update_time)
                continue

            # one thread per configured dictionary function, all started together
            workers = [
                threading.Thread(target = globals()[function_name], args = (word,))
                for function_name in config.translation_function_names
            ]
            for worker in workers:
                worker.start()

            # wait for the lookups, but give up on this word as soon as another one is queued
            superseded = False
            while any(worker.is_alive() for worker in workers):
                if config.queue_to_translate.qsize():
                    superseded = True
                    break
                time.sleep(config.update_time)

            QApplication.restoreOverrideCursor()

            # no popup for a superseded word or while popups are blocked
            if superseded or config.block_popup:
                continue

            self.get_translations.emit(word, globalX, False)
1283
1284# drawing layer
1285# because can't calculate outline with precision
class drawing_layer(QLabel):
    """Label that paints one subtitle line by hand: blur halo, solid outline, then the text.

    The QLabel itself is created without text; the line is kept in
    `self.line` and drawn by `paintEvent`.
    """

    def __init__(self, line, subs, parent=None):
        super().__init__(None)
        self.line = line
        self.setStyleSheet(config.style_subs)
        # paintEvent skips the first paint request and sets this to 1
        self.psuedo_line = 0

    def draw_text_n_outline(self, painter: QPainter, x, y, outline_width, outline_blur, text):
        """Draw `text` at (x, y) with `painter`, surrounded by a blurred outline.

        Pens of width outline_width .. outline_width + outline_blur - 1 are
        stroked along the text path with decreasing opacity, then one fully
        opaque pen of `outline_width`, and finally the text in the palette's
        text colour.
        """
        base = QColor(config.outline_color)
        red, green, blue = base.red(), base.green(), base.blue()

        # the outline path uses the punctuation-swapped text in right-to-left mode
        path_text = ' ' + r2l(text.strip()) + ' ' if config.R2L_from_B else text
        outline_path = QPainterPath()
        outline_path.addText(x, y, self.font(), path_text)

        # blur: the narrowest pen gets alpha 200, wider pens fade out linearly
        narrowest = outline_width
        widest = outline_width + outline_blur - 1
        for pen_width in range(outline_width, outline_width + outline_blur):
            if pen_width == narrowest:
                alpha = 200
            else:
                alpha = int((widest - pen_width) / widest * 200)

            halo_brush = QBrush(QColor(red, green, blue, alpha), Qt.SolidPattern)
            painter.setPen(QPen(halo_brush, pen_width, Qt.SolidLine, Qt.RoundCap, Qt.RoundJoin))
            painter.drawPath(outline_path)

        # solid outline
        solid_brush = QBrush(QColor(red, green, blue, 255), Qt.SolidPattern)
        painter.setPen(QPen(solid_brush, outline_width, Qt.SolidLine, Qt.RoundCap, Qt.RoundJoin))
        painter.drawPath(outline_path)

        # the text itself
        painter.setPen(self.palette().color(QPalette.Text))
        painter.drawText(x, y, text)

    # NOTE(review): the pasted source lost its indentation; all three methods
    # below are assumed to sit under this guard -- confirm against the original file.
    if config.outline_B:
        def paintEvent(self, evt: QPaintEvent):
            """Paint the outlined line; the first paint request is ignored."""
            if not self.psuedo_line:
                self.psuedo_line = 1
                return

            baseline = self.fontMetrics().ascent()
            painter = QPainter(self)

            self.draw_text_n_outline(
                painter,
                0,
                baseline + config.outline_top_padding - config.outline_bottom_padding,
                config.outline_thickness,
                config.outline_blur,
                text = self.line
            )

        def resizeEvent(self, *args):
            """Pin the widget to the text width and the font height plus both outline paddings."""
            metrics = self.fontMetrics()
            width = metrics.width(self.line)
            height = metrics.height() + config.outline_bottom_padding + config.outline_top_padding
            self.setFixedSize(width, height)

        def sizeHint(self):
            """Preferred size: text width by font height (paddings not included)."""
            metrics = self.fontMetrics()
            return QSize(metrics.width(self.line), metrics.height())
1366
class events_class(QLabel):
    """Invisible, mouse-sensitive label covering one token of a subtitle line.

    word -- the text of this token (a word, or a single non-letter symbol).
    subs -- the whole subtitle text, used for full-sentence translation.
    skip -- True for non-word tokens: they are neither highlighted nor queued
            for translation.

    Signals:
        mouseHover(text, x, is_line) -- request (or, with text '', hide) the popup.
        redraw(hide, redraw)         -- request a re-render of the subtitles.

    Mouse buttons and wheel directions are mapped to the `f_*` methods through
    `config.mouse_buttons`, a list of [button-or-scroll, modifier, method name].
    """

    mouseHover = pyqtSignal(str, int, bool)
    redraw = pyqtSignal(bool, bool)

    def __init__(self, word, subs, skip = False, parent=None):
        super().__init__(word)
        self.setMouseTracking(True)
        self.word = word
        self.subs = subs
        self.skip = skip
        self.highlight = False

        # the label only catches events; its own text is not meant to be visible
        self.setStyleSheet('background: transparent; color: transparent;')

    def highligting(self, color, underline_width):
        """Paint the hover decoration: an underline and/or the word in `color`, per config."""
        color = QColor(color)
        color = QColor(color.red(), color.green(), color.blue(), 255)
        painter = QPainter(self)

        if config.hover_underline:
            font_metrics = QFontMetrics(self.font())
            text_width = font_metrics.width(self.word)
            text_height = font_metrics.height()

            brush = QBrush(color)
            pen = QPen(brush, underline_width, Qt.SolidLine, Qt.RoundCap)
            painter.setPen(pen)
            if not self.skip:
                painter.drawLine(0, text_height, text_width, text_height)

        if config.hover_hightlight:
            x = y = 0
            y += self.fontMetrics().ascent()

            painter.setPen(color)
            painter.drawText(x, y + config.outline_top_padding - config.outline_bottom_padding, self.word)

    if config.outline_B:
        def paintEvent(self, evt: QPaintEvent):
            """Draw nothing but the hover decoration, and only while hovered."""
            if self.highlight:
                self.highligting(config.hover_color, config.hover_underline_thickness)

    #####################################################

    def resizeEvent(self, event):
        """Pin the widget to the word's width and the font height plus both outline paddings."""
        text_height = self.fontMetrics().height()
        text_width = self.fontMetrics().width(self.word)

        self.setFixedSize(text_width, text_height + config.outline_bottom_padding + config.outline_top_padding)

    def enterEvent(self, event):
        """Highlight the word and queue it for translation."""
        # NOTE(review): indentation was lost in the pasted source; the queue
        # put is assumed to belong to the `not self.skip` branch -- confirm.
        if not self.skip:
            self.highlight = True
            self.repaint()
            config.queue_to_translate.put((self.word, event.globalX()))

    @pyqtSlot()
    def leaveEvent(self, event):
        """Remove the highlight, reset scrolling and hide the popup."""
        # NOTE(review): indentation was lost in the pasted source; the whole
        # body is assumed to belong to the `not self.skip` branch -- confirm.
        if not self.skip:
            self.highlight = False
            self.repaint()

            config.scroll = {}
            self.mouseHover.emit('', 0, False)
            QApplication.restoreOverrideCursor()

    def wheel_scrolling(self, event):
        """Map a wheel angle delta (a point with x()/y()) to a 'Scroll*' action name.

        Returns None when both components are zero.
        """
        if event.y() > 0:
            return 'ScrollUp'
        if event.y():
            return 'ScrollDown'
        if event.x() > 0:
            return 'ScrollLeft'
        if event.x():
            return 'ScrollRight'

    def wheelEvent(self, event):
        """Run every configured action whose scroll direction and modifier match."""
        # NOTE: eval/exec act on values from config.mouse_buttons; the config
        # is a local Python module and is treated as trusted input here.
        for mouse_action in config.mouse_buttons:
            if self.wheel_scrolling(event.angleDelta()) == mouse_action[0]:
                if event.modifiers() == eval('Qt.%s' % mouse_action[1]):
                    exec('self.%s(event)' % mouse_action[2])

    def mousePressEvent(self, event):
        """Run every configured action whose mouse button and modifier match."""
        # NOTE: eval/exec act on values from config.mouse_buttons; the config
        # is a local Python module and is treated as trusted input here.
        for mouse_action in config.mouse_buttons:
            if 'Scroll' not in mouse_action[0]:
                if event.button() == eval('Qt.%s' % mouse_action[0]):
                    if event.modifiers() == eval('Qt.%s' % mouse_action[1]):
                        exec('self.%s(event)' % mouse_action[2])

    #####################################################

    def f_show_in_browser(self, event):
        """Run the `config.show_in_browser` command with `${word}` replaced by this word."""
        # tells the hover layer's leave handler not to resume playback
        config.avoid_resuming = True
        os.system(config.show_in_browser.replace('${word}', self.word))

    def f_auto_pause_options(self, event):
        """Cycle config.auto_pause through 0 -> 1 -> 2 -> 0 and show the new value."""
        if config.auto_pause == 2:
            config.auto_pause = 0
        else:
            config.auto_pause += 1
        mpv_message('auto_pause: %d' % config.auto_pause)

    def f_listen(self, event):
        """Pronounce the word through the configured `config.listen_via` service."""
        listen(self.word, config.listen_via)

    @pyqtSlot()
    def f_subs_screen_edge_padding_decrease(self, event):
        """Lower the screen-edge padding by 5, show the new value and redraw."""
        config.subs_screen_edge_padding -= 5
        mpv_message('subs_screen_edge_padding: %d' % config.subs_screen_edge_padding)
        self.redraw.emit(False, True)

    @pyqtSlot()
    def f_subs_screen_edge_padding_increase(self, event):
        """Raise the screen-edge padding by 5, show the new value and redraw."""
        config.subs_screen_edge_padding += 5
        mpv_message('subs_screen_edge_padding: %d' % config.subs_screen_edge_padding)
        self.redraw.emit(False, True)

    def _change_font_size(self, delta):
        """Shift every `font-size: Npx;` in config.style_subs by `delta` px and redraw.

        The OSD message shows the size after the change, like the other
        adjustment handlers do for their values.
        """
        def resized(match):
            new_size = int(match.group(1)) + delta
            mpv_message('font: %d' % new_size)
            return 'font-size: %dpx;' % new_size

        config.style_subs = re.sub(r'font-size: (\d+)px;', resized, config.style_subs, flags = re.I)
        self.redraw.emit(False, True)

    @pyqtSlot()
    def f_font_size_decrease(self, event):
        """Make the subtitle font one pixel smaller."""
        self._change_font_size(-1)

    @pyqtSlot()
    def f_font_size_increase(self, event):
        """Make the subtitle font one pixel larger."""
        self._change_font_size(1)

    def f_auto_pause_min_words_decrease(self, event):
        """Lower config.auto_pause_min_words by one and show the new value."""
        config.auto_pause_min_words -= 1
        mpv_message('auto_pause_min_words: %d' % config.auto_pause_min_words)

    def f_auto_pause_min_words_increase(self, event):
        """Raise config.auto_pause_min_words by one and show the new value."""
        config.auto_pause_min_words += 1
        mpv_message('auto_pause_min_words: %d' % config.auto_pause_min_words)

    # f_deepl_translation -> f_translation_full_sentence
    @pyqtSlot()
    def f_deepl_translation(self, event):
        """Older name of f_translation_full_sentence; same behaviour."""
        self.mouseHover.emit(self.subs , event.globalX(), True)

    @pyqtSlot()
    def f_translation_full_sentence(self, event):
        """Request a popup with the translation of the whole subtitle text."""
        self.mouseHover.emit(self.subs , event.globalX(), True)

    def f_save_word_to_file(self, event):
        """Append the word to `config.save_word_to_file_fname` unless a line of the file already equals it.

        The file is created when it does not exist. Both file handles are
        closed deterministically via `with`.
        """
        fname = os.path.expanduser(config.save_word_to_file_fname)

        if os.path.isfile(fname):
            with open(fname) as saved:
                already_saved = self.word in [x.strip() for x in saved.readlines()]
            if already_saved:
                return

        with open(fname, 'a') as saved:
            print(self.word, file = saved)

    @pyqtSlot()
    def f_scroll_translations_up(self, event):
        """Scroll this word's translation list one step up (not above 0) and refresh the popup."""
        if self.word in config.scroll and config.scroll[self.word] > 0:
            config.scroll[self.word] = config.scroll[self.word] - 1
        else:
            config.scroll[self.word] = 0
        self.mouseHover.emit(self.word, event.globalX(), False)

    @pyqtSlot()
    def f_scroll_translations_down(self, event):
        """Scroll this word's translation list one step down and refresh the popup."""
        if self.word in config.scroll:
            config.scroll[self.word] = config.scroll[self.word] + 1
        else:
            config.scroll[self.word] = 1
        self.mouseHover.emit(self.word, event.globalX(), False)
1530
class main_class(QWidget):
    """Owns the overlay windows and the two worker threads that feed them.

    Three frameless, always-on-top frames are managed:
      * `subtitles`  -- the visible, outlined subtitle lines (drawing_layer labels);
      * `subtitles2` -- a second frame at the same position, filled with
                        events_class labels that receive the mouse events;
      * `popup`      -- the translation popup.
    """

    def __init__(self):
        super().__init__()

        # worker delivering subtitle changes -> render_subtitles
        self.thread_subs = QThread()
        self.obj = thread_subtitles()
        self.obj.update_subtitles.connect(self.render_subtitles)
        self.obj.moveToThread(self.thread_subs)
        self.thread_subs.started.connect(self.obj.main)
        self.thread_subs.start()

        # worker delivering finished word lookups -> render_popup
        self.thread_translations = QThread()
        self.obj2 = thread_translations()
        self.obj2.get_translations.connect(self.render_popup)
        self.obj2.moveToThread(self.thread_translations)
        self.thread_translations.started.connect(self.obj2.main)
        self.thread_translations.start()

        # start the forms
        self.subtitles_base()
        self.subtitles_base2()
        self.popup_base()

    def clearLayout(self, layout):
        """Remove everything from a layout, recursing into nested layouts.

        `layout` is either one of the names 'subs', 'subs2', 'popup' (the
        matching frame is hidden and its vbox is cleared) or a layout object,
        as passed by the recursive call. None is accepted and ignored.
        """
        if layout == 'subs':
            layout = self.subtitles_vbox
            self.subtitles.hide()
        elif layout == 'subs2':
            layout = self.subtitles_vbox2
            self.subtitles2.hide()
        elif layout == 'popup':
            layout = self.popup_vbox
            self.popup.hide()

        if layout is not None:
            while layout.count():
                item = layout.takeAt(0)
                widget = item.widget()

                if widget is not None:
                    widget.deleteLater()
                else:
                    # the item is a nested layout (or a spacer, whose layout() is None)
                    self.clearLayout(item.layout())

    def subtitles_base(self):
        """Create the frame and vertical layout holding the visible subtitle lines."""
        self.subtitles = QFrame()
        self.subtitles.setAttribute(Qt.WA_TranslucentBackground)
        self.subtitles.setWindowFlags(Qt.X11BypassWindowManagerHint | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.subtitles.setStyleSheet(config.style_subs)

        self.subtitles_vbox = QVBoxLayout(self.subtitles)
        self.subtitles_vbox.setSpacing(config.subs_padding_between_lines)
        self.subtitles_vbox.setContentsMargins(0, 0, 0, 0)

    def subtitles_base2(self):
        """Create the frame and vertical layout holding the mouse-sensitive labels."""
        self.subtitles2 = QFrame()
        self.subtitles2.setAttribute(Qt.WA_TranslucentBackground)
        self.subtitles2.setWindowFlags(Qt.X11BypassWindowManagerHint | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.subtitles2.setStyleSheet(config.style_subs)

        self.subtitles_vbox2 = QVBoxLayout(self.subtitles2)
        self.subtitles_vbox2.setSpacing(config.subs_padding_between_lines)
        self.subtitles_vbox2.setContentsMargins(0, 0, 0, 0)

        if config.pause_during_translation_B:
            # Entering the frame pauses mpv and allows popups; leaving it blocks
            # popups and resumes mpv, unless config.avoid_resuming is set, in
            # which case that flag is cleared instead of resuming.
            # The [..][0] lists only serve to run two calls inside a lambda.
            self.subtitles2.enterEvent = lambda event : [mpv_pause(), setattr(config, 'block_popup', False)][0]
            self.subtitles2.leaveEvent = lambda event : [mpv_resume(), setattr(config, 'block_popup', True)][0] if not config.avoid_resuming else [setattr(config, 'avoid_resuming', False), setattr(config, 'block_popup', True)][0]

    def popup_base(self):
        """Create the popup frame: an outer frame, an inner frame and the inner vertical layout."""
        self.popup = QFrame()
        self.popup.setAttribute(Qt.WA_TranslucentBackground)
        self.popup.setWindowFlags(Qt.X11BypassWindowManagerHint | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.popup.setStyleSheet(config.style_popup)

        self.popup_inner = QFrame()
        outer_box = QVBoxLayout(self.popup)
        outer_box.addWidget(self.popup_inner)

        self.popup_vbox = QVBoxLayout(self.popup_inner)
        self.popup_vbox.setSpacing(0)

    def render_subtitles(self, hide = False, redraw = False):
        """Show the global `subs` text in both subtitle frames.

        hide   -- hide both frames and return (also done when `subs` is empty).
        redraw -- only re-apply `config.style_subs` to the frames and
                  re-position them; the labels are not rebuilt.
        """
        if hide or not len(subs):
            # `return` inside `finally` also discards any exception raised by hide()
            try:
                self.subtitles.hide()
                self.subtitles2.hide()
            finally:
                return

        # NOTE(review): indentation was lost in the pasted source; the rebuild
        # below is assumed to be the `else` branch -- confirm against the original.
        if redraw:
            self.subtitles.setStyleSheet(config.style_subs)
            self.subtitles2.setStyleSheet(config.style_subs)
        else:
            self.clearLayout('subs')
            self.clearLayout('subs2')

            if hasattr(self, 'popup'):
                self.popup.hide()

            # if subtitle consists of one overly long line - split into two
            if config.split_long_lines_B and len(subs.split('\n')) == 1 and len(subs.split(' ')) > config.split_long_lines_words_min - 1:
                subs2 = split_long_lines(subs)
            else:
                subs2 = subs

            # collapse runs of spaces
            subs2 = re.sub(' +', ' ', subs2).strip()

            ##############################

            for line in subs2.split('\n'):
                # visible layer: one outlined label per line, centred by the two stretches
                line2 = ' %s ' % line.strip()
                ll = drawing_layer(line2, subs2)

                hbox = QHBoxLayout()
                hbox.setContentsMargins(0, 0, 0, 0)
                hbox.setSpacing(0)
                hbox.addStretch()
                hbox.addWidget(ll)
                hbox.addStretch()
                self.subtitles_vbox.addLayout(hbox)

                ####################################

                # event layer: one label per word and one per non-letter symbol
                hbox = QHBoxLayout()
                hbox.setContentsMargins(0, 0, 0, 0)
                hbox.setSpacing(0)
                hbox.addStretch()

                if config.R2L_from_B:
                    line2 = line2[::-1]

                # NUL sentinel: a non-letter at the very end flushes the last word
                line2 += '\00'
                word = ''
                for smbl in line2:
                    if smbl.isalpha():
                        word += smbl
                    else:
                        if len(word):
                            # the line was reversed above, so restore the word's letter order
                            if config.R2L_from_B:
                                word = word[::-1]

                            ll = events_class(word, subs2)
                            ll.mouseHover.connect(self.render_popup)
                            ll.redraw.connect(self.render_subtitles)

                            hbox.addWidget(ll)
                            word = ''

                        # every non-letter except the sentinel gets a passive (skip) label
                        if smbl != '\00':
                            ll = events_class(smbl, subs2, skip = True)
                            hbox.addWidget(ll)

                hbox.addStretch()
                self.subtitles_vbox2.addLayout(hbox)

        self.subtitles.adjustSize()
        self.subtitles2.adjustSize()

        w = self.subtitles.geometry().width()
        # Stores the pixel height as an instance attribute named `height` on the
        # frame (shadowing the widget's height() method on that object);
        # render_popup reads it back as `self.subtitles.height`.
        h = self.subtitles.height = self.subtitles.geometry().height()

        # horizontally centred on the screen
        x = (config.screen_width/2) - (w/2)

        if config.subs_top_placement_B:
            y = config.subs_screen_edge_padding
        else:
            y = config.screen_height - config.subs_screen_edge_padding - h

        self.subtitles.setGeometry(int(x), int(y), 0, 0)
        self.subtitles.show()

        self.subtitles2.setGeometry(int(x), int(y), 0, 0)
        self.subtitles2.show()

    def render_popup(self, text, x_cursor_pos, is_line):
        """Fill and show the translation popup.

        text         -- word or sentence to translate; '' hides the popup.
        x_cursor_pos -- global x position used to place the popup; -1 centres it.
        is_line      -- True: translate `text` as a whole sentence with
                        `config.translation_function_name_full_sentence`;
                        False: look the word up with every function in
                        `config.translation_function_names`.
        """
        if text == '':
            if hasattr(self, 'popup'):
                self.popup.hide()
            return

        self.clearLayout('popup')

        if is_line:
            QApplication.setOverrideCursor(Qt.WaitCursor)

            line = globals()[config.translation_function_name_full_sentence](text)
            if config.translation_function_name_full_sentence == 'google':
                # the google function returns a nested structure; take the first entry
                try:
                    line = line[0][0][0].strip()
                except:
                    line = 'Google translation failed.'

            if config.split_long_lines_B and len(line.split('\n')) == 1 and len(line.split(' ')) > config.split_long_lines_words_min - 1:
                line = split_long_lines(line)

            ll = QLabel(line)
            ll.setObjectName("first_line")
            self.popup_vbox.addWidget(ll)
        else:
            word = text

            for translation_function_name_i, translation_function_name in enumerate(config.translation_function_names):
                # each function returns (list of [word, translation] pairs, word description)
                pairs, word_descr = globals()[translation_function_name](word)

                if not len(pairs):
                    pairs = [['', '[Not found]']]
                    #return

                # ~pairs = [ [ str(i) + ' ' + pair[0], pair[1] ] for i, pair in enumerate(pairs) ]

                # Apply the scroll offset stored for this word. When scrolling
                # past the end, show the last page instead and (with a single
                # dictionary) take the last scroll step back.
                if word in config.scroll:
                    if len(pairs[config.scroll[word]:]) > config.number_of_translations:
                        pairs = pairs[config.scroll[word]:]
                    else:
                        pairs = pairs[-config.number_of_translations:]
                        if len(config.translation_function_names) == 1:
                            config.scroll[word] -= 1

                for i1, pair in enumerate(pairs):
                    # show at most number_of_translations entries
                    if i1 == config.number_of_translations:
                        break

                    if config.split_long_lines_in_popup_B:
                        pair[0] = split_long_lines(pair[0], max_symbols_per_line = config.split_long_lines_in_popup_symbols_min)
                        pair[1] = split_long_lines(pair[1], max_symbols_per_line = config.split_long_lines_in_popup_symbols_min)

                    # a lone '-' stands for "nothing to show"
                    if pair[0] == '-':
                        pair[0] = ''
                    if pair[1] == '-':
                        pair[1] = ''

                    # ~if config.R2L_from_B:
                    # ~pair[0] = pair[0][::-1]
                    # ~if config.R2L_to_B:
                    # ~pair[1] = pair[1][::-1]

                    if pair[0] != '':
                        # to emphasize the exact form of the word
                        # to ignore case on input and match it on output
                        # (`word` is used as a regex pattern here, unescaped)
                        chnks = re.split(word, pair[0], flags = re.I)
                        exct_words = re.findall(word, pair[0], flags = re.I)

                        hbox = QHBoxLayout()
                        hbox.setContentsMargins(0, 0, 0, 0)

                        # interleave the text between matches with the matched words
                        for i2, chnk in enumerate(chnks):
                            if len(chnk):
                                ll = QLabel(chnk)
                                ll.setObjectName("first_line")
                                hbox.addWidget(ll)
                            if i2 + 1 < len(chnks):
                                ll = QLabel(exct_words[i2])
                                ll.setObjectName("first_line_emphasize_word")
                                hbox.addWidget(ll)

                        # filling the rest of the line with empty bg
                        ll = QLabel()
                        ll.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred)
                        hbox.addWidget(ll)

                        self.popup_vbox.addLayout(hbox)

                    # NOTE(review): indentation was lost in the pasted source; the
                    # padding label is assumed to belong to this branch -- confirm.
                    if pair[1] != '':
                        ll = QLabel(pair[1])
                        ll.setObjectName("second_line")
                        self.popup_vbox.addWidget(ll)

                        # padding
                        ll = QLabel()
                        ll.setStyleSheet("font-size: 6px;")
                        self.popup_vbox.addWidget(ll)

                # word description: element 0 is shown right-aligned, element 1
                # is attached as the "morphology" property
                if len(word_descr[0]):
                    ll = QLabel(word_descr[0])
                    ll.setProperty("morphology", word_descr[1])
                    ll.setAlignment(Qt.AlignRight)
                    self.popup_vbox.addWidget(ll)

                # delimiter between dictionaries
                if translation_function_name_i + 1 < len(config.translation_function_names):
                    ll = QLabel()
                    ll.setObjectName("delimiter")
                    self.popup_vbox.addWidget(ll)

        self.popup_inner.adjustSize()
        self.popup.adjustSize()

        w = self.popup.geometry().width()
        h = self.popup.geometry().height()

        # never wider than the screen
        if w > config.screen_width:
            w = config.screen_width - 20

        # word popups are at least a third of the screen wide
        if not is_line:
            if w < config.screen_width / 3:
                w = config.screen_width / 3

        if x_cursor_pos == -1:
            x = (config.screen_width/2) - (w/2)
        else:
            # start a fifth of the popup width left of the cursor,
            # shifted left if it would run off the right screen edge
            x = x_cursor_pos - w/5
            if x+w > config.screen_width:
                x = config.screen_width - w

        # place the popup next to the subtitles: below them when they are at
        # the top of the screen, above them otherwise; `self.subtitles.height`
        # is the pixel height stored by render_subtitles
        if config.subs_top_placement_B:
            y = self.subtitles.height + config.subs_screen_edge_padding
        else:
            y = config.screen_height - config.subs_screen_edge_padding - self.subtitles.height - h

        self.popup.setGeometry(int(x), int(y), int(w), 0)
        self.popup.show()

        QApplication.restoreOverrideCursor()
1844
if __name__ == "__main__":
    # Script entry point. Usage: <script> <mpv IPC socket path> <subtitle file path>
    print('[py part] Starting interSubs ...')

    # Directory of cached lookups ('urls/...'); it usually exists already.
    # Still best effort, but only filesystem errors are ignored now.
    try:
        os.mkdir('urls')
    except OSError:
        pass

    # Offline dictionary: one "word<TAB>translation" entry per line; lines
    # without a tab are skipped and keys are lower-cased. The file is closed
    # as soon as it has been read.
    if 'tab_divided_dict' in config.translation_function_names:
        with open(os.path.expanduser(config.tab_divided_dict_fname)) as dict_file:
            offdict = {
                key.strip().lower() : value.strip()
                for key, value in (entry.split('\t')[:2] for entry in dict_file if '\t' in entry)
            }

    mpv_socket = sys.argv[1]
    sub_file = sys.argv[2]
    # sub_file = '/tmp/mpv_sub_'
    # mpv_socket = '/tmp/mpv_socket_'

    # current subtitle text, shared between the polling thread and the GUI
    subs = ''

    app = QApplication(sys.argv)

    # runtime state kept on the config module so every part of the program can reach it
    config.avoid_resuming = False
    config.block_popup = False
    config.scroll = {}
    config.queue_to_translate = queue.Queue()
    config.screen_width = app.primaryScreen().size().width()
    config.screen_height = app.primaryScreen().size().height()

    form = main_class()
    app.exec_()