# Paste header: 6 years ago · Nov 07, 2019, 09:24 PM
1from kivy.app import App
2from kivy.lang import Builder
3from kivy.uix.recycleview import RecycleView
4from kivy.uix.screenmanager import ScreenManager, Screen
5from kivy.uix.boxlayout import BoxLayout
6from kivy.uix.gridlayout import GridLayout
7from kivy.config import Config
8
# Fixed-size 640x960 window.
# NOTE(review): Kivy's configuration docs ask for Config.set() to run before
# other Kivy modules are imported; here it runs after the widget imports --
# confirm the settings actually take effect.
Config.set("graphics", 'resizable', 0)
Config.set("graphics", 'height', 960)
Config.set("graphics", 'width', 640)

# Kv rules for the menu screens.  The "Русский язык" buttons call the
# handler on the running App (`app`), passing the pressed button; a bare
# `russ_yaz` / `self.russ_yaz` neither resolves to the App method nor calls it.
Builder.load_string("""

<Button1@Button>:
    background_color:255, 255, 255, 1
    size:300,150
    size_hint:None, None

<Image>:
    source:'фон.jpg'
    allow_stretch: True

<Label>:
    color:0,0,0,1
    font_size:30

<BoxLayout>:
    orientation:'vertical'
    spacing:200
    padding:170,200
    background_color:1,1,1,1

<GridLayout10@GridLayout>:
    cols:2
    spacing:10,40
    padding:15

<MainScreen>:
    name:"Menu"
    Image:
    BoxLayout:
        Button1:
            on_press:root.manager.current="Subjects"
            text:"Subjects"
        Button1:
            text:"Special"
            on_press:root.manager.current="Special"


<SubjectsScreen>:
    name:"Subjects"
    Image:
    BoxLayout:
        Button1:
            text:"10 class"
            on_press:root.manager.current="SubjectsScreen10"
        Button1:
            text:"11 class"
            on_press:root.manager.current="SubjectsScreen11"

<SubjectsScreen10>:
    name:"SubjectsScreen10"
    Image:
    GridLayout10:
        Button1:
            text:"Геометрия"
        Button1:
            text:"Алгебра"
        Button1:
            text:"Химия"
        Button1:
            text:"Физика"
        Button1:
            text:"Русская литература"
        Button1:
            text:"Русский язык"
            on_press:app.russ_yaz(self)
        Button1:
            text:"География"
        Button1:
            text:"Биология"
        Button1:
            text:"Белорусский язык"

<SubjectsScreen11>:
    name:"SubjectsScreen11"
    Image:
    GridLayout10:
        Button1:
            text:"Алгебра"
        Button1:
            text:"Геометрия"
        Button1:
            text:"Физика"
        Button1:
            text:"Химия"
        Button1:
            text:"Русская литература"
        Button1:
            text:"Русский язык"
            on_press:app.russ_yaz(self)

        Button1:
            text:"География"
        Button1:
            text:"Биология"
        Button1:
            text:"Белорусский язык"
        Button1:
            text:"Астрономия"

<SpecialSubjectsScreen>:
    name:"Special"
    Image:
    AnchorLayout:
        Button1:
            text:"No material(Back)"
            on_press: root.manager.current='Menu'
""")
120
121
class MainScreen(Screen):
    """Menu screen; its widget tree is declared by the ``<MainScreen>`` kv rule."""
124
125
class SpecialSubjectsScreen(Screen):
    """'Special' screen; laid out by the ``<SpecialSubjectsScreen>`` kv rule."""
128
129
class SubjectsScreen(Screen):
    """Class-selection screen; laid out by the ``<SubjectsScreen>`` kv rule."""
132
133
class SubjectsScreen10(Screen):
    """Subject list for class 10; laid out by the ``<SubjectsScreen10>`` kv rule."""
136
137
class SubjectsScreen11(Screen):
    """Subject list for class 11; laid out by the ``<SubjectsScreen11>`` kv rule."""
140
141
# Module-level screen manager; LibraryApp.build() returns it as the root.
# Screens are registered in the same order as before, each under the name
# the kv rules use for `root.manager.current = ...`.
sm = ScreenManager()
for _screen_cls, _screen_name in (
    (MainScreen, 'Menu'),
    (SpecialSubjectsScreen, 'Special'),
    (SubjectsScreen, "Subjects"),
    (SubjectsScreen10, "SubjectsScreen10"),
    (SubjectsScreen11, "SubjectsScreen11"),
):
    sm.add_widget(_screen_cls(name=_screen_name))
# Do not leave the loop helpers behind in the module namespace.
del _screen_cls, _screen_name
148
149
class LibraryApp(App):
    """Application object: serves the menu screens and opens the PDF viewer."""

    # Name of the screen that hosts the PDF viewer; it is created on first use.
    PDF_SCREEN_NAME = 'PDFViewer'

    def build(self):
        """Return the module-level ScreenManager as the root widget."""
        return sm

    def russ_yaz(self, instance):
        """Open the Russian-language textbook inside the running app.

        The viewer is added as an extra screen of the existing
        ScreenManager instead of calling ``runTouchApp``: that name is not
        in scope here, and it would start a second event loop while this
        app's loop is already running.

        :param instance: the widget that triggered the handler (unused).
        """
        fn = 'Дудников, А.В Русский язык.pdf'
        manager = self.root
        # NOTE(review): PDFDocumentWidget is defined further down in this
        # file; it must have been defined by the time a button is pressed.
        if not manager.has_screen(self.PDF_SCREEN_NAME):
            viewer = Screen(name=self.PDF_SCREEN_NAME)
            viewer.add_widget(PDFDocumentWidget(source=fn, cols=1))
            manager.add_widget(viewer)
        manager.current = self.PDF_SCREEN_NAME
159
# Script entry point for the library app.
# NOTE(review): this runs before the PDF viewer classes below are defined;
# presumably the guard belongs at the very end of the file -- confirm.
if __name__ == '__main__':
    library_app = LibraryApp()
    library_app.run()
162
163
164
165
166
167from os.path import exists
168from tempfile import mkdtemp, mkstemp
169from shutil import rmtree
170from binascii import b2a_hex
171from os import write, close
172from threading import Thread
173
174from pdfminer.pdfpage import PDFPage
175from pdfminer.pdfparser import PDFParser
176from pdfminer.converter import PDFPageAggregator
177from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
178from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
179from pdfminer.layout import (
180 LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
181 LTLine, LTRect,
182)
183
184from kivy.lang import Builder
185from kivy.clock import Clock
186
187from kivy.graphics import Mesh, Color
188from kivy.graphics.tesselator import Tesselator
189
190from kivy.uix.widget import Widget
191from kivy.uix.recycleview import RecycleView
192from kivy.uix.label import Label
193from kivy.uix.image import Image
194from kivy.uix.relativelayout import RelativeLayout
195from kivy.uix.boxlayout import BoxLayout
196
197from kivy.properties import (
198 StringProperty, ListProperty, NumericProperty, AliasProperty,
199 DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
200)
201
# Kv rules for the PDF viewer widgets (indentation restored; kv is
# indentation-sensitive, so the collapsed paste could not be parsed).
Builder.load_string('''
#:import RGBA kivy.utils.rgba

<PDFDocumentWidget>:
    viewclass: 'PDFPageWidget'
    key_size: 'size'
    # async load is buggy at the moment
    # async_load: True

    RecycleGridLayout:
        spacing: 5
        cols: root.cols
        rows: root.rows
        size_hint: None, None
        size: self.minimum_size
        default_size_hint: None, None

<PDFPageWidget>:
    size_hint: None, None

    canvas.before:
        Color:
            rgba: RGBA('FFFFFF')
        Rectangle:
            size: self.size

<PDFLabelWidget,PDFImageWidget>:
    size_hint: None, None

<PDFImageWidget>:
    pos: self.bbox[:2]
    size: self.bbox[2] - self.x, self.bbox[3] - self.y

<PDFLabelWidget>:
    text_size: self.width, None
    height: self.texture_size[1]
    color: RGBA('000000')
    font_size: 8

<PDFCurveWidget>:
''')
243
244
class PDFDocumentWidget(RecycleView):
    """RecycleView that lists the pages of a PDF file, one item per page.

    Assigning ``source`` (re)loads the document.  Each entry of ``data``
    carries the pdfminer page object, its size and a reference back to
    this widget so page views can reach the shared interpreter/device.
    """
    source = StringProperty()
    # Not used at the moment: the doc.initialize(password) call is disabled.
    password = StringProperty()
    cols = NumericProperty(None)
    rows = NumericProperty(None)
    _toc = ListProperty()
    async_load = BooleanProperty(False)

    def __init__(self, **kwargs):
        super(PDFDocumentWidget, self).__init__(**kwargs)
        self._fp = None
        self._document = None
        self._tmpdir = None
        self.bind(source=self.load)
        # binding does not fire for a source passed to the constructor
        if self.source:
            self.load()

    def load(self, *args):
        """(Re)load the document named by ``source``.

        A missing or empty source resets the widget to an empty state and
        returns without trying to open anything.  I/O errors while opening
        the file are printed, not raised.
        """
        if self._fp:
            # close the previous pdf file and forget the stale handle
            self._fp.close()
            self._fp = None

        pdf_doc = self.source
        if not pdf_doc or not exists(pdf_doc):
            # `pages` and `_doc` are reset as before; nothing in this file
            # reads them, they are kept for backward compatibility.
            self.pages = []
            self._doc = []
            self._document = None
            self._toc = []
            self.data = []
            if self._tmpdir:
                rmtree(self._tmpdir)
                self._tmpdir = None
            if pdf_doc:
                print('PDF file not found: {}'.format(pdf_doc))
            return

        try:
            # the file stays open: pages are parsed lazily from it
            self._fp = fp = open(pdf_doc, 'rb')
            # create a parser object associated with the file object
            parser = PDFParser(fp)
            # create a PDFDocument object that stores the document structure
            doc = PDFDocument(parser)
            # connect the parser and document objects
            parser.set_document(doc)

            self._document = doc
            self._parse_toc()
            self._create_tmpdir()
            self._parse_pages()
        except IOError as e:
            # the file doesn't exist or similar problem
            print(e)

    def _create_tmpdir(self):
        """Return the temp directory for extracted images, creating it once."""
        if not self._tmpdir:
            self._tmpdir = mkdtemp()
        return self._tmpdir

    def _parse_toc(self):
        """Store the document outline in ``_toc`` as (level, title) pairs.

        A document without an outline yields an empty list; any other
        failure is printed and whatever was collected so far is kept.
        """
        toc = []
        try:
            for (level, title, dest, a, se) in self._document.get_outlines():
                toc.append((level, title))
        except PDFNoOutlines:
            # expected for documents that simply have no outline
            pass
        except Exception as e:
            # stay best-effort, but do not hide the reason
            print('could not read the PDF outline: {}'.format(e))
        self._toc = toc

    def _parse_pages(self):
        """Fill ``data`` with one entry per page of the loaded document."""
        doc = self._document
        if not doc:
            self.data = []
            return

        rsrcmgr = PDFResourceManager()
        # device and interpreter are shared by all page widgets (see
        # PDFPageWidget._load_page)
        self.device = device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        self.interpreter = PDFPageInterpreter(rsrcmgr, device)

        self.data = [
            {
                'manager': self,
                'page': page,
                # last two MediaBox entries are used as the view size
                'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
            }
            for page in PDFPage.create_pages(doc)
        ]
339
340
class PDFImageWidget(Image):
    """Image widget for a picture extracted from a PDF page."""

    # Bounding box; the <PDFImageWidget> kv rule takes pos from the first
    # two entries and derives size from the last two.
    bbox = ListProperty([0, 0, 100, 100])
343
344
class PDFLabelWidget(Label):
    """Label widget for a piece of text extracted from a PDF page."""

    # Bounding box; declared for symmetry with PDFImageWidget, but no kv
    # rule in this file reads it for labels.
    bbox = ListProperty([0, 0, 100, 100])
347
348
class PDFCurveWidget(Widget):
    """Draws one PDF path (line, rectangle or curve) on its canvas.

    The canvas is rebuilt whenever one of the drawing properties changes,
    and once right after construction so that values passed to the
    constructor are drawn too.
    """
    points = ListProperty()
    line_width = NumericProperty()
    stroke = BooleanProperty(False)
    fill = BooleanProperty(False)
    even_odd = BooleanProperty()
    color = ColorProperty()
    fill_color = ColorProperty()

    def __init__(self, **kwargs):
        super(PDFCurveWidget, self).__init__(**kwargs)
        # collapse several property changes in one frame into one rebuild
        trigger = Clock.create_trigger(self.build, 0)

        self.bind(
            points=trigger,
            line_width=trigger,
            stroke=trigger,
            fill=trigger,
            even_odd=trigger,
            color=trigger,
            fill_color=trigger
        )
        # bind() does not fire for values already set through kwargs, so
        # schedule the first draw explicitly
        trigger()

    def _flat_points(self):
        """Return ``points`` as a flat [x1, y1, x2, y2, ...] list.

        Accepts both a flat list and a list of (x, y) pairs.
        NOTE(review): pdfminer presumably hands over (x, y) pairs in
        ``LTCurve.pts`` -- confirm.
        """
        flat = []
        for item in self.points:
            if isinstance(item, (list, tuple)):
                flat.extend(item)
            else:
                flat.append(item)
        return flat

    def build(self, *args):
        """Redraw the canvas from the current property values."""
        # Line is not among the module-level kivy.graphics imports
        from kivy.graphics import Line

        self.canvas.clear()
        points = self._flat_points()
        if not points:
            return

        with self.canvas:
            if self.fill:
                Color(rgba=self.fill_color)
                tess = Tesselator()
                tess.add_contour(points)
                # tesselate() has to be called; it reports success
                if tess.tesselate():
                    for vertices, indices in tess.meshes:
                        Mesh(
                            vertices=vertices,
                            indices=indices,
                            mode='triangle_fan'
                        )
                else:
                    print("mesh didn't tesselate!")

            if self.stroke:
                Color(rgba=self.color)
                Line(
                    points=points,
                    width=self.line_width
                )
397 )
398
399
class PDFPageWidget(RelativeLayout):
    """View for a single PDF page inside PDFDocumentWidget.

    Setting ``page`` runs the manager's pdfminer interpreter on it and
    stores the resulting layout objects in ``items``; a change of ``items``
    rebuilds the child widgets (labels, images, curves).
    """
    # labels created for this page, keyed by (box_pos, box_size)
    labels = DictProperty()
    attributes = DictProperty()
    # the owning PDFDocumentWidget (provides interpreter, device, tmpdir)
    manager = ObjectProperty()
    page = ObjectProperty()
    items = ListProperty()
    # image widgets created for this page
    images = ListProperty()

    def on_page(self, *args):
        """Load the layout of the newly assigned page."""
        if self.page is None or self.manager is None:
            # nothing to interpret yet
            return
        if self.manager.async_load:
            # NOTE(review): the kv rule calls async loading buggy;
            # _load_page then assigns `items` from this worker thread.
            Thread(target=self._load_page).start()
        else:
            self._load_page()

    def _load_page(self):
        """Interpret the page and publish its layout objects in ``items``."""
        self.manager.interpreter.process_page(self.page)
        self.items = list(self.manager.device.get_result())

    def on_items(self, *args):
        """Rebuild the child widgets from ``items``."""
        self.clear_widgets()
        # drop bookkeeping for widgets that were just removed, otherwise
        # add_text could append to a label that is no longer displayed
        self.labels = {}
        self.images = []
        self._render_content(self.items)

    def _render_content(self, lt_objs):
        """Iterate through the list of LT* objects and capture the text
        or image data contained in each
        """
        for lt_obj in lt_objs:
            if isinstance(lt_obj, (LTChar, LTTextBox, LTTextLine)):
                # Text is placed per box/line/char; pdfminer exposes font
                # information only per character, so no font styling is
                # applied here.
                self.add_text(
                    text=lt_obj.get_text(),
                    box_pos=(lt_obj.x0, lt_obj.y0),
                    box_size=(lt_obj.width, lt_obj.height),
                )

            elif isinstance(lt_obj, LTImage):
                self.add_image(lt_obj)

            elif isinstance(lt_obj, LTFigure):
                # figures are containers: render their children
                self._render_content(lt_obj)

            # all of these are actually LTCurves, but all types here for
            # clarity
            elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
                self.add_widget(
                    PDFCurveWidget(
                        points=lt_obj.pts or [],
                        line_width=lt_obj.linewidth or 1.0,
                        stroke=lt_obj.stroke,
                        fill=lt_obj.fill,
                        even_odd=lt_obj.evenodd,
                        # colors seem to be indices, to some dict i
                        # can't find in what pdfminer exposes
                        color='#FFFFFFFF',  # lt_obj.stroking_color or
                        fill_color='#00000000'  # lt_obj.non_stroking_color or
                    )
                )

    def save_image(self, lt_image):
        """Try to save the image data from this LTImage object, and
        return the file name, if successful (None otherwise)
        """
        stream = lt_image.stream
        if not stream:
            return None
        raw = stream.get_rawdata()
        if not raw:
            return None
        file_ext = self.determine_image_type(raw[0:4])
        if not file_ext:
            return None
        # file_ext already starts with a dot
        fd, fn = mkstemp(dir=self.manager._tmpdir, suffix=file_ext)
        try:
            write(fd, raw)
        finally:
            # never leak the descriptor, even if the write fails
            close(fd)
        return fn

    @staticmethod
    def determine_image_type(stream_first_4_bytes):
        """Find out the image file type based on the magic number comparison
        of the first 4 (or 2) bytes.

        Returns the extension including the leading dot ('.jpeg', '.png',
        '.gif', '.bmp'), or None when the signature is not recognised.
        """
        file_type = None
        bytes_as_hex = b2a_hex(stream_first_4_bytes)
        if bytes_as_hex.startswith(b'ffd8'):
            file_type = '.jpeg'
        elif bytes_as_hex == b'89504e47':
            file_type = '.png'
        elif bytes_as_hex == b'47494638':
            file_type = '.gif'
        elif bytes_as_hex.startswith(b'424d'):
            file_type = '.bmp'
        return file_type

    def add_text(self, text, box_pos, box_size, **kwargs):
        """Add ``text`` at the given box, or append it to the label that
        already occupies exactly that box.

        Extra keyword arguments are passed to the new PDFLabelWidget.
        """
        # the same key must be used for lookup and for storing
        key = (box_pos, box_size)
        label = self.labels.get(key)
        if label is None:
            label = PDFLabelWidget(text=text, pos=box_pos, size=box_size, **kwargs)
            self.labels[key] = label
            self.add_widget(label)
        else:
            label.text += text

    def add_image(self, lt_image):
        """Extract ``lt_image`` to a temp file and show it on the page.

        Does nothing when the image data cannot be saved.
        """
        source = self.save_image(lt_image)
        if source:
            image = PDFImageWidget(
                source=source,
                # the <PDFImageWidget> kv rule derives pos and size from bbox
                bbox=list(lt_image.bbox)
            )
            self.add_widget(image)
            self.images.append(image)
533 self.images.append(image)
534
535
# Stand-alone entry point for the PDF viewer: shows the file named on the
# command line, or the bundled textbook when no argument is given.
if __name__ == '__main__':
    from sys import argv
    from kivy.base import runTouchApp
    from kivy.uix.scrollview import ScrollView

    fn = argv[1] if len(argv) > 1 else 'Дудников, А.В Русский язык.pdf'
    root = PDFDocumentWidget(source=fn, cols=1)
    runTouchApp(root)
546 runTouchApp(root)