· 6 years ago · Nov 07, 2019, 08:15 PM
1from kivy.app import App
2from kivy.lang import Builder
3from kivy.uix.recycleview import RecycleView
4from kivy.uix.screenmanager import ScreenManager, Screen
5from kivy.uix.boxlayout import BoxLayout
6from kivy.uix.gridlayout import GridLayout
7from kivy.config import Config
8
# Window configuration: a non-resizable 640x960 window.
# NOTE(review): Kivy's Config documentation says graphics options must be set
# before the window is created; these calls run after the kivy.uix imports at
# the top of the file -- confirm they still take effect.
Config.set("graphics", 'resizable', 0)
Config.set("graphics", 'height', 960)
Config.set("graphics", 'width', 640)

# KV rules for the menu application.
# The widget nesting below is restored (children indented under their parent)
# and child widgets are declared with a trailing colon.
# NOTE(review): <Image>, <Label> and <BoxLayout> are rules on the stock Kivy
# classes, so they apply to every instance created in this process, not only
# to the screens declared here.
Builder.load_string("""

<Button1@Button>:
    background_color:255, 255, 255, 1
    size:300,150
    size_hint:None, None

<Image>:
    source:'фон.jpg'
    allow_stretch: True

<Label>:
    color:0,0,0,1
    font_size:30

<BoxLayout>:
    orientation:'vertical'
    spacing:200
    padding:170,200
    background_color:1,1,1,1

<GridLayout10@GridLayout>:
    cols:2
    spacing:10,40
    padding:15

<MainScreen>:
    name:"Menu"
    Image:
    BoxLayout:
        Button1:
            on_press:root.manager.current="Subjects"
            text:"Subjects"
        Button1:
            text:"Special"
            on_press:root.manager.current="Special"


<SubjectsScreen>:
    name:"Subjects"
    Image:
    BoxLayout:
        Button1:
            text:"10 class"
            on_press:root.manager.current="SubjectsScreen10"
        Button1:
            text:"11 class"
            on_press:root.manager.current="SubjectsScreen11"

<SubjectsScreen10>:
    name:"SubjectsScreen10"
    Image:
    GridLayout10:
        Button1:
            text:"Геометрия"
        Button1:
            text:"Алгебра"
        Button1:
            text:"Химия"
        Button1:
            text:"Физика"
        Button1:
            text:"Русская литература"
        Button1:
            text:"Русский язык"
        Button1:
            text:"География"
        Button1:
            text:"Биология"
        Button1:
            text:"Белорусский язык"

<SubjectsScreen11>:
    name:"SubjectsScreen11"
    Image:
    GridLayout10:
        Button1:
            text:"Алгебра"
        Button1:
            text:"Геометрия"
        Button1:
            text:"Физика"
        Button1:
            text:"Химия"
        Button1:
            text:"Русская литература"
        Button1:
            text:"Русский язык"
        Button1:
            text:"География"
        Button1:
            text:"Биология"
        Button1:
            text:"Белорусский язык"
        Button1:
            text:"Астрономия"

<SpecialSubjectsScreen>:
    name:"Special"
    Image:
    AnchorLayout:
        Button1:
            text:"No material(Back)"
            on_press: root.manager.current='Menu'
""")
117
118
class MainScreen(Screen):
    """Menu screen; its widgets come from the ``<MainScreen>`` KV rule."""
121
122
class SpecialSubjectsScreen(Screen):
    """Screen laid out by the ``<SpecialSubjectsScreen>`` KV rule."""
125
126
class SubjectsScreen(Screen):
    """Class-selection screen laid out by the ``<SubjectsScreen>`` KV rule."""
129
130
class SubjectsScreen10(Screen):
    """Subject list laid out by the ``<SubjectsScreen10>`` KV rule."""
133
134
class SubjectsScreen11(Screen):
    """Subject list laid out by the ``<SubjectsScreen11>`` KV rule."""
137
138
# Module-level screen manager returned by LibraryApp.build().
# Screens are created and registered one at a time, in this order; the first
# one added ('Menu') is the one the manager shows initially.
sm = ScreenManager()
for _screen_cls, _screen_name in (
    (MainScreen, 'Menu'),
    (SpecialSubjectsScreen, 'Special'),
    (SubjectsScreen, "Subjects"),
    (SubjectsScreen10, "SubjectsScreen10"),
    (SubjectsScreen11, "SubjectsScreen11"),
):
    sm.add_widget(_screen_cls(name=_screen_name))
# Do not leave the loop helpers behind in the module namespace.
del _screen_cls, _screen_name
145
146
class LibraryApp(App):
    """Kivy application whose root widget is the module-level ``sm``."""

    def build(self):
        """Return the pre-populated screen manager as the root widget."""
        root_widget = sm
        return root_widget
150
151
# Script entry point: start the menu application.
if __name__ == '__main__':
    app = LibraryApp()
    app.run()
154
155
156
157
158
159from os.path import exists
160from tempfile import mkdtemp, mkstemp
161from shutil import rmtree
162from binascii import b2a_hex
163from os import write, close
164from threading import Thread
165
166from pdfminer.pdfpage import PDFPage
167from pdfminer.pdfparser import PDFParser
168from pdfminer.converter import PDFPageAggregator
169from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines
170from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
171from pdfminer.layout import (
172 LAParams, LTTextBox, LTTextLine, LTFigure, LTImage, LTChar, LTCurve,
173 LTLine, LTRect,
174)
175
176from kivy.lang import Builder
177from kivy.clock import Clock
178
179from kivy.graphics import Mesh, Color
180from kivy.graphics.tesselator import Tesselator
181
182from kivy.uix.widget import Widget
183from kivy.uix.recycleview import RecycleView
184from kivy.uix.label import Label
185from kivy.uix.image import Image
186from kivy.uix.relativelayout import RelativeLayout
187from kivy.uix.boxlayout import BoxLayout
188
189from kivy.properties import (
190 StringProperty, ListProperty, NumericProperty, AliasProperty,
191 DictProperty, ObjectProperty, BooleanProperty, ColorProperty,
192)
193
# KV rules for the PDF viewer widgets defined below in this file.
# The nesting is restored so that RecycleGridLayout is a child of
# PDFDocumentWidget and Color/Rectangle live inside canvas.before.
Builder.load_string('''
#:import RGBA kivy.utils.rgba

<PDFDocumentWidget>:
    viewclass: 'PDFPageWidget'
    key_size: 'size'
    # async load is buggy at the moment
    # async_load: True

    RecycleGridLayout:
        spacing: 5
        cols: root.cols
        rows: root.rows
        size_hint: None, None
        size: self.minimum_size
        default_size_hint: None, None

<PDFPageWidget>:
    size_hint: None, None

    canvas.before:
        Color:
            rgba: RGBA('FFFFFF')
        Rectangle:
            size: self.size

<PDFLabelWidget,PDFImageWidget>:
    size_hint: None, None

<PDFImageWidget>:
    pos: self.bbox[:2]
    size: self.bbox[2] - self.x, self.bbox[3] - self.y

<PDFLabelWidget>:
    text_size: self.width, None
    height: self.texture_size[1]
    color: RGBA('000000')
    font_size: 8

<PDFCurveWidget>:
''')
235
236
class PDFDocumentWidget(RecycleView):
    """RecycleView whose data items are the pages of a PDF file.

    Setting ``source`` (re)loads the document: the file is opened, parsed
    with pdfminer, its outline is stored in ``_toc`` and one data dict per
    page (``manager``, ``page``, ``size``) is published through ``data``.
    """

    source = StringProperty()
    password = StringProperty()
    cols = NumericProperty(None)
    rows = NumericProperty(None)
    _toc = ListProperty()
    async_load = BooleanProperty(False)

    def __init__(self, **kwargs):
        super(PDFDocumentWidget, self).__init__(**kwargs)
        self._fp = None
        self._document = None
        self._tmpdir = None
        self.bind(source=self.load)
        # ``source`` passed as a kwarg was set before the binding above
        # existed, so trigger the first load by hand.
        if self.source:
            self.load()

    def load(self, *args):
        """Open and parse ``self.source``, replacing any previous document.

        When the source is empty or does not exist, the widget is reset to
        an empty state (no pages, no outline, temp dir removed) and nothing
        is opened. I/O errors while opening/reading are printed, not raised.
        """
        if self._fp:
            # close the previous pdf file and forget the dead handle
            self._fp.close()
            self._fp = None

        pdf_doc = self.source
        if not pdf_doc or not exists(pdf_doc):
            # attributes the original reset path also assigned; kept so any
            # external reader of them keeps working
            self.pages = []
            self._doc = []
            self._document = None
            self._toc = []
            if self._tmpdir:
                rmtree(self._tmpdir)
                self._tmpdir = None
            self.data = []
            # nothing to open: do not fall through to open() below
            return

        try:
            # open the pdf file
            self._fp = fp = open(pdf_doc, 'rb')
            # create a parser object associated with the file object
            parser = PDFParser(fp)
            # create a PDFDocument object that stores the document
            # structure, supplying the password for encrypted files
            doc = PDFDocument(parser, password=self.password)
            # connect the parser and document objects
            parser.set_document(doc)

            self._document = doc
            self._parse_toc()
            self._create_tmpdir()
            self._parse_pages()
        except IOError as e:
            # the file could not be opened/read: leave no half-loaded state
            print(e)
            if self._fp:
                self._fp.close()
                self._fp = None
            self._document = None
            self.data = []

    def _create_tmpdir(self):
        """Create the temp dir for extracted images once; return its path."""
        if not self._tmpdir:
            self._tmpdir = mkdtemp()
        return self._tmpdir

    def _parse_toc(self):
        """Store the document outline in ``_toc`` as (level, title) pairs.

        Best effort: a document without outlines yields an empty list, and
        any other failure is printed and also yields what was read so far.
        """
        toc = []
        doc = self._document
        try:
            for (level, title, dest, a, se) in doc.get_outlines():
                toc.append((level, title))
        except PDFNoOutlines:
            # the document simply has no table of contents
            pass
        except Exception as e:
            # keep loading the pages even if the outline is unreadable
            print(e)
        finally:
            self._toc = toc

    def _parse_pages(self):
        """Publish one data dict per page of the current document.

        Also creates the shared pdfminer ``device``/``interpreter`` pair that
        page widgets use to lay out their page.
        """
        doc = self._document
        if not doc:
            self.data = []
            return

        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        self.device = device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        self.interpreter = PDFPageInterpreter(rsrcmgr, device)

        self.data = [
            {
                'manager': self,
                'page': page,
                # last two MediaBox entries are used as the page size
                'size': page.attrs.get('MediaBox', [0, 0, 0, 0])[2:],
            }
            for page in PDFPage.create_pages(doc)
        ]
331
332
class PDFImageWidget(Image):
    """Image carrying a ``bbox``; the KV rule derives pos/size from it."""

    bbox = ListProperty([0, 0, 100, 100])
335
336
class PDFLabelWidget(Label):
    """Label used for PDF text; declares a ``bbox`` list property."""

    bbox = ListProperty([0, 0, 100, 100])
339
340
class PDFCurveWidget(Widget):
    """Draw a PDF path (line, rectangle or curve) on the widget canvas.

    ``points`` may be a flat ``[x1, y1, x2, y2, ...]`` list or a list of
    ``(x, y)`` pairs. The path is filled when ``fill`` is true and outlined
    when ``stroke`` is true; the canvas is rebuilt whenever a property
    changes.
    """

    points = ListProperty()
    line_width = NumericProperty()
    stroke = BooleanProperty(False)
    fill = BooleanProperty(False)
    even_odd = BooleanProperty()
    color = ColorProperty()
    fill_color = ColorProperty()

    def __init__(self, **kwargs):
        super(PDFCurveWidget, self).__init__(**kwargs)
        trigger = Clock.create_trigger(self.build, 0)

        self.bind(
            points=trigger,
            line_width=trigger,
            stroke=trigger,
            fill=trigger,
            even_odd=trigger,
            color=trigger,
            fill_color=trigger
        )
        # Properties given as kwargs were set before the bindings existed,
        # so schedule the first draw explicitly.
        trigger()

    def build(self, *args):
        """Clear the canvas and redraw the path from the current properties."""
        # Local imports: the file-level kivy.graphics import only brings in
        # Mesh and Color.
        from kivy.graphics import Line
        from kivy.graphics.tesselator import WINDING_ODD, WINDING_NONZERO

        self.canvas.clear()
        if not self.points:
            return

        # Tesselator and Line both take a flat coordinate list.
        pts = self.points
        if isinstance(pts[0], (list, tuple)):
            flat = [coord for point in pts for coord in point]
        else:
            flat = list(pts)

        with self.canvas:
            if self.fill:
                Color(rgba=self.fill_color)
                tess = Tesselator()
                tess.add_contour(flat)
                winding = WINDING_ODD if self.even_odd else WINDING_NONZERO
                if tess.tesselate(winding):
                    for vertices, indices in tess.meshes:
                        Mesh(
                            vertices=vertices,
                            indices=indices,
                            mode='triangle_fan'
                        )
                else:
                    print("mesh didn't tesselate!")

            if self.stroke:
                Color(rgba=self.color)
                Line(
                    points=flat,
                    width=self.line_width
                )
390
391
class PDFPageWidget(RelativeLayout):
    """Render one PDF page as child widgets (labels, images, curves).

    ``manager`` is the owning document widget (it provides ``interpreter``,
    ``device``, ``async_load`` and ``_tmpdir``); assigning ``page`` runs the
    pdfminer layout for that page and stores the result in ``items``, which
    in turn rebuilds the child widgets.
    """

    labels = DictProperty()
    attributes = DictProperty()
    manager = ObjectProperty()
    page = ObjectProperty()
    items = ListProperty()
    # image widgets created through add_image()
    images = ListProperty()

    def on_page(self, *args):
        """Lay out the new page, in a worker thread if the manager asks."""
        if self.manager is None or self.page is None:
            return
        if self.manager.async_load:
            Thread(target=self._load_page, kwargs={'deferred': True}).start()
        else:
            self._load_page()

    def _load_page(self, deferred=False):
        """Run the pdfminer layout for ``self.page`` and publish the result.

        With ``deferred`` true (worker-thread use) the result is handed to
        the Kivy clock so that ``items`` -- and the widget creation it
        triggers -- is updated on the main thread.
        """
        page = self.page
        manager = self.manager
        manager.interpreter.process_page(page)
        items = list(manager.device.get_result())
        if deferred:
            Clock.schedule_once(lambda dt: self._set_items(page, items), 0)
        else:
            self.items = items

    def _set_items(self, page, items):
        """Publish ``items`` unless the widget moved on to another page."""
        if self.page is page:
            self.items = items

    def on_items(self, *args):
        """Drop the previous page content and render the new items."""
        self.clear_widgets()
        # the label widgets were just removed, so forget them too
        self.labels = {}
        self._render_content(self.items)

    def _render_content(self, lt_objs):
        """Iterate through the list of LT* objects and create a widget for
        the text, image or curve data contained in each.
        """
        for lt_obj in lt_objs:
            if isinstance(lt_obj, LTChar):
                self.add_text(
                    text=lt_obj.get_text(),
                    box_pos=(lt_obj.x0, lt_obj.y0),
                    box_size=(lt_obj.width, lt_obj.height),
                    # font_size=lt_obj.fontsize,
                    # font_name=lt_obj.fontname,
                )

            elif isinstance(lt_obj, (LTTextBox, LTTextLine)):
                # Text is placed per text box/line. This is very limited
                # style-wise: pdfminer doesn't provide font, color or style
                # information at text-box level, only per character, which
                # is impractical to create individual labels for.
                self.add_text(
                    text=lt_obj.get_text(),
                    box_pos=(lt_obj.x0, lt_obj.y0),
                    box_size=(lt_obj.width, lt_obj.height),
                )

            elif isinstance(lt_obj, LTImage):
                saved_file = self.save_image(lt_obj)
                if saved_file:
                    self.add_widget(
                        PDFImageWidget(
                            source=saved_file,
                            bbox=lt_obj.bbox
                        )
                    )

            elif isinstance(lt_obj, LTFigure):
                # figures are containers: render their children
                self._render_content(lt_obj)

            # all of these are actually LTCurves, but all types here for
            # clarity
            elif isinstance(lt_obj, (LTLine, LTRect, LTCurve)):
                self.add_widget(
                    PDFCurveWidget(
                        points=lt_obj.pts or [],
                        line_width=lt_obj.linewidth or 1.0,
                        stroke=lt_obj.stroke,
                        fill=lt_obj.fill,
                        even_odd=lt_obj.evenodd,
                        # colors seem to be indices, to some dict i
                        # can't find in what pdfminer exposes
                        color='#FFFFFFFF',  # lt_obj.stroking_color or
                        fill_color='#00000000'  # lt_obj.non_stroking_color or
                    )
                )

    def save_image(self, lt_image):
        """Try to save the image data from this LTImage object, and
        return the file name, if successful (otherwise None).
        """
        # local import: only write/close are imported from os at file level
        from os import fdopen

        stream = lt_image.stream
        if not stream:
            return None
        raw = stream.get_rawdata()
        if not raw:
            return None
        # determine_image_type() already returns the leading dot
        suffix = self.determine_image_type(raw[0:4])
        if not suffix:
            return None

        fd, filename = mkstemp(dir=self.manager._tmpdir, suffix=suffix)
        # the file object writes the whole buffer and closes the descriptor
        with fdopen(fd, 'wb') as out:
            out.write(raw)
        return filename

    @staticmethod
    def determine_image_type(stream_first_4_bytes):
        """Find out the image file type based on the magic number comparison
        of the first 4 (or 2) bytes.

        Returns the extension including the leading dot ('.jpeg', '.png',
        '.gif', '.bmp'), or None when the signature is not recognised.
        """
        file_type = None
        bytes_as_hex = b2a_hex(stream_first_4_bytes)
        if bytes_as_hex.startswith(b'ffd8'):
            file_type = '.jpeg'
        elif bytes_as_hex == b'89504e47':
            file_type = '.png'
        elif bytes_as_hex == b'47494638':
            file_type = '.gif'
        elif bytes_as_hex.startswith(b'424d'):
            file_type = '.bmp'
        return file_type

    def add_text(self, text, box_pos, box_size, **kwargs):
        """Add ``text`` at the given box, appending to an existing label
        when one was already created for the same position and size.
        """
        key = (box_pos, box_size)
        label = self.labels.get(key)
        if not label:
            label = PDFLabelWidget(
                text=text, pos=box_pos, size=box_size, **kwargs
            )
            self.labels[key] = label
            self.add_widget(label)
        else:
            label.text += text

    def add_image(self, lt_image):
        """Save the LTImage to disk and add a widget showing it."""
        source = self.save_image(lt_image)
        if source:
            image = PDFImageWidget(
                source=source,
                pos=(lt_image.x0, lt_image.y0),
                size=(lt_image.width, lt_image.height)
            )
            self.add_widget(image)
            self.images.append(image)
526
527
# Script entry point: show the PDF named on the command line, or the
# bundled default file when no argument is given.
if __name__ == '__main__':
    from sys import argv
    from kivy.base import runTouchApp
    from kivy.uix.scrollview import ScrollView

    fn = argv[1] if len(argv) > 1 else 'Дудников, А.В Русский язык.pdf'
    root = PDFDocumentWidget(source=fn, cols=1)
    runTouchApp(root)
538 runTouchApp(root)