# Listing header (paste-site residue, not part of the module): posted Jan 10, 2020, 03:58 AM
# encoding: utf-8
# module tesserocr._tesserocr
# from C:\Users\slooh\PycharmProjects\ocr\venv\lib\site-packages\tesserocr\_tesserocr.cp37-win_amd64.pyd
# by generator 1.147
"""
Python wrapper around the Tesseract-OCR C++ API

This module provides a wrapper class :class:`PyTessBaseAPI` to call
Tesseract API methods. See :class:`PyTessBaseAPI` for details.

In addition, helper functions are provided for ocr operations:

>>> text = image_to_text(Image.open('./image.jpg').convert('L'), lang='eng')
>>> text = file_to_text('./image.jpg', psm=PSM.AUTO)
>>> print(tesseract_version())
tesseract 3.04.00
 leptonica-1.72
 libjpeg 8d (libjpeg-turbo 1.3.0) : libpng 1.2.51 : libtiff 4.0.3 : zlib 1.2.8
>>> get_languages()
('/usr/share/tesseract-ocr/tessdata/',
 ['eng', 'osd', 'equ'])
"""
23
24# imports
25import builtins as __builtins__ # <module 'builtins' (built-in)>
26import os as os # C:\Users\slooh\Anaconda3\lib\os.py
27import PIL.Image as Image # C:\Users\slooh\Anaconda3\lib\site-packages\PIL\Image.py
28from _io import BytesIO
29
30
# Variables with simple values

# Presumably the language used when none is given; it matches the 'eng'
# default documented for the ``lang`` keyword of the helper functions.
_init_lang = 'eng'

# Version string recorded by the skeleton generator for this module.
__version__ = '2.4.0'
36
37# functions
38
def abspath(path): # reliably restored by inspect
    """
    Return the absolute version of a path.

    NOTE(review): the module imports ``os``, so this looks like
    ``os.path.abspath`` re-exported by the extension module -- confirm.
    The body is a skeleton placeholder and returns ``None``.

    Args:
        path: The path to convert.
    """
    pass
42
def file_to_text(*args, **kwargs): # real signature unknown
    """
    Extract OCR text from an image file.

    Args:
        filename (str): Image file relative or absolute path.

    Kwargs:
        lang (str): An ISO 639-3 language string. Defaults to 'eng'.
        psm (int): Page segmentation mode. Defaults to :attr:`PSM.AUTO`.
            See :class:`PSM` for all available psm options.
        path (str): The name of the parent directory of tessdata.
            Must end in /.
        oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
            See :class:`OEM` for all available oem options.

    Returns:
        str: The text extracted from the image.

    Raises:
        :exc:`RuntimeError`: When image fails to be loaded or recognition fails.
    """
    # Skeleton placeholder: the real implementation lives in the compiled module.
    pass
66
def get_languages(*args, **kwargs): # real signature unknown
    """
    Return available languages in the given path.

    Args:
        path (str): The name of the parent directory of tessdata.
            Must end in /. Default tesseract-ocr datapath is used
            if no path is provided.

    Returns:
        tuple: Tuple with two elements:
            - path (str): tessdata parent directory path
            - languages (list): list of available languages as ISO 639-3 strings.
    """
    # Skeleton placeholder: the real implementation lives in the compiled module.
    pass
82
def image_to_text(*args, **kwargs): # real signature unknown
    """
    Recognize OCR text from an image object.

    Args:
        image (:class:`PIL.Image`): image to be processed.

    Kwargs:
        lang (str): An ISO 639-3 language string. Defaults to 'eng'.
        psm (int): Page segmentation mode. Defaults to :attr:`PSM.AUTO`.
            See :class:`PSM` for all available psm options.
        path (str): The name of the parent directory of tessdata.
            Must end in /.
        oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
            See :class:`OEM` for all available oem options.

    Returns:
        str: The text extracted from the image.

    Raises:
        :exc:`RuntimeError`: When image fails to be loaded or recognition fails.
    """
    # Skeleton placeholder: the real implementation lives in the compiled module.
    pass
106
def iterate_choices(*args, **kwargs): # real signature unknown
    """
    Helper generator function to iterate :class:`PyChoiceIterator`.

    The argument list could not be recovered from the compiled module;
    this skeleton body is a placeholder and returns ``None``.
    """
    pass
110
def iterate_level(*args, **kwargs): # real signature unknown
    """
    Helper generator function to iterate a :class:`PyPageIterator`
    level.

    Args:
        iterator: Instance of :class:`PyPageIterator`
        level: Page iterator level :class:`RIL`
    """
    # Skeleton placeholder: the real implementation lives in the compiled module.
    pass
121
def join(path, *paths): # reliably restored by inspect
    """
    Placeholder for the module-level ``join`` helper (no doc was recovered).

    NOTE(review): the signature matches ``os.path.join`` and the module
    imports ``os``, so this is presumably that function re-exported -- confirm.

    Args:
        path: First path component.
        *paths: Further path components.
    """
    pass
125
def tesseract_version(*args, **kwargs): # real signature unknown
    """
    Return tesseract-ocr and leptonica version info.

    This skeleton body is a placeholder and returns ``None``; the real
    implementation lives in the compiled module.
    """
    pass
129
# The ``__pyx_unpickle_*`` functions below carry no documentation in the
# compiled module. Judging by their names they are Cython-generated pickle
# helpers, one per enum-style class -- TODO confirm. Signatures are unknown.

def __pyx_unpickle_DIR(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_Justification(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_OEM(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_Orientation(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_PSM(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_PT(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_RIL(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_TextlineOrder(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle_WritingDirection(*args, **kwargs): # real signature unknown
    pass

def __pyx_unpickle__Enum(*args, **kwargs): # real signature unknown
    pass
159
160# classes
161
class _Enum(object):
    """
    Base class of the enum-style constant holders in this module
    (:class:`DIR`, :class:`Justification`, :class:`OEM`, :class:`Orientation`,
    :class:`PSM`, :class:`PT`).

    No documentation was recovered from the compiled module; all method
    bodies here are skeleton placeholders.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass
177
178
class DIR(_Enum):
    """
    Enum for strong text direction values.

    Attributes:
        NEUTRAL: Text contains only neutral characters.
        LEFT_TO_RIGHT: Text contains no Right-to-Left characters.
        RIGHT_TO_LEFT: Text contains no Left-to-Right characters.
        MIX: Text contains a mixture of left-to-right and right-to-left characters.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value, in the same order as the docstring.
    NEUTRAL = 0
    LEFT_TO_RIGHT = 1
    RIGHT_TO_LEFT = 2
    MIX = 3
207
208
class Justification(_Enum):
    """
    Enum for justification options.

    Attributes:
        UNKNOWN: value 0.
        LEFT: value 1.
        CENTER: value 2.
        RIGHT: value 3.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value.
    UNKNOWN = 0
    LEFT = 1
    CENTER = 2
    RIGHT = 3
229
230
class OEM(_Enum):
    """
    An enum that defines available OCR engine modes.

    Attributes:
        TESSERACT_ONLY: Run Tesseract only - fastest
        LSTM_ONLY: Run just the LSTM line recognizer. (>=v4.00)
        TESSERACT_LSTM_COMBINED: Run the LSTM recognizer, but allow fallback
            to Tesseract when things get difficult. (>=v4.00)
        DEFAULT: Specify this mode when calling Init*(), to indicate that
            any of the above modes should be automatically inferred from the
            variables in the language-specific config, command-line configs, or
            if not specified in any of the above should be set to the default
            `OEM.TESSERACT_ONLY`.

    .. note::

        The text recovered from the compiled module also described
        ``CUBE_ONLY`` ("Run Cube only - better accuracy, but slower.") and
        ``TESSERACT_CUBE_COMBINED`` ("Run both and combine results - best
        accuracy."), with the descriptions shifted by one entry. Neither
        member is defined on this class, so they are not listed as attributes.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value, in the same order as the docstring.
    TESSERACT_ONLY = 0
    LSTM_ONLY = 1
    TESSERACT_LSTM_COMBINED = 2
    DEFAULT = 3
266
267
class Orientation(_Enum):
    """
    Enum for orientation options.

    Attributes:
        PAGE_UP: value 0.
        PAGE_RIGHT: value 1.
        PAGE_DOWN: value 2.
        PAGE_LEFT: value 3.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value.
    PAGE_UP = 0
    PAGE_RIGHT = 1
    PAGE_DOWN = 2
    PAGE_LEFT = 3
288
289
class PSM(_Enum):
    """
    An enum that defines all available page segmentation modes.

    Attributes:
        OSD_ONLY: Orientation and script detection only.
        AUTO_OSD: Automatic page segmentation with orientation and script detection. (OSD)
        AUTO_ONLY: Automatic page segmentation, but no OSD, or OCR.
        AUTO: Fully automatic page segmentation, but no OSD. (:mod:`tesserocr` default)
        SINGLE_COLUMN: Assume a single column of text of variable sizes.
        SINGLE_BLOCK_VERT_TEXT: Assume a single uniform block of vertically aligned text.
        SINGLE_BLOCK: Assume a single uniform block of text.
        SINGLE_LINE: Treat the image as a single text line.
        SINGLE_WORD: Treat the image as a single word.
        CIRCLE_WORD: Treat the image as a single word in a circle.
        SINGLE_CHAR: Treat the image as a single character.
        SPARSE_TEXT: Find as much text as possible in no particular order.
        SPARSE_TEXT_OSD: Sparse text with orientation and script detection.
        RAW_LINE: Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
        COUNT: Number of enum entries.
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value, in the same order as the docstring.
    OSD_ONLY = 0
    AUTO_OSD = 1
    AUTO_ONLY = 2
    AUTO = 3
    SINGLE_COLUMN = 4
    SINGLE_BLOCK_VERT_TEXT = 5
    SINGLE_BLOCK = 6
    SINGLE_LINE = 7
    SINGLE_WORD = 8
    CIRCLE_WORD = 9
    SINGLE_CHAR = 10
    SPARSE_TEXT = 11
    SPARSE_TEXT_OSD = 12
    RAW_LINE = 13
    COUNT = 14
340
341
class PT(_Enum):
    """
    An enum that defines available Poly Block types.

    Attributes:
        UNKNOWN: Type is not yet known. Keep as the first element.
        FLOWING_TEXT: Text that lives inside a column.
        HEADING_TEXT: Text that spans more than one column.
        PULLOUT_TEXT: Text that is in a cross-column pull-out region.
        EQUATION: Partition belonging to an equation region.
        INLINE_EQUATION: Partition has inline equation.
        TABLE: Partition belonging to a table region.
        VERTICAL_TEXT: Text-line runs vertically.
        CAPTION_TEXT: Text that belongs to an image.
        FLOWING_IMAGE: Image that lives inside a column.
        HEADING_IMAGE: Image that spans more than one column.
        PULLOUT_IMAGE: Image that is in a cross-column pull-out region.
        HORZ_LINE: Horizontal Line.
        VERT_LINE: Vertical Line.
        NOISE: Lies outside of any column.
        COUNT: Count
    """
    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    # Members listed by value, in the same order as the docstring.
    UNKNOWN = 0
    FLOWING_TEXT = 1
    HEADING_TEXT = 2
    PULLOUT_TEXT = 3
    EQUATION = 4
    INLINE_EQUATION = 5
    TABLE = 6
    VERTICAL_TEXT = 7
    CAPTION_TEXT = 8
    FLOWING_IMAGE = 9
    HEADING_IMAGE = 10
    PULLOUT_IMAGE = 11
    HORZ_LINE = 12
    VERT_LINE = 13
    NOISE = 14
    COUNT = 15
394
395
class PyChoiceIterator(object):
    """
    Iterator over the alternative choices for a symbol.

    No class documentation was recovered from the compiled module.
    Instances are returned by :meth:`PyLTRResultIterator.GetChoiceIterator`
    and can be walked with the :func:`iterate_choices` helper.
    All method bodies here are skeleton placeholders.
    """
    def Confidence(self, *args, **kwargs): # real signature unknown
        """
        Return the confidence of the current choice.

        The number should be interpreted as a percent probability. (0.0f-100.0f)
        """
        pass

    def GetUTF8Text(self, *args, **kwargs): # real signature unknown
        """
        Return the UTF-8 encoded text string for the current
        choice.
        """
        pass

    def Next(self, *args, **kwargs): # real signature unknown
        """
        Move to the next choice for the symbol and return False if there
        are none left.
        """
        pass

    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    def __iter__(self, *args, **kwargs): # real signature unknown
        """ Implement iter(self). """
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    __pyx_vtable__ = None # (!) real value is '<capsule object NULL at 0x0000022C8F958720>'
439
440
class PyPageIterator(object):
    """
    Wrapper around Tesseract's ``PageIterator`` class.
    Returned by :meth:`PyTessBaseAPI.AnalyseLayout`.

    Instances of this class and its subclasses cannot be instantiated from Python.

    Accessing data
    ==============

    Coordinate system:

    Integer coordinates are at the cracks between the pixels.
    The top-left corner of the top-left pixel in the image is at (0,0).
    The bottom-right corner of the bottom-right pixel in the image is at
    (width, height).

    Every bounding box goes from the top-left of the top-left contained
    pixel to the bottom-right of the bottom-right contained pixel, so
    the bounding box of the single top-left pixel in the image is:
    (0,0)->(1,1).

    If an image rectangle has been set in the API, then returned coordinates
    relate to the original (full) image, rather than the rectangle.

    .. note::

        You can iterate through the elements of a level using the :func:`iterate_level`
        helper function:

        >>> for e in iterate_level(api.AnalyseLayout(), RIL.WORD):
        ...     orientation = e.Orientation()

    .. warning::

        This class points to data held within the :class:`PyTessBaseAPI`
        instance, and therefore can only be used while the :class:`PyTessBaseAPI`
        instance still exists and has not been subjected to a call of :meth:`Init`,
        :meth:`SetImage`, :meth:`Recognize`, :meth:`Clear`, :meth:`End`,
        or anything else that changes the internal `PAGE_RES`.
    """
    def Baseline(self, *args, **kwargs): # real signature unknown
        """
        Return the baseline of the current object at the given level.

        The baseline is the line that passes through (x1, y1) and (x2, y2).

        .. warning::

            with vertical text, baselines may be vertical!

        Args:
            level (int): Iterator level. See :class:`RIL`.

        Returns:
            tuple: Baseline points' coordinates (x1, y1), (x2, y2).
                ``None`` if there is no baseline at the current position.
        """
        pass

    def Begin(self, *args, **kwargs): # real signature unknown
        """ Move the iterator to point to the start of the page to begin an iteration. """
        pass

    def BlockPolygon(self, *args, **kwargs): # real signature unknown
        """
        Return the polygon outline of the current block.

        Returns:
            list or None: list of points (x,y tuples) which list the vertices
                of the polygon, and the last edge is the line segment between the last
                point and the first point.

                ``None`` will be returned if the iterator is
                at the end of the document or layout analysis was not used.
        """
        pass

    def BlockType(self, *args, **kwargs): # real signature unknown
        """
        Return the type of the current block. See :class:`PT` for
        possible types.
        """
        pass

    def BoundingBox(self, *args, **kwargs): # real signature unknown
        """
        Return the bounding rectangle of the current object at the given level.

        See the coordinate system description in the class documentation.

        Args:
            level (int): Page Iteration Level. See :class:`RIL` for available levels.

        Kwargs:
            padding (int): The padding argument to :meth:`GetImage` can be used to expand
                the image to include more foreground pixels.

        Returns:
            tuple or None if there is no such object at the current position.
                The returned bounding box (left, top, right and bottom values
                respectively) is guaranteed to match the size and position of
                the image returned by :meth:`GetBinaryImage`, but may clip
                foreground pixels from a grey image.
        """
        pass

    def BoundingBoxInternal(self, *args, **kwargs): # real signature unknown
        """
        Return the bounding rectangle of the object in a coordinate system of the
        working image rectangle having its origin at (rect_left_, rect_top_) with
        respect to the original image and is scaled by a factor scale_.

        Args:
            level (int): Page Iteration Level. See :class:`RIL` for available levels.

        Returns:
            tuple or None if there is no such object at the current position.
                The returned bounding box is represented as a tuple with
                left, top, right and bottom values respectively.
        """
        pass

    def Empty(self, *args, **kwargs): # real signature unknown
        """
        Return whether there is no object of a given level.

        Args:
            level (int): Iterator level. See :class:`RIL`.

        Returns:
            bool: ``True`` if there is no object at the given level.
        """
        pass

    def GetBinaryImage(self, *args, **kwargs): # real signature unknown
        """
        Return a binary image of the current object at the given level.

        The position and size match the return from :meth:`BoundingBoxInternal`, and so
        this could be upscaled with respect to the original input image.

        Args:
            level (int): Iterator level. See :class:`RIL`.

        Returns:
            :class:`PIL.Image`: Image object or None if no image is returned.
        """
        pass

    def GetImage(self, *args, **kwargs): # real signature unknown
        """
        Return an image of the current object at the given level in greyscale
        if available in the input.

        To guarantee a binary image use :meth:`GetBinaryImage`.

        Args:
            level (int): Iterator level. See :class:`RIL`.
            padding (int): Padding by which to expand the returned image.

                .. note::

                    in order to give the best possible image, the bounds are
                    expanded slightly over the binary connected component, by
                    the supplied padding, so the top-left position of the returned
                    image is returned along with the image (left, top respectively).
                    These will most likely not match the coordinates returned by
                    :meth:`BoundingBox`.

            original_image (:class:`PIL.Image`): Original image.
                If you do not supply an original image (None), you will get a binary one.

        Returns:
            tuple: The image (:class:`PIL.Image`) of the current object at the given level in greyscale
                followed by the position of its top-left corner. The note on ``padding``
                gives the order as (left, top); the recovered text here said
                "top and left" -- TODO confirm the order against the implementation.
        """
        pass

    # NOTE(review): the parameter name ``RIL_BLOCK`` was invented by the skeleton
    # generator from the docstring example; the docstring itself calls it ``level``.
    def IsAtBeginningOf(self, RIL_BLOCK): # real signature unknown; restored from __doc__
        """
        Return whether the iterator is at the start of an object at the given
        level.

        For instance, suppose an iterator it is pointed to the first symbol of the
        first word of the third line of the second paragraph of the first block in
        a page, then::

            it.IsAtBeginningOf(RIL.BLOCK) is False
            it.IsAtBeginningOf(RIL.PARA) is False
            it.IsAtBeginningOf(RIL.TEXTLINE) is True
            it.IsAtBeginningOf(RIL.WORD) is True
            it.IsAtBeginningOf(RIL.SYMBOL) is True

        Args:
            level (int): Iterator level. See :class:`RIL`.

        Returns:
            bool: ``True`` if the iterator is at the start of an object at the
                given level.
        """
        pass

    # NOTE(review): ``RIL_PARA`` / ``RIL_SYMBOL`` were invented by the skeleton
    # generator from the docstring example; the docstring calls them ``level``
    # and ``element``.
    def IsAtFinalElement(self, RIL_PARA, RIL_SYMBOL): # real signature unknown; restored from __doc__
        """
        Return whether the iterator is positioned at the last element in a
        given level. (e.g. the last word in a line, the last line in a block)

        Here's some two-paragraph example
        text:

            It starts off innocuously
            enough but quickly turns bizarre.
            The author inserts a cornucopia
            of words to guard against confused
            references.

        Now take an iterator ``it`` pointed to the start of "bizarre."

            it.IsAtFinalElement(RIL.PARA, RIL.SYMBOL) = False
            it.IsAtFinalElement(RIL.PARA, RIL.WORD) = True
            it.IsAtFinalElement(RIL.BLOCK, RIL.WORD) = False

        Args:
            level (int): Iterator Level. See :class:`RIL`.
            element (int): Element level. See :class:`RIL`.

        Returns:
            bool: ``True`` if the iterator is positioned at the last element
                in the given level.
        """
        pass

    def IsWithinFirstTextlineOfParagraph(self, *args, **kwargs): # real signature unknown
        """
        Return whether this iterator points anywhere in the first textline of a
        paragraph.
        """
        pass

    def Next(self, *args, **kwargs): # real signature unknown
        """
        Move to the start of the next object at the given level in the
        page hierarchy, and return False if the end of the page was reached.

        .. note::

            :attr:`RIL.SYMBOL` will skip non-text blocks, but all other
            :class:`RIL` level values will visit each non-text block once.

            Think of non text blocks as containing a single para, with a single line,
            with a single imaginary word.

        Calls to Next with different levels may be freely intermixed.
        This function iterates words in right-to-left scripts correctly, if
        the appropriate language has been loaded into Tesseract.

        Args:
            level (int): Iterator level. See :class:`RIL`.
        """
        pass

    def Orientation(self, *args, **kwargs): # real signature unknown
        """
        Return the orientation for the block the iterator points to.

        Returns:
            tuple: The following values are returned respectively::

                orientation: See :class:`Orientation`
                writing_direction: See :class:`WritingDirection`
                textline_order: See :class:`TextlineOrder`
                deskew_angle: After rotating the block so the text orientation is
                    upright, how many radians does one have to rotate the
                    block anti-clockwise for it to be level?
                    -Pi/4 <= deskew_angle <= Pi/4
        """
        pass

    def ParagraphInfo(self, *args, **kwargs): # real signature unknown
        """
        Return information about the current paragraph, if available.

        Returns:
            tuple: The following values are returned respectively::

                justification:
                    LEFT if ragged right, or fully justified and script is left-to-right.
                    RIGHT if ragged left, or fully justified and script is right-to-left.
                    UNKNOWN if it looks like source code or we have very few lines.
                    See :class:`Justification`.
                is_list_item:
                    ``True`` if we believe this is a member of an ordered or unordered list.
                is_crown:
                    ``True`` if the first line of the paragraph is aligned with the other
                    lines of the paragraph even though subsequent paragraphs have first
                    line indents. This typically indicates that this is the continuation
                    of a previous paragraph or that it is the very first paragraph in
                    the chapter.
                first_line_indent:
                    For LEFT aligned paragraphs, the first text line of paragraphs of
                    this kind are indented this many pixels from the left edge of the
                    rest of the paragraph.
                    For RIGHT aligned paragraphs, the first text line of paragraphs of
                    this kind are indented this many pixels from the right edge of the
                    rest of the paragraph.
                    NOTE 1: This value may be negative.
                    NOTE 2: if ``is_crown == True``, the first line of this paragraph is
                        actually flush, and first_line_indent is set to the "common"
                        first_line_indent for subsequent paragraphs in this block
                        of text.
        """
        pass

    def RestartParagraph(self, *args, **kwargs): # real signature unknown
        """
        Move the iterator to the beginning of the paragraph.

        This class implements this functionality by moving it to the zero indexed
        blob of the first (leftmost) word on the first row of the paragraph.
        """
        pass

    def RestartRow(self, *args, **kwargs): # real signature unknown
        """
        Move the iterator to the beginning of the text line.

        This class implements this functionality by moving it to the zero indexed
        blob of the first (leftmost) word of the row.
        """
        pass

    def SetBoundingBoxComponents(self, *args, **kwargs): # real signature unknown
        """
        Controls what to include in a bounding box. Bounding boxes of all levels
        between :attr:`RIL.WORD` and :attr:`RIL.BLOCK` can include or exclude potential diacritics.

        Between layout analysis and recognition, it isn't known where all
        diacritics belong, so this control is used to include or exclude some
        diacritics that are above or below the main body of the word. In most cases
        where the placement is obvious, and after recognition, it doesn't make as
        much difference, as the diacritics will already be included in the word.

        Args:
            include_upper_dots (bool): Include upper dots.
            include_lower_dots (bool): Include lower dots.
        """
        pass

    def __init__(self, *args, **kwargs): # real signature unknown
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object. See help(type) for accurate signature. """
        pass

    def __reduce__(self, *args, **kwargs): # real signature unknown
        pass

    def __setstate__(self, *args, **kwargs): # real signature unknown
        pass

    __pyx_vtable__ = None # (!) real value is '<capsule object NULL at 0x0000022C8F9585D0>'
805
806
807class PyLTRResultIterator(PyPageIterator):
808 # no doc
809 def BlanksBeforeWord(self, *args, **kwargs): # real signature unknown
810 """ Return True if the current word is numeric. """
811 pass
812
813 def Confidence(self, *args, **kwargs): # real signature unknown
814 """
815 Return the mean confidence of the current object at the given level.
816
817 The number should be interpreted as a percent probability. (0.0-100.0)
818 """
819 pass
820
821 def EquivalentToTruth(self, *args, **kwargs): # real signature unknown
822 """
823 Return True if the given string is equivalent to the truth string for
824 the current word.
825 """
826 pass
827
828 def GetBlamerDebug(self, *args, **kwargs): # real signature unknown
829 """ Return a string with blamer information for this word. """
830 pass
831
832 def GetBlamerMisadaptionDebug(self, *args, **kwargs): # real signature unknown
833 """ Return a string with misadaption information for this word. """
834 pass
835
836 def GetChoiceIterator(self, *args, **kwargs): # real signature unknown
837 """
838 Return `PyChoiceIterator` instance to iterate over symbol choices.
839
840 Returns `None` on failure.
841 """
842 pass
843
844 def GetUTF8Text(self, *args, **kwargs): # real signature unknown
845 """
846 Returns the UTF-8 encoded text string for the current
847 object at the given level.
848
849 Args:
850 level (int): Iterator level. See :class:`RIL`.
851
852 Returns:
853 unicode: UTF-8 encoded text for the given level's current object.
854
855 Raises:
856 :exc:`RuntimeError`: If no text returned.
857 """
858 pass
859
860 def HasBlamerInfo(self, *args, **kwargs): # real signature unknown
861 """ Return True if the word contains blamer information. """
862 pass
863
864 def HasTruthString(self, *args, **kwargs): # real signature unknown
865 """ Returns True if a truth string was recorded for the current word. """
866 pass
867
868 def SetLineSeparator(self, *args, **kwargs): # real signature unknown
869 """
870 Set the string inserted at the end of each text line. "
871 " by default.
872 """
873 pass
874
875 def SetParagraphSeparator(self, *args, **kwargs): # real signature unknown
876 """
877 Set the string inserted at the end of each paragraph. "
878 " by default.
879 """
880 pass
881
882 def SymbolIsDropcap(self, *args, **kwargs): # real signature unknown
883 """
884 Return True if the current symbol is a dropcap.
885
886 If iterating at a higher level object than symbols, eg words, then
887 this will return the attributes of the first symbol in that word.
888 """
889 pass
890
891 def SymbolIsSubscript(self, *args, **kwargs): # real signature unknown
892 """
893 Return True if the current symbol is a subscript.
894
895 If iterating at a higher level object than symbols, eg words, then
896 this will return the attributes of the first symbol in that word.
897 """
898 pass
899
900 def SymbolIsSuperscript(self, *args, **kwargs): # real signature unknown
901 """
902 Return True if the current symbol is a superscript.
903
904 If iterating at a higher level object than symbols, eg words, then
905 this will return the attributes of the first symbol in that word.
906 """
907 pass
908
909 def WordDirection(self, *args, **kwargs): # real signature unknown
910 """
911 Return the overall directionality of this word.
912
913 See :class:`DIR` for available values.
914 """
915 pass
916
917 def WordFontAttributes(self, *args, **kwargs): # real signature unknown
918 """
919 Return the font attributes of the current word.
920
921 .. note::
922 If iterating at a higher level object than words, eg textlines,
923 then this will return the attributes of the first word in that textline.
924
925 Returns:
926 dict: `None` if nothing found or a dictionary with the font attributes::
927
928 font_name: String representing a font name. Lifespan is the same as
929 the iterator itself, ie rendered invalid by various members of
930 :class:`PyTessBaseAPI`, including `Init`, `SetImage`, `End` or
931 deleting the :class:`PyTessBaseAPI`.
932 bold (bool): ``True`` if bold.
933 italic (bool): ``True`` if italic.
934 underlined (bool): ``True`` if underlined.
935 monospace (bool): ``True`` if monospace.
936 serif (bool): ``True`` if serif.
937 smallcaps (bool): ``True`` if smallcaps.
938 pointsize (int): printers points (1/72 inch.)
939 font_id (int): font id.
940 """
941 pass
942
943 def WordIsFromDictionary(self, *args, **kwargs): # real signature unknown
944 """ Return True if the current word was found in a dictionary. """
945 pass
946
947 def WordIsNumeric(self, *args, **kwargs): # real signature unknown
948 """ Return True if the current word is numeric. """
949 pass
950
951 def WordLattice(self, *args, **kwargs): # real signature unknown
952 """ Return a serialized choice lattice. """
953 pass
954
955 def WordNormedUTF8Text(self, *args, **kwargs): # real signature unknown
956 """
957 Returns a UTF-8 encoded normalized OCR string for the
958 current word.
959 """
960 pass
961
962 def WordRecognitionLanguage(self, *args, **kwargs): # real signature unknown
963 """
964 Return the name of the language used to recognize this word.
965
966 Returns ``None`` on error.
967 """
968 pass
969
970 def WordTruthUTF8Text(self, *args, **kwargs): # real signature unknown
971 """ Return a UTF-8 encoded truth string for the current word. """
972 pass
973
974 def __init__(self, *args, **kwargs): # real signature unknown
975 pass
976
977 @staticmethod # known case of __new__
978 def __new__(*args, **kwargs): # real signature unknown
979 """ Create and return a new object. See help(type) for accurate signature. """
980 pass
981
982 def __reduce__(self, *args, **kwargs): # real signature unknown
983 pass
984
985 def __setstate__(self, *args, **kwargs): # real signature unknown
986 pass
987
    # Placeholder only: the skeleton generator could not reproduce the real
    # value (a capsule object), so ``None`` stands in for it here.
    __pyx_vtable__ = None # (!) real value is '<capsule object NULL at 0x0000022C8F9586C0>'
989
990
class PyResultIterator(PyLTRResultIterator):
    """Wrapper around Tesseract's ``ResultIterator`` class.

    .. note::

        The :func:`iterate_level` helper function can be used to walk
        through the elements of a level:

        >>> for e in iterate_level(api.GetIterator(), RIL.WORD):
        ...     word = e.GetUTF8Text()

    See :class:`PyPageIterator` for more details.
    """

    # Placeholder only: the generator could not reproduce the real capsule object.
    __pyx_vtable__ = None  # (!) real value is '<capsule object NULL at 0x0000022C8F9586F0>'

    @staticmethod  # known case of __new__
    def __new__(*args, **kwargs):  # real signature unknown
        """Create and return a new object.

        See ``help(type)`` for the accurate signature.
        """

    def __init__(self, *args, **kwargs):  # real signature unknown
        """Initializer stub; the real signature could not be recovered."""

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def __setstate__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def GetBestLSTMSymbolChoices(self, *args, **kwargs):  # real signature unknown
        """Stub only: no documentation was recovered for this method."""

    def IsAtBeginningOf(self, *args, **kwargs):  # real signature unknown
        """Tell whether the iterator is at the logical beginning of the given level.

        This is in contrast to :class:`PyResultIterator`'s left-to-right
        top-to-bottom order.

        In every other respect it behaves like
        :meth:`PyPageIterator.IsAtBeginningOf`.
        """

    def ParagraphIsLtr(self, *args, **kwargs):  # real signature unknown
        """Tell whether the current paragraph's dominant reading direction is
        left-to-right (rather than right-to-left).
        """
1040
1041
class PyTessBaseAPI(object):
    """Cython wrapper class around the C++ TessBaseAPI class.

    Usage as a context manager:

    >>> with PyTessBaseAPI(path='./', lang='eng') as tesseract:
    ...     tesseract.SetImage(image)
    ...     text = tesseract.GetUTF8Text()

    Example with manual handling:

    >>> tesseract = PyTessBaseAPI(path='./', lang='eng')
    >>> try:
    ...     tesseract.SetImage(image)
    ...     text = tesseract.GetUTF8Text()
    ... finally:
    ...     tesseract.End()

    Args:
        path (str): The name of the parent directory of tessdata.
            Must end in /.
        lang (str): An ISO 639-3 language string. Defaults to 'eng'.
            The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
            that multiple languages are to be loaded. Eg hin+eng will load Hindi and
            English. Languages may specify internally that they want to be loaded
            with one or more other languages, so the ~ sign is available to override
            that. Eg if hin were set to load eng by default, then hin+~eng would force
            loading only hin. The number of loaded languages is limited only by
            memory, with the caveat that loading additional languages will impact
            both speed and accuracy, as there is more work to do to decide on the
            applicable language, and there is more chance of hallucinating incorrect
            words.
        psm (int): Page segmentation mode. Defaults to :attr:`PSM.AUTO`.
            See :class:`PSM` for available psm values.
        init (bool): If ``False``, :meth:`Init` will not be called and has to be called
            after initialization.
        oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.

    Raises:
        :exc:`RuntimeError`: If `init` is ``True`` and API initialization fails.
    """

    def AdaptToWordStr(self, *args, **kwargs):  # real signature unknown
        """Apply the given word to the adaptive classifier if possible.

        Assumes that :meth:`SetImage` / :meth:`SetRectangle` have been used to set the image
        to the given word.

        Args:
            psm (int): Should be :attr:`PSM.SINGLE_WORD` or
                :attr:`PSM.CIRCLE_WORD`, as that will be used to control layout analysis.
                The currently set PageSegMode is preserved.
            word (str): The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
                tell the boundaries of the graphemes.

        Returns:
            bool: ``False`` if adaption was not possible for some reason.
        """

    def AllWordConfidences(self, *args, **kwargs):  # real signature unknown
        """Return all word confidences (between 0 and 100) as a list.

        The number of confidences should correspond to the number of space-
        delimited words in `GetUTF8Text`.
        """

    def AllWords(self, *args, **kwargs):  # real signature unknown
        """Return list of all detected words.

        Returns an empty list if :meth:`Recognize` was not called first.
        """

    def AnalyseLayout(self, *args, **kwargs):  # real signature unknown
        """Run page layout analysis in the mode set by :meth:`SetPageSegMode`.

        May optionally be called prior to :meth:`Recognize` to get access to just
        the page layout results. Returns a :class:`PyPageIterator` iterator to the results.

        Kwargs:
            merge_similar_words (bool): If ``True``, words are combined where suitable
                for use with a line recognizer. Use if you want to use AnalyseLayout to find the
                textlines, and then want to process textline fragments with an external
                line recognizer.

        Returns:
            :class:`PyPageIterator`: Page iterator or `None` on error or an empty page.
        """

    def Clear(self, *args, **kwargs):  # real signature unknown
        """Free up recognition results and any stored image data, without actually
        freeing any recognition data that would be time-consuming to reload.
        """

    def ClearAdaptiveClassifier(self, *args, **kwargs):  # real signature unknown
        """Call between pages or documents etc to free up memory and forget
        adaptive data.
        """

    def ClearPersistentCache(self, *args, **kwargs):  # real signature unknown
        """Stub only: no documentation was recovered for this method."""

    def DetectOrientationScript(self, *args, **kwargs):  # real signature unknown
        """Detect the orientation of the input image and apparent script (alphabet).

        Returns:
            `dict` or `None` if image was not successfully processed. dict contains:

            - orient_deg: Orientation of detected clockwise rotation of the input image
              in degrees (0, 90, 180, 270).
            - orient_conf: The orientation confidence (15.0 is reasonably confident).
            - script_name: ASCII string, the name of the script, e.g. "Latin".
            - script_conf: Script confidence.
        """

    def DetectOS(self, *args, **kwargs):  # real signature unknown
        """Estimate the Orientation and Script of the image.

        Returns:
            `dict` or `None` if image was not successfully processed. dict contains:

            - orientation: Orientation ids [0..3] map to [0, 270, 180, 90] degree
              orientations of the page respectively, where the values refer to the
              amount of clockwise rotation to be applied to the page for the text to
              be upright and readable.
            - oconfidence: Orientation confidence.
            - script: Index of the script with the highest score for this orientation.
            - sconfidence: script confidence.
        """

    def End(self, *args, **kwargs):  # real signature unknown
        """Close down tesseract and free up all memory."""

    def GetAvailableLanguages(self, *args, **kwargs):  # real signature unknown
        """Return list of available languages in the init data path."""

    def GetBestLSTMSymbolChoices(self, *args, **kwargs):  # real signature unknown
        """Return Symbol choices as multi-dimensional array of tuples.

        The first dimension contains words. The second dimension contains the LSTM
        timesteps of the respective word. They are either accumulated over
        characters or pure which depends on the value set in lstm_choice_mode:
        1 = pure; 2 = accumulated. The third dimension contains the symbols
        and their probability as tuples for the respective timestep.

        Returns an empty list if :meth:`Recognize` was not called first.
        """

    def GetBoolVariable(self, *args, **kwargs):  # real signature unknown
        """Return the value of the given bool parameter if it exists among Tesseract parameters.

        Returns ``None`` if the parameter was not found.
        """

    def GetBoxText(self, *args, **kwargs):  # real signature unknown
        """Return recognized text coded in the same
        format as a box file used in training.

        Constructs coordinates in the original image - not just the rectangle.

        Args:
            page_number (int): Page number is a 0-based page index that will appear
                in the box file.
        """

    def GetComponentImages(self, *args, **kwargs):  # real signature unknown
        """Get the given level kind of components (block, textline, word etc.) as a
        list of image, box bounds {x, y, width, height} tuples in reading order.

        Can be called before or after :meth:`Recognize`.

        Args:
            level (int): Iterator level. See :class:`RIL`.
            text_only (bool): If ``True``, then only text components are returned.

        Kwargs:
            raw_image (bool): If ``True``, then portions of the original image are extracted
                instead of the thresholded image and padded with `raw_padding`. Defaults to
                ``False``.
            raw_padding (int): Image padding pixels. Defaults to 0.
            blockids (bool): If ``True``, the block-id of each component is also included
                in the returned tuples (`None` otherwise). Defaults to ``True``.
            paraids (bool): If ``True``, the paragraph-id of each component with its block
                is also included in the returned tuples.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
                block id (int): textline block id (if blockids is ``True``). ``None`` otherwise.
                paragraph id (int): textline paragraph id within its block (if paraids is True).
                    ``None`` otherwise.
        """

    def GetConnectedComponents(self, *args, **kwargs):  # real signature unknown
        """Get the individual connected (text) components (created
        after the page segmentation step, but before recognition)
        as a list of image, box bounds {x, y, width, height} tuples
        in reading order.

        Can be called before or after :meth:`Recognize`.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
        """

    def GetDatapath(self, *args, **kwargs):  # real signature unknown
        """Return tessdata parent directory."""

    def GetDoubleVariable(self, *args, **kwargs):  # real signature unknown
        """Return the value of the given double parameter if it exists among Tesseract parameters.

        Returns ``None`` if the parameter was not found.
        """

    def GetHOCRText(self, *args, **kwargs):  # real signature unknown
        """Return a HTML-formatted string with hOCR markup from the internal
        data structures.

        Args:
            page_number (int): Page number is 0-based but will appear in the output as 1-based.
        """

    def GetInitLanguagesAsString(self, *args, **kwargs):  # real signature unknown
        """Return the languages string used in the last valid initialization.

        If the last initialization specified "deu+hin" then that will be
        returned. If hin loaded eng automatically as well, then that will
        not be included in this list. To find the languages actually
        loaded use :meth:`GetLoadedLanguages`.
        """

    def GetIntVariable(self, *args, **kwargs):  # real signature unknown
        """Return the value of the given int parameter if it exists among Tesseract parameters.

        Returns ``None`` if the parameter was not found.
        """

    def GetIterator(self, *args, **kwargs):  # real signature unknown
        """Get a reading-order iterator to the results of :meth:`AnalyseLayout` and/or
        :meth:`Recognize`.

        Returns:
            :class:`PyResultIterator`: reading-order iterator or `None` on failure.
        """

    def GetLoadedLanguages(self, *args, **kwargs):  # real signature unknown
        """Return the loaded languages as a list of STRINGs.

        Includes all languages loaded by the last Init, including those loaded
        as dependencies of other loaded languages.
        """

    def GetPageSegMode(self, *args, **kwargs):  # real signature unknown
        """Return the current page segmentation mode."""

    def GetRegions(self, *args, **kwargs):  # real signature unknown
        """Get the result of page layout analysis as a list of
        image, box bounds {x, y, width, height} tuples in reading order.

        Can be called before or after :meth:`Recognize`.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
        """

    def GetStringVariable(self, *args, **kwargs):  # real signature unknown
        """Return the value of the given string parameter if it exists among Tesseract parameters.

        Returns ``None`` if the parameter was not found.
        """

    def GetStrips(self, *args, **kwargs):  # real signature unknown
        """Get the textlines and strips of image regions as a list
        of image, box bounds {x, y, width, height} tuples in reading order.

        Enables downstream handling of non-rectangular regions.

        Can be called before or after :meth:`Recognize`.

        Kwargs:
            blockids (bool): If ``True`` (default), the block-id of each line is also
                included in the returned tuples.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
                block id (int): textline block id (if blockids is ``True``). ``None`` otherwise.
        """

    def GetTextDirection(self, *args, **kwargs):  # real signature unknown
        """Get text direction.

        Returns:
            tuple: offset and slope
        """

    def GetTextlines(self, *args, **kwargs):  # real signature unknown
        """Get the textlines as a list of image, box bounds
        {x, y, width, height} tuples in reading order.

        Can be called before or after :meth:`Recognize`.

        Args:
            raw_image (bool): If ``True``, then extract from the original image
                instead of the thresholded image and pad by `raw_padding` pixels.
            raw_padding (int): Padding pixels.

        Kwargs:
            blockids (bool): If ``True`` (default), the block-id of each line is also
                included in the returned tuples (`None` otherwise).
            paraids (bool): If ``True``, the paragraph-id of each line within its block is
                also included in the returned tuples (`None` otherwise). Default is ``False``.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
                block id (int): textline block id (if blockids is ``True``). ``None`` otherwise.
                paragraph id (int): textline paragraph id within its block (if paraids is True).
                    ``None`` otherwise.
        """

    def GetThresholdedImage(self, *args, **kwargs):  # real signature unknown
        """Return a copy of the internal thresholded image from Tesseract.

        May be called any time after SetImage.
        """

    def GetThresholdedImageScaleFactor(self, *args, **kwargs):  # real signature unknown
        """Return the scale factor of the thresholded image that would be returned by
        GetThresholdedImage().

        Returns:
            int: 0 if no thresholder has been set.
        """

    def GetTSVText(self, *args, **kwargs):  # real signature unknown
        """Make a TSV-formatted string from the internal data structures.

        Args:
            page_number (int): Page number is 0-based but will appear in the output as 1-based.
        """

    def GetUnichar(self, *args, **kwargs):  # real signature unknown
        """Return the string form of the specified unichar.

        Args:
            unichar_id (int): unichar id.
        """

    def GetUNLVText(self, *args, **kwargs):  # real signature unknown
        """Return the recognized text coded as UNLV format Latin-1 with
        specific reject and suspect codes.
        """

    def GetUTF8Text(self, *args, **kwargs):  # real signature unknown
        """Return the recognized text coded as UTF-8 from the image."""

    def GetVariableAsString(self, *args, **kwargs):  # real signature unknown
        """Return the value of named variable as a string (regardless of type),
        if it exists.

        Returns ``None`` if the parameter was not found.
        """

    def GetWords(self, *args, **kwargs):  # real signature unknown
        """Get the words as a list of image, box bounds
        {x, y, width, height} tuples in reading order.

        Can be called before or after :meth:`Recognize`.

        Returns:
            list: List of tuples containing the following values respectively::

                image (:class:`PIL.Image`): Image object.
                bounding box (dict): dict with x, y, w, h keys.
        """

    def Init(self, *args, **kwargs):  # real signature unknown
        """Initialize the API with the given data path, language and OCR engine mode.

        See :meth:`InitFull` for more initialization info and options.

        Args:
            path (str): The name of the parent directory of tessdata.
                Must end in /. Uses default installation path if not specified.
            lang (str): An ISO 639-3 language string. Defaults to 'eng'.
                See :meth:`InitFull` for full description of this parameter.
            oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
                See :class:`OEM` for all available options.

        Raises:
            :exc:`RuntimeError`: If API initialization fails.
        """

    def InitForAnalysePage(self, *args, **kwargs):  # real signature unknown
        """Init only for page layout analysis.

        Use only for calls to :meth:`SetImage` and :meth:`AnalyseLayout`.
        Calls that attempt recognition will generate an error.
        """

    def InitFull(self, *args, **kwargs):  # real signature unknown
        """Initialize the API with the given parameters (advanced).

        It is entirely safe (and eventually will be efficient too) to call
        :meth:`Init` multiple times on the same instance to change language, or just
        to reset the classifier.

        Page Segmentation Mode is set to :attr:`PSM.AUTO` after initialization by default.

        Args:
            path (str): The name of the parent directory of tessdata.
                Must end in /.
            lang (str): An ISO 639-3 language string. Defaults to 'eng'.
                The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
                that multiple languages are to be loaded. Eg hin+eng will load Hindi and
                English. Languages may specify internally that they want to be loaded
                with one or more other languages, so the ~ sign is available to override
                that. Eg if hin were set to load eng by default, then hin+~eng would force
                loading only hin. The number of loaded languages is limited only by
                memory, with the caveat that loading additional languages will impact
                both speed and accuracy, as there is more work to do to decide on the
                applicable language, and there is more chance of hallucinating incorrect
                words.
            oem (int): OCR engine mode. Defaults to :attr:`OEM.DEFAULT`.
                See :class:`OEM` for all available options.
            configs (list): List of config files to load variables from.
            variables (dict): Extra variables to be set.
            set_only_non_debug_params (bool): If ``True``, only params that do not contain
                "debug" in the name will be set.

        Raises:
            :exc:`RuntimeError`: If API initialization fails.
        """

    def IsValidCharacter(self, *args, **kwargs):  # real signature unknown
        """Return True if character is defined in the UniCharset.

        Args:
            character: UTF-8 encoded character.
        """

    def MapWordConfidences(self, *args, **kwargs):  # real signature unknown
        """Return list of word, confidence tuples."""

    def MeanTextConf(self, *args, **kwargs):  # real signature unknown
        """Return the (average) confidence value between 0 and 100."""

    def oem(self, *args, **kwargs):  # real signature unknown
        """Return the last set OCR engine mode."""

    def ProcessPage(self, *args, **kwargs):  # real signature unknown
        """Turn a single image into symbolic text.

        See :meth:`ProcessPages` for descriptions of the keyword arguments
        and all other details.

        Args:
            outputbase (str): The name of the output file excluding
                extension. For example, "/path/to/chocolate-chip-cookie-recipe".
            image (:class:`PIL.Image`): The image processed.
            page_index (int): Page index (metadata).
            filename (str): `filename` and `page_index` are metadata
                used by side-effect processes, such as reading a box
                file or formatting as hOCR.

        Raises:
            :exc:`RuntimeError`: If `image` is invalid or no renderers are enabled.
        """

    def ProcessPages(self, *args, **kwargs):  # real signature unknown
        """Turn images into symbolic text.

        Set at least one of the following variables to enable renderers
        before calling this method::

            tessedit_create_hocr (bool): hOCR Renderer
                if ``font_info`` is ``True`` then it'll be included in the output.
            tessedit_create_pdf (bool): PDF Renderer
            tessedit_write_unlv (bool): UNLV Renderer
            tessedit_create_boxfile (bool): Box Text Renderer
            tessedit_create_txt (bool): Text Renderer

        .. note::

            If tessedit_page_number variable is non-negative, will only process that
            single page. Works for multi-page tiff file, or filelist.

        Args:
            outputbase (str): The name of the output file excluding
                extension. For example, "/path/to/chocolate-chip-cookie-recipe".
            filename (str): Can point to a single image, a multi-page TIFF,
                or a plain text list of image filenames.

        Kwargs:
            retry_config (str): Is useful for debugging. If specified, you can fall
                back to an alternate configuration if a page fails for some reason.
            timeout (int): Terminates processing if any single page
                takes too long (`timeout` milliseconds). Defaults to 0 (unlimited).

        Returns:
            bool: True if successful, False on error.

        Raises:
            :exc:`RuntimeError`: If no renderers enabled in api variables.
        """

    def ReadConfigFile(self, *args, **kwargs):  # real signature unknown
        """Read a "config" file containing a set of param, value pairs.

        Searches the standard places: tessdata/configs, tessdata/tessconfigs.

        Args:
            filename: config file name. Also accepts relative or absolute path name.
        """

    def Recognize(self, *args, **kwargs):  # real signature unknown
        """Recognize the image from :meth:`SetImage`, generating Tesseract
        internal structures. Returns ``True`` on success.

        Optional. The `Get*Text` methods will call :meth:`Recognize` if needed.

        After :meth:`Recognize`, the output is kept internally until the next :meth:`SetImage`.

        Kwargs:
            timeout (int): time to wait in milliseconds before timing out.

        Returns:
            bool: ``True`` if the operation is successful.
        """

    def RecognizeForChopTest(self, *args, **kwargs):  # real signature unknown
        """Variant on :meth:`Recognize` used for testing chopper."""

    def SetDebugVariable(self, *args, **kwargs):  # real signature unknown
        """Set the value of an internal parameter. (debug)

        Supply the name of the parameter and the value as a string, just as
        you would in a config file.

        Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
        Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.

        SetVariable may be used before Init, but settings will revert to
        defaults on End().

        Args:
            name (str): Variable name
            value (str): Variable value

        Returns:
            bool: ``False`` if the name lookup failed.
        """

    def SetImage(self, *args, **kwargs):  # real signature unknown
        """Provide an image for Tesseract to recognize.

        This method can be called multiple times after :meth:`Init`.

        Args:
            image (:class:`PIL.Image`): Image object.

        Raises:
            :exc:`RuntimeError`: If for any reason the api failed
                to load the given image.
        """

    def SetImageBytes(self, *args, **kwargs):  # real signature unknown
        """Provide an image for Tesseract to recognize.

        Format is as :meth:`TesseractRect`. Does not copy the image buffer, or take
        ownership. The source image may be destroyed after Recognize is called,
        either explicitly or implicitly via one of the `Get*Text` methods.

        This method clears all recognition results, and sets the rectangle to the
        full image, so it may be followed immediately by a :meth:`GetUTF8Text`, and it
        will automatically perform recognition.

        Args:
            imagedata (str): Raw image bytes.
            width (int): image width.
            height (int): image height.
            bytes_per_pixel (int): bytes per pixel.
                Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
                Palette color images will not work properly and must be converted to
                24 bit.
                Binary images of 1 bit per pixel may also be given but they must be
                byte packed with the MSB of the first byte being the first pixel, and a
                1 represents WHITE. For binary images set bytes_per_pixel=0.
            bytes_per_line (int): bytes per line.
        """

    def SetImageFile(self, *args, **kwargs):  # real signature unknown
        """Set image from file for Tesseract to recognize.

        Args:
            filename (str): Image file relative or absolute path.

        Raises:
            :exc:`RuntimeError`: If for any reason the api failed
                to load the given image.
        """

    def SetOutputName(self, *args, **kwargs):  # real signature unknown
        """Set the name of the bonus output files. Needed only for debugging."""

    def SetPageSegMode(self, *args, **kwargs):  # real signature unknown
        """Set page segmentation mode.

        Args:
            psm (int): page segmentation mode.
                See :class:`PSM` for all available psm options.
        """

    def SetRectangle(self, *args, **kwargs):  # real signature unknown
        """Restrict recognition to a sub-rectangle of the image. Call after :meth:`SetImage`.

        Each SetRectangle clears the recognition results so multiple rectangles
        can be recognized with the same image.

        Args:
            left (int): position from left
            top (int): position from top
            width (int): width
            height (int): height
        """

    def SetSourceResolution(self, *args, **kwargs):  # real signature unknown
        """Set the resolution of the source image in pixels per inch so font size
        information can be calculated in results.

        Call this after :meth:`SetImage`.
        """

    # The generator had "restored" this signature from the usage example in the
    # docstring, yielding the bogus parameter names ``tessedit_char_blacklist``
    # and ``xyz``. Accept anything instead, like the sibling stubs do; the
    # documented arguments are listed under ``Args`` below.
    def SetVariable(self, *args, **kwargs):  # real signature unknown
        """Set the value of an internal parameter.

        Supply the name of the parameter and the value as a string, just as
        you would in a config file.

        Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
        Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.

        SetVariable may be used before Init, but settings will revert to
        defaults on End().

        Args:
            name (str): Variable name
            value (str): Variable value

        Returns:
            bool: ``False`` if the name lookup failed.
        """

    def set_min_orientation_margin(self, *args, **kwargs):  # real signature unknown
        """Set minimum orientation margin.

        Args:
            margin (float): orientation margin.
        """

    def TesseractRect(self, *args, **kwargs):  # real signature unknown
        """Recognize a rectangle from an image and return the result as a string.

        May be called many times for a single Init.
        Currently has no error checking.

        .. note::

            `TesseractRect` is the simplified convenience interface. For advanced
            uses, use :meth:`SetImage`, (optionally) :meth:`SetRectangle`,
            :meth:`Recognize`, and one or more of the `Get*Text` methods.

        Args:
            imagedata (str): Raw image bytes.
            bytes_per_pixel (int): bytes per pixel.
                Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
                Palette color images will not work properly and must be converted to
                24 bit.
                Binary images of 1 bit per pixel may also be given but they must be
                byte packed with the MSB of the first byte being the first pixel, and a
                1 represents WHITE. For binary images set bytes_per_pixel=0.
            bytes_per_line (int): bytes per line.
            left (int): left rectangle coordinate.
            top (int): top rectangle coordinate.
            width (int): image width.
            height (int): image height.

        Returns:
            unicode: The recognized text as UTF8.
        """

    def Version(self, *args, **kwargs):  # real signature unknown
        """Stub only: no documentation was recovered for this method."""

    def __enter__(self, *args, **kwargs):  # real signature unknown
        """Enter the runtime context (see the context-manager usage in the class docstring)."""

    def __exit__(self, *args, **kwargs):  # real signature unknown
        """Exit the runtime context (see the context-manager usage in the class docstring)."""

    # ``path`` and ``lang`` were restored from the docstring by the generator.
    # The class docstring also documents ``psm``, ``init`` and ``oem``; they are
    # accepted through ``*args`` / ``**kwargs`` because their real default
    # values are not recoverable from this skeleton.
    def __init__(self, path=None, lang='eng', *args, **kwargs):  # real signature unknown
        """Initializer stub; see the class docstring for the documented arguments."""

    @staticmethod  # known case of __new__
    def __new__(*args, **kwargs):  # real signature unknown
        """Create and return a new object. See help(type) for accurate signature."""

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def __setstate__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    # Placeholder only: the generator could not reproduce the real capsule object.
    __pyx_vtable__ = None  # (!) real value is '<capsule object NULL at 0x0000022C8F958750>'
1868
1869
class RIL(_Enum):
    """Enumeration of the available Page Iterator levels.

    Attributes:
        BLOCK: of text/image/separator line.
        PARA: within a block.
        TEXTLINE: within a paragraph.
        WORD: within a textline.
        SYMBOL: character within a word.
    """

    # Members in ascending value order, matching the docstring above.
    BLOCK = 0
    PARA = 1
    TEXTLINE = 2
    WORD = 3
    SYMBOL = 4

    @staticmethod  # known case of __new__
    def __new__(*args, **kwargs):  # real signature unknown
        """Create and return a new object.

        See ``help(type)`` for the accurate signature.
        """

    def __init__(self, *args, **kwargs):  # real signature unknown
        """Initializer stub; the real signature could not be recovered."""

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def __setstate__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""
1900
1901
class TextlineOrder(_Enum):
    """Enumeration of the text line order options."""

    LEFT_TO_RIGHT = 0
    RIGHT_TO_LEFT = 1
    TOP_TO_BOTTOM = 2

    @staticmethod  # known case of __new__
    def __new__(*args, **kwargs):  # real signature unknown
        """Create and return a new object.

        See ``help(type)`` for the accurate signature.
        """

    def __init__(self, *args, **kwargs):  # real signature unknown
        """Initializer stub; the real signature could not be recovered."""

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def __setstate__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""
1921
1922
class WritingDirection(_Enum):
    """Enumeration of the writing direction options."""

    LEFT_TO_RIGHT = 0
    RIGHT_TO_LEFT = 1
    TOP_TO_BOTTOM = 2

    @staticmethod  # known case of __new__
    def __new__(*args, **kwargs):  # real signature unknown
        """Create and return a new object.

        See ``help(type)`` for the accurate signature.
        """

    def __init__(self, *args, **kwargs):  # real signature unknown
        """Initializer stub; the real signature could not be recovered."""

    def __reduce__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""

    def __setstate__(self, *args, **kwargs):  # real signature unknown
        """Pickle-protocol stub; the real signature could not be recovered."""
1942
1943
# variables with complex values
#
# The skeleton generator could not reproduce the real objects for the names
# below, so ``None`` placeholders are used; the trailing comments record the
# repr of the real values as seen on the machine where the skeleton was built.

__loader__ = None # (!) real value is '<_frozen_importlib_external.ExtensionFileLoader object at 0x0000022C8F98CEC8>'

__spec__ = None # (!) real value is "ModuleSpec(name='tesserocr._tesserocr', loader=<_frozen_importlib_external.ExtensionFileLoader object at 0x0000022C8F98CEC8>, origin='C:\\\\Users\\\\slooh\\\\Anaconda3\\\\lib\\\\site-packages\\\\tesserocr\\\\_tesserocr.cp37-win_amd64.pyd')"

__test__ = {}