· 7 years ago · Jul 12, 2018, 01:16 PM
1from __future__ import division, print_function
2
3import argparse
4import json
5import os
6
7import requests
8import polling
9
DEFAULT_API_URL = 'https://all.rir.rossum.ai'


class ElisClient(object):
    """
    Simple client for the Rossum Elis API that submits a document for
    extraction and then waits for the processed result.

    Usage:
    ```
    client = ElisClient(secret_key, base_url)
    document_id = client.send_document(document_path)['id']
    extracted_document = client.get_document(document_id)
    ```
    """

    def __init__(self, secret_key, url=DEFAULT_API_URL, request_timeout=None):
        """
        Args:
            secret_key: Secret API key, sent in the Authorization header.
            url: Base API URL. Trailing slashes are stripped so that the
                endpoint paths appended later never contain '//'.
            request_timeout: Optional timeout (seconds) passed to every HTTP
                request. None (the default) applies no timeout, which is
                the behaviour of `requests` when none is given.
        """
        self.secret_key = secret_key
        self.url = url.rstrip('/')
        self.request_timeout = request_timeout
        # we do not use requests.auth.HTTPBasicAuth; the key is sent with a
        # custom 'secret_key <key>' Authorization scheme instead
        self.headers = {'Authorization': 'secret_key ' + self.secret_key}

    def send_document(self, document_path):
        """
        Submits a document to Elis API for extractions.

        Args:
            document_path: Path of the file to upload.

        Returns: dict with 'id' representing job id

        Raises:
            requests.HTTPError: if the API answers with a 4xx/5xx status.
        """
        with open(document_path, 'rb') as f:
            content_type = self._content_type(document_path)
            response = requests.post(
                self.url + '/document',
                files={'file': (os.path.basename(document_path), f, content_type)},
                headers=self.headers,
                timeout=self.request_timeout)
        # Fail loudly on HTTP errors instead of handing an error payload to
        # callers that expect an 'id' key.
        response.raise_for_status()
        return json.loads(response.text)

    @staticmethod
    def _content_type(document_path):
        """
        Returns the MIME type to upload the file with: 'image/png' for a
        (case-insensitive) '.png' suffix, 'application/pdf' for anything else.
        """
        return 'image/png' if document_path.lower().endswith('.png') else 'application/pdf'

    def get_document_status(self, document_id):
        """
        Gets a single document status.

        Prints the response whenever its status is not 'ready', so progress
        is visible while polling.

        Args:
            document_id: Job id as returned by send_document()['id'].

        Returns: parsed JSON response (dict with a 'status' key)

        Raises:
            requests.HTTPError: if the API answers with a 4xx/5xx status.
        """
        response = requests.get(
            # format() rather than '+' so non-string ids do not raise TypeError
            '{}/document/{}'.format(self.url, document_id),
            headers=self.headers,
            timeout=self.request_timeout)
        response.raise_for_status()
        response_json = json.loads(response.text)
        if response_json['status'] != 'ready':
            print(response_json)
        return response_json

    def get_document(self, document_id, max_retries=30, sleep_secs=5):
        """
        Waits for document via polling.

        Polls get_document_status() every `sleep_secs` seconds until the
        status is anything other than 'processing', for at most about
        `max_retries * sleep_secs` seconds in total.

        Returns: the last status response (dict)

        Raises:
            polling.TimeoutException: if the document is still processing
                when the time budget runs out.
        """
        def is_done(response_json):
            return response_json['status'] != 'processing'

        return polling.poll(
            lambda: self.get_document_status(document_id),
            check_success=is_done,
            step=sleep_secs,
            timeout=int(round(max_retries * sleep_secs)))
68
def parse_args():
    """
    Parses the command-line arguments of the example script.

    Returns: argparse.Namespace with `document_path`, `secret_key` and
        `base_url` attributes.
    """
    parser = argparse.ArgumentParser(description='Elis API client example.')
    parser.add_argument('document_path', metavar='DOCUMENT_PATH',
                        help='Document path (PDF/PNG)')
    # Required: without a key the client would fail with a TypeError while
    # building the Authorization header, so report a usage error instead.
    parser.add_argument('-s', '--secret-key', required=True, help='Secret API key')
    parser.add_argument('-u', '--base-url', default=DEFAULT_API_URL, help='Base API URL')

    return parser.parse_args()
77
def main():
    """
    Command-line entry point: uploads the document named on the command
    line, waits for the extraction to finish and prints the result as JSON.
    """
    options = parse_args()
    elis = ElisClient(options.secret_key, options.base_url)

    path = options.document_path
    print('Submitting document:', path)
    job_id = elis.send_document(path)['id']
    print('Document id:', job_id)

    result = elis.get_document(job_id)
    print('Extracted data:')
    print(json.dumps(result, indent=4))


if __name__ == '__main__':
    main()
91
# Usage: python elis_client_example.py ../data/invoice.pdf -s xxxxxxxxxxxxxxxxxxxxxx_YOUR_ELIS_API_KEY_xxxxxxxxxxxxxxxxxxxxxxx