· 5 years ago · Oct 25, 2020, 04:16 PM
1from urllib.request import urlopen
2from urllib.parse import quote
3from urllib.error import HTTPError
4import re
5import sqlite3
6import argparse
7import hashlib
8import time
9
10
class AddressSpliter:
    """Split a raw street string into a street type and a street name.

    The string is searched for ``<name> <type>`` where ``<type>`` is one of
    the known street-type keywords. When no keyword follows a name, the
    first space-separated word is taken as the type instead.
    """

    # Known street-type keywords, kept in their original matching order.
    _STREET_TYPES = (
        'гавань', 'слобода',
        'коса', 'роща', 'поселение',
        'шоссе', 'парк', 'территория',
        'лесничество', 'улица', 'кольцо',
        'просек', 'берег', 'набережная',
        'пансионат', 'вал', 'проспект',
        'км', 'поле', 'канал', 'платформа',
        'дорожка', 'двор', 'бульвар', 'м-н',
        'село', 'пр-т', 'сад', 'проток',
        'аэропорт', 'дорога', 'островок',
        'километр', 'микрорайон',
        'остров', 'аллея', 'проезд',
        'квартал', 'товарищество',
        'линия', 'станция', 'тупик',
        'площадь', 'городок', 'пост',
        'переулок', 'тракт',
    )

    # Abbreviated keywords and the full type name reported for them.
    _ABBREVIATIONS = {
        'пр-т': 'проспект',
        'км': 'километр',
        'м-н': 'микрорайон',
    }

    def __init__(self):
        alternatives = '|'.join(re.escape(t) for t in self._STREET_TYPES)
        # (?!\w) stops a keyword from matching as the prefix of a longer
        # word (e.g. "сад" inside "садовая"), which used to cut the name
        # short and report the wrong type.
        self.split_street_pattern = re.compile(
            r'(.+?) (' + alternatives + r')(?!\w)')

    def get_street_and_streettype(self, street):
        """Return ``(street_type, street_name)`` for *street*.

        * If a known type keyword follows the name, the keyword (with
          abbreviations expanded) and the text before it are returned.
        * Otherwise, if *street* contains a space, the part before the
          first space is returned as the type and the rest as the name.
        * Otherwise ``('-', street)`` is returned.

        The result is always a 2-tuple.
        """
        match = self.split_street_pattern.search(street)
        if match is None:
            # No keyword found: fall back to "first word is the type".
            head, separator, tail = street.partition(' ')
            if not separator:
                return '-', street
            return head, tail
        street_type = match[2]
        return self._ABBREVIATIONS.get(street_type, street_type), match[1]
44
45
class Dumper:
    """Store per-city address rows in an SQLite database.

    The database holds a ``cities`` index table plus one table per city,
    named after the city's code.
    """

    def __init__(self, name_db):
        """Open ``../<name_db>`` and make sure the ``cities`` table exists."""
        self.conn = sqlite3.connect('../{0}'.format(name_db))
        self.cursor = self.conn.cursor()
        self.spliter = AddressSpliter()
        self.cursor.execute(
            """CREATE TABLE IF NOT EXISTS
            cities (city text, region text, name_table text)""")

    @staticmethod
    def _quote_identifier(name):
        """Return *name* as a double-quoted SQL identifier."""
        # Identifiers cannot be bound as parameters, so embedded quotes are
        # doubled to keep the name inside the quoted identifier.
        return '"{0}"'.format(str(name).replace('"', '""'))

    def dump_city(self, region, name, code, way_iter):
        """Register a city and insert its addresses into its own table.

        Args:
            region: value stored in the ``region`` column of ``cities``.
            name: city name; stored in ``cities`` and printed to stdout.
            code: used as the name of the per-city table.
            way_iter: iterable of items indexable by 1..4, giving latitude,
                longitude, house number and raw street string (in that
                order).

        Rows are keyed by the MD5 hex digest of
        ``"<streettype> <street> <housenumber>"``; a row whose hash is
        already present is ignored.
        """
        table = self._quote_identifier(code)
        # Values are bound as parameters: string-formatted SQL broke on any
        # value containing a double quote.
        self.cursor.execute(
            'INSERT OR IGNORE INTO cities VALUES (?, ?, ?)',
            (name, region, code))
        # IF NOT EXISTS lets a city be dumped again without failing,
        # matching how the cities table itself is created.
        self.cursor.execute(
            """CREATE TABLE IF NOT EXISTS {0}
            (hash text, streettype text, street text, housenumber text,
            lat float, lon float, UNIQUE (hash))""".format(table))
        print(name)
        insert_sql = \
            'INSERT OR IGNORE INTO {0} VALUES (?, ?, ?, ?, ?, ?)'.format(table)
        for way in way_iter:
            street_info = self.spliter.get_street_and_streettype(way[4])
            address = \
                '{0} {1} {2}'.format(street_info[0], street_info[1], way[3])
            # MD5 is used only as a deduplication key, not for security.
            hash_ = hashlib.md5(address.encode('utf-8'))
            self.cursor.execute(
                insert_sql,
                (hash_.hexdigest(), street_info[0], street_info[1],
                 way[3], way[1], way[2]))
        # One commit per city instead of one per row.
        self.conn.commit()

    def kill(self):
        """Commit any pending changes and close the connection."""
        self.conn.commit()
        self.conn.close()
80
81
class OverpassApiGetter:
    """Fetch building ways with address tags from the Overpass API."""

    def __init__(self):
        # Base URL; the URL-quoted query is appended directly to it.
        self.overpass_api = 'http://overpass-api.de/api/interpreter?data='

    def get_xml_ways(self, code):
        """Return the Overpass XML response for the area derived from *code*.

        The area id is ``3600000000 + int(code)`` (presumably *code* is an
        OSM relation id; confirm against the cities file). The query asks
        for ways that have ``addr:housenumber`` and ``addr:street`` tags and
        whose ``building`` tag is not ``no``, output with their centers.

        Raises:
            ValueError: if *code* cannot be converted to ``int``.
        """
        area_id = 3600000000 + int(code)
        raw_query = (
            '[out:xml][timeout:360];area({0});'
            'way["building"!="no"]'
            '["addr:housenumber"]'
            '["addr:street"](area);'
            'out center;'
        ).format(area_id)
        url = self.overpass_api + quote(raw_query)
        with urlopen(url) as response:
            payload = response.read()
        # Undecodable bytes are dropped rather than raising.
        return payload.decode('utf-8', errors='ignore')
95
96
def get_iter_ways(xml):
    """Return an iterator of regex matches, one per way found in *xml*.

    Each match exposes four groups: center latitude, center longitude,
    ``addr:housenumber`` value and ``addr:street`` value. The house-number
    tag must appear before the street tag for a way to match.

    Returns ``None`` when *xml* contains no ``<way`` at all.
    """
    if xml.find('<way') < 0:
        return None
    pieces = (
        r'<way.+?',
        r'<center lat="(.+?)" lon="(.+?)".+?',
        r'<tag k="addr:housenumber" v="(.+?)".+?',
        r'<tag k="addr:street" v="(.+?)"/>',
    )
    # DOTALL lets ".+?" run across the line breaks inside a way element.
    way_regex = re.compile(''.join(pieces), flags=re.DOTALL)
    return way_regex.finditer(xml)
106
107
def get_args_parser():
    """Build the command-line parser for the script.

    The parser takes two positional string arguments: ``cities_path`` and
    ``name_db``.
    """
    parser = argparse.ArgumentParser(description='Write in DB')
    positional_help = (
        ('cities_path', 'path to cities list in txt-format'),
        ('name_db', 'name of DB'),
    )
    for arg_name, help_text in positional_help:
        parser.add_argument(arg_name, type=str, help=help_text)
    return parser
115
116
if __name__ == '__main__':
    # Script entry point: for every line of the cities file, download the
    # city's ways and dump them into the database.
    args_parser = get_args_parser()
    args = args_parser.parse_args()

    overpass_api_getter = OverpassApiGetter()
    dumper = Dumper(args.name_db)

    try:
        with open(args.cities_path, 'r', encoding='utf-8') as cities:
            for city in cities:
                # Strip only the line terminator. Slicing off the last
                # character unconditionally corrupted the region when the
                # final line had no trailing newline.
                city = city.rstrip('\n')
                if not city.strip():
                    # Blank lines carry no city; skip them instead of
                    # failing on a missing field.
                    continue
                # Fields are comma-separated: name, code, region.
                city_info = city.split(',')
                xml_ways = overpass_api_getter.get_xml_ways(city_info[1])
                iter_ways = get_iter_ways(xml_ways)
                if iter_ways is None:
                    continue
                dumper.dump_city(
                    city_info[2],
                    city_info[0],
                    city_info[1],
                    iter_ways)
    finally:
        # Always commit and close the connection, even when a download or
        # an insert fails part-way through the list.
        dumper.kill()
137 dumper.kill()