· 7 years ago · Feb 15, 2018, 04:06 PM
1import collections
2import openpyxl
3from fuzzywuzzy import fuzz
4from fuzzywuzzy import process
5
6
7#Open book
8book = openpyxl.load_workbook('2.xlsx')
9ws = book.active
10
11#Creating the Excel Columns-Rows range OrderDict
12excel_range = collections.OrderedDict()
13
14#The list of pharmacies name
15#The list of pharmacies name
16apteki_list = ['Izumrud', 'Ostojenka', 'Michur', 'Redmayak_4k1', 'Samark', 'Poletaeva',
17 'Prechistenka', 'MalayaDmitrovka', 'Sherbkina', 'Kommunarka',
18 'Chelyabinskaya', 'Tverskaya', 'Sh_Entuziastov', 'Lusinovskaya',
19 'Blvr_Yana_Rajninsa', 'Barklaya', 'Warshavskoe_shosse', 'Kornejchuka',
20 'Rudnevka', 'Julebinskiy_blvr', 'Nijnaya_Krasnoselksaya', 'Kosmonavtov',
21 'Sholakskogo', 'Kashirskoe_shosse', 'Redmayak_9', 'Kastanaevskaya', 'Birulovskaya',
22 'Kotelniki_Kuzminksaya', 'Ramenskoe_Vokzalnaya_sqr', 'Lubertsi_3-e_Pochtovoe', 'Dolgoprudnij_Moskovskaya']
23
24
25apteki_rus_address = ['ИзумруднаÑ, д. 18', 'ул. ОÑтоженка', 'МичуринÑкий пр-Ñ‚', 'ул. КраÑного МаÑка, д. 4к1',
26 'СамарканÑкий б-Ñ€', 'ул. Ф. Полетаева,',
27 'ул. ПречиÑтенк', 'ул. ÐœÐ°Ð»Ð°Ñ Ð”Ð¼Ð¸Ñ‚Ñ€Ð¾Ð²ÐºÐ°', 'Щербинка, ул. БарышевÑÐºÐ°Ñ Ñ€Ð¾Ñ‰Ð°,',
28 'поÑ. Коммунарка, ул. Липовый парк', 'ул. ЧелÑбинÑкаÑ', 'ул. ТверÑкаÑ',
29 'ш. ÐнтузиаÑтов', 'ул. ЛюÑиновÑкаÑ', 'б-Ñ€ Яна РайниÑа', 'ул.БарклаÑ',
30 'ВаршавÑкое ш.', ' ул. Корнейчука', 'ул. Рудневка', 'ЖулебинÑкий б-Ñ€',
31 'ÐижнÑÑ ÐšÑ€Ð°ÑноÑельÑкаÑ', 'ул. КоÑмонавтов', 'ул. ШокальÑкого', 'КаширÑкое ш., д. 53, корп. 4',
32 'ул. КраÑного МаÑка, д. 9', 'ул. КаÑтанаевÑкаÑ, д. 42, корп. 2', 'ул. БирюлевÑкаÑ, д. 13, корп. 4',
33 'г. Котельники, ул. КузьминÑкаÑ, д. 17', 'г. РаменÑкое, Ð’Ð¾ÐºÐ·Ð°Ð»ÑŒÐ½Ð°Ñ Ð¿Ð»., д. 4б',
34 'г. Люберцы, ул. 3-е Почтовое отделение д. 49, корп. 2', 'МО, г. Долгопрудный, ул.МоÑковÑкаÑ, д. 56, корп. 3']
35
36#Apteki dict
37apteki = collections.OrderedDict()
38
39#The nested apteki dict # apteki['Izumrud'] = {}
40for apteka in apteki_list:
41 apteki[apteka] = {}
42
43
44
45# Creating list for storing all the Pharmacy coordinates
46for k in apteki.keys():
47 apteki[k]['name_occurrence_coor'] = []
48
49# Adding rus_addresses
50for counter, apteka in enumerate(apteki.keys()):
51 apteki[apteka]['rus_address'] = apteki_rus_address[counter]
52
53
54
55# Iterating over the whole book:
56# Search for the active Range of cells
57# Search for Pharmacy names coordinates
58for row in ws.iter_rows():
59 for cell in row:
60 # Saving the non-full Columns letters and Rows
61 # range in the excel_range dict
62 if cell.coordinate[0] not in excel_range:
63 # [0] - a letter(Column), i.e. A [1:] - the int
64 excel_range[cell.coordinate[0]] = [int(cell.coordinate[1:])]
65 else:
66 excel_range[cell.coordinate[0]].append(int(cell.coordinate[1:]))
67 # Searching for pharmacy names
68
69 if isinstance(cell.value, str):
70 for key, value in apteki.items():
71 if key == 'Redmayak_4k1' or key == 'Redmayak_9':
72 if fuzz.token_sort_ratio('КраÑного МаÑка,', cell.value) > 50:
73 templist = []
74 for character in list(cell.value):
75 try:
76 templist.append(int(character))
77 except:
78 pass
79 # print('**' * 200)
80 # print('Templist')
81 # print(templist)
82 # print(templist[-1])
83 # print('**' * 200)
84 if templist[-1] == 9:
85 apteki['Redmayak_9']['name_occurrence_coor'].append(cell.coordinate)
86 else:
87 apteki['Redmayak_4k1']['name_occurrence_coor'].append(cell.coordinate)
88 else:
89 if fuzz.token_sort_ratio(value['rus_address'], cell.value) > 60:
90 # Appending all instances of Pharm name coordinates
91 apteki[key]['name_occurrence_coor'].append(cell.coordinate)
92
93
94
95### Creating a tuple of search ranges for Revenue and Average Cheque
96
97for apteka in apteki_list:
98 try:
99 first_name_cooridnate = int(apteki[apteka]['name_occurrence_coor'][0][1:])
100 the_index = apteki_list.index(apteka) + 1
101 last_name_cooridnate = int(apteki[apteki_list[the_index]]['name_occurrence_coor'][0][1:]) - 1
102 print("#" * 40)
103 print(apteka, the_index, first_name_cooridnate, last_name_cooridnate)
104 apteki[apteka]['search_range'] = (first_name_cooridnate, last_name_cooridnate)
105 except IndexError:
106 first = apteki[apteka]['name_occurrence_coor']
107 print('*' * 200)
108 print(apteka + ' ' + str(first))
109 print('*' * 200)
110 first_name_cooridnate = 1
111 last_name_cooridnate = 129
112 apteki[apteka]['search_range'] = (first_name_cooridnate, last_name_cooridnate)
113
114
115
116
117
118# Iterating over found range of active cells
119# Searching for Revenue and Average Cheques
120
121# Iterating
122columns_list = list(excel_range.keys())
123min_value_of_range = min(excel_range[columns_list[0]])
124max_value_of_range = max(excel_range[columns_list[-1]])
125active_cells = ws[min_value_of_range:max_value_of_range]
126for row in active_cells:
127 for cell in row:
128 ##### Searching for Revenue str
129 if isinstance(cell.value, str):
130 if fuzz.token_sort_ratio('выручка', cell.value) > 50:
131 # Pharmacy Izumrudnaya
132 for apteka in apteki.keys():
133 if int(cell.coordinate[1:]) in range(apteki[apteka]['search_range'][0],
134 apteki[apteka]['search_range'][1]):
135 apteki[apteka]['revenue_coordinate'] = cell.coordinate
136
137 #### Searching for Revenue int
138 if isinstance(cell.value, int):
139 # Pharmacy Izumrudnaya
140 for apteka in apteki.keys():
141 if 'revenue_coordinate' in apteki[apteka]:
142 if cell.coordinate[1:] == apteki[apteka]['revenue_coordinate'][1:]:
143 apteki[apteka]['revenue'] = cell.value
144
145 #### Searching for Revenue int float
146 if isinstance(cell.value, float):
147 # Pharmacy Izumrudnaya
148 for apteka in apteki.keys():
149 if 'revenue_coordinate' in apteki[apteka]:
150 if cell.coordinate[1:] == apteki[apteka]['revenue_coordinate'][1:]:
151 apteki[apteka]['revenue'] = cell.value
152
153
154
155
156
157
158for k, v in apteki.items():
159 print(k + ' -- ' + str(v))