· 6 years ago · Aug 25, 2019, 08:42 AM
1import random
2import string
3import csv
4
5
def randomword(length):
    """Return a random word made only of lowercase ASCII letters.

    Args:
        length: number of characters to generate; values <= 0 yield ''.

    Returns:
        A string of ``length`` characters, each drawn independently with
        ``random.choice`` from ``string.ascii_lowercase``.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
9
10
def random_with_N_digits(n):
    """Return a random integer that has exactly ``n`` decimal digits.

    Args:
        n: number of digits wanted (the bounds are 10**(n-1) and 10**n - 1,
           both inclusive).

    Returns:
        An int drawn with ``random.randint`` between those two bounds.
    """
    lowest, highest = 10 ** (n - 1), (10 ** n) - 1
    return random.randint(lowest, highest)
15
16
# ---------------------------------------------------------------------------
# Fixed value sets the generator samples from when filling CSV columns.
# ---------------------------------------------------------------------------

# Document numbers used for the contract_doc_no column.
contract_doc_no_repo = [
    "2014/S 049-081514", "2014/S 049-081437", "2014/S 049-080908",
    "2014/S 049-081554", "2014/S 059-098592", "2014/S 059-099073",
    "2014/S 059-098571", "2014/S 037-060968", "2014/S 037-060480",
    "2014/S 037-060976", "2014/S 037-060521", "2014/S 037-060901",
    "2014/S 065-110250", "2014/S 065-111356", "2014/S 065-111394",
    "2014/S 065-111798", "2014/S 065-110728", "2014/S 047-078445",
    "2014/S 047-079239",
]

# Values for the contract_activity_contractor column.
contract_activity_contractor_repo = [
    "RAILWAY_SERVICES",
    "AIRPORT_RELATED_ACTIVITIES",
    "PRODUCTION_TRANSPORT_DISTRIBUTION_GAS_HEAT",
    "URBAN_RAILWAY_TRAMWAY_TROLLEYBUS_BUS_SERVICES",
    "ELECTRICITY",
    "WATER",
]

# Two-letter country codes (same codes, same order as the keys of `nation`).
contract_authority_country_repo = [
    "AT", "BE", "BG", "CY", "CZ", "DK", "EE", "FI", "FR",
    "DE", "GR", "HU", "IE", "IT", "LV", "LT", "LU", "MT",
    "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", "GB",
]

# Country code -> town name used for the *_town columns.
nation = {
    "AT": "Vienna",
    "BE": "Brusselles",
    "BG": "SOFIA",
    "CY": "CYPRUS",
    "CZ": "Prague",
    "DK": "Copenhagen",
    "EE": "Tallinn",
    "FI": "Helsinki",
    "FR": "Paris",
    "DE": "Berlin",
    "GR": "Athens",
    "HU": "Budapest",
    "IE": "Dublin",
    "IT": "Rome",
    "LV": "Riga",
    "LT": "Vilnius",
    "LU": "Luxembourg City",
    "MT": "Valletta",
    "NL": "Amsterdam",
    "PL": "Warsaw",
    "PT": "Lisbon",
    "RO": "Bucharest",
    "SK": "Bratislava",
    "SI": "Ljubljana",
    "ES": "Madrid",
    "SE": "Stockholm",
    "GB": "London",
}

contract_type_contract_repo = ["WORKS", "SUPPLIES", "SERVICES"]

electronic_auction_repo = ["YES", "NO"]

# Free-text activity descriptions (kept in Italian: they are emitted as data).
contract_activity_type_other_repo = [
    "Gestione rifiuti",
    "Ricerca e sviluppo",
    "Costruzione edifici",
    "Costruzione strade",
    "Assistenza disabili",
    "Sicurezza aerea",
    "Servizi di mobilità",
    "Agricoltura",
]

contract_contract_type_supply_repo = [
    "PURCHASE",
    "RENTAL",
    "LEASE",
    "COMBINATION_THESE",
]

# Module-level 96-cell row template: id 0 in the first cell, the rest empty.
row = [0] + [''] * 95
41
# ---------------------------------------------------------------------------
# Pre-generated pools of random entities.  Rows written later pick one index
# per pool, so all columns describing the same authority / operator / appeal
# body / on-behalf entity stay consistent with each other.
# ---------------------------------------------------------------------------

# Number of distinct entities in each pool.
AUTH_POOL_SIZE = 200000
OPERATOR_POOL_SIZE = 100000
BUYER_POOL_SIZE = 20000
APPEAL_POOL_SIZE = 70000
ON_BEHALF_POOL_SIZE = 5000

# Built once: the key list is invariant across every random draw below.
_country_codes = list(nation)

# Contracting authorities (parallel lists, one entry per authority).
auth_list_name = [randomword(40) for _ in range(AUTH_POOL_SIZE)]
auth_list_cpv_code = [random.randint(1000000, 8000000) for _ in range(AUTH_POOL_SIZE)]
# URL is derived from the authority name at the same index (works, tested).
auth_list_url = ["http://www." + name + ".com" for name in auth_list_name]
auth_list_address = [randomword(40) for _ in range(AUTH_POOL_SIZE)]
auth_postal_code = [random.randint(1000, 90000) for _ in range(AUTH_POOL_SIZE)]
auth_phone = [random_with_N_digits(10) for _ in range(AUTH_POOL_SIZE)]
auth_email = [randomword(10) + "@gmail.com" for _ in range(AUTH_POOL_SIZE)]
auth_slug = [randomword(40) for _ in range(AUTH_POOL_SIZE)]
auth_country = [random.choice(_country_codes) for _ in range(AUTH_POOL_SIZE)]
# Location code = the authority's country code followed by a number in 1..100.
auth_location_nuts = [code + str(random.randint(1, 100)) for code in auth_country]

# Economic operators (parallel lists, one entry per operator).
operator_town = [nation.get(random.choice(_country_codes)) for _ in range(OPERATOR_POOL_SIZE)]
operator_slug = [randomword(40) for _ in range(OPERATOR_POOL_SIZE)]
operator_address = [randomword(40) for _ in range(OPERATOR_POOL_SIZE)]
operator_name = [randomword(40) for _ in range(OPERATOR_POOL_SIZE)]
operator_postal_code = [random.randint(1000, 90000) for _ in range(OPERATOR_POOL_SIZE)]
operator_phone = [random_with_N_digits(10) for _ in range(OPERATOR_POOL_SIZE)]
operator_email = [randomword(13) + "@gmail.com" for _ in range(OPERATOR_POOL_SIZE)]
operator_url = ["http://www." + randomword(9) + ".com" for _ in range(OPERATOR_POOL_SIZE)]

buyer = [randomword(40) for _ in range(BUYER_POOL_SIZE)]

# String (not bool) flags, written verbatim into the CSV.
trueFalse = ["True", "False"]

# Appeal bodies (parallel lists, one entry per body).
appeal_body_country = [random.choice(_country_codes) for _ in range(APPEAL_POOL_SIZE)]
appeal_body_phone = [random_with_N_digits(10) for _ in range(APPEAL_POOL_SIZE)]
appeal_postal_code = [random.randint(1000, 90000) for _ in range(APPEAL_POOL_SIZE)]
appeal_official_name = [randomword(40) for _ in range(APPEAL_POOL_SIZE)]
appeal_address = [randomword(40) for _ in range(APPEAL_POOL_SIZE)]

# "On behalf of" entities (parallel lists, one entry per entity).
contract_on_behalf_address = [randomword(40) for _ in range(ON_BEHALF_POOL_SIZE)]
contract_on_behalf_country = [random.choice(_country_codes) for _ in range(ON_BEHALF_POOL_SIZE)]
contract_on_behalf_postal_code = [random.randint(1000, 90000) for _ in range(ON_BEHALF_POOL_SIZE)]
contract_on_behalf_slug = [randomword(40) for _ in range(ON_BEHALF_POOL_SIZE)]
contract_on_behalf_official_name = [randomword(40) for _ in range(ON_BEHALF_POOL_SIZE)]
# Column names of the generated CSV, in output order (96 columns).  The
# position of a name here is the index used for that value in each row.
header = [
    "contract_id", "contract_doc_no", "contract_activity_contractor",
    "contract_lot_number", "contract_authority_country",
    "contract_contract_award_month", "contract_type_contract",
    "contract_contract_award_title", "contract_authority_postal_code",
    "contract_location_nuts", "contract_offers_received_meaning",
    "contract_operator_slug", "contract_operator_town",
    "contract_authority_address", "contract_appeal_procedure",
    "contract_concessionaire_contact", "contract_notice_dispatch_month",
    "contract_index", "contract_electronic_auction",
    "contract_operator_address", "contract_notice_dispatch_year",
    "contract_concessionaire_nationalid", "contract_contract_award_day",
    "contract_operator_official_name", "contract_activity_type_other",
    "contract_operator_postal_code", "contract_contract_number",
    "contract_authority_phone", "contract_concessionaire_email",
    "contract_contract_type_supply", "contract_operator_country",
    "contract_authority_url_info", "contract_relates_to_eu_project",
    "contract_notice_dispatch_day", "contract_authority_email",
    "contract_authority_town", "contract_additional_information",
    "contract_authority_attention", "contract_authority_slug",
    "contract_contract_award_year", "contract_gpa_covered",
    "contract_offers_received_num", "contract_contract_title",
    "contract_location", "contract_file_reference",
    "contract_contract_description", "contract_authority_official_name",
    "contract_cpv_code", "contract_authority_url",
    "contract_activity_type", "contract_operator_phone",
    "contract_initial_value_currency", "contract_total_value_cost_eur",
    "contract_authority_url_buyer", "contract_contract_value_vat_included",
    "contract_initial_value_cost", "contract_contract_value_currency",
    "contract_contract_value_cost", "contract_appeal_body_country",
    "contract_appeal_body_slug", "contract_appeal_body_phone",
    "contract_appeal_body_postal_code", "contract_appeal_body_fax",
    "contract_appeal_body_official_name", "contract_appeal_body_email",
    "contract_initial_value_vat_included", "contract_contract_value_cost_eur",
    "contract_total_value_cost", "contract_appeal_body_town",
    "contract_total_value_vat_included", "contract_initial_value_cost_eur",
    "contract_appeal_body_address", "contract_total_value_currency",
    "contract_operator_email", "contract_authority_fax",
    "contract_operator_url", "contract_total_value_vat_rate",
    "contract_contract_value_vat_rate", "contract_appeal_body_url",
    "contract_initial_value_vat_rate", "contract_authority_url_participate",
    "contract_operator_fax", "contract_contract_value_low_eur",
    "contract_contract_value_high", "contract_contract_value_high_eur",
    "contract_contract_value_low", "contract_on_behalf_address",
    "contract_on_behalf_country", "contract_on_behalf_town",
    "contract_on_behalf_postal_code", "contract_on_behalf_slug",
    "contract_on_behalf_official_name", "contract_total_value_low",
    "contract_total_value_high", "contract_total_value_low_eur",
    "contract_total_value_high_eur",
]
73
74
def file_writer(import_dir=None, source_name="ted-5000001.csv",
                sizes=(100, 1000, 10000, 100000)):
    """Cut consecutive sample files out of the head of a source CSV.

    The first row of the source is treated as the header.  For every entry
    ``size`` in ``sizes`` (in order) a file ``ted-<size>.csv`` is created in
    ``import_dir`` containing that header followed by the next ``size`` data
    rows of the source.  The samples are consecutive and disjoint: no data
    row is skipped or repeated between one sample and the next.  If the
    source runs out of rows, the remaining samples are simply shorter.

    Args:
        import_dir: directory holding the source file and receiving the
            samples.  Defaults to the Neo4j import directory this script
            was written for.
        source_name: file name of the source CSV inside ``import_dir``.
        sizes: number of data rows for each sample file, in output order.
    """
    import os
    from itertools import islice

    if import_dir is None:
        import_dir = ("C:\\Users\\ggiac\\.Neo4jDesktop\\neo4jDatabases\\"
                      "database-b42409c4-dd13-455e-a707-abb5ab682775\\"
                      "installation-3.5.6\\import")

    source_path = os.path.join(import_dir, source_name)
    # The source is read with the platform default encoding on purpose: that
    # is how crea_ted_500k writes it.  Samples are always written as UTF-8.
    with open(source_path, "r", newline="") as source_file:
        reader = csv.reader(source_file)
        # Pull the header off first so it never counts as a data row.
        column_names = next(reader, None)

        for size in sizes:
            sample_path = os.path.join(import_dir, "ted-" + str(size) + ".csv")
            with open(sample_path, "w", encoding="utf-8", newline="") as sample_file:
                writer = csv.writer(sample_file)
                if column_names is not None:
                    writer.writerow(column_names)
                # islice takes exactly `size` rows and leaves the reader
                # positioned on the next one, so nothing is lost at the
                # boundary between two samples.
                writer.writerows(islice(reader, size))
156
def crea_ted_500k(nomefile, id_partenza, n_rows=500000):
    """Write a CSV of synthetic contract rows.

    The file starts with the module-level ``header`` and is followed by
    ``n_rows`` rows of 96 values.  Contract ids run from ``id_partenza + 1``
    to ``id_partenza + n_rows``.  For each row one authority, one operator,
    one appeal body and one on-behalf entity are picked at random from the
    module-level pools, and every column describing that entity is read at
    the same pool index.  The loop counter is printed after each row.

    Args:
        nomefile: path of the CSV file to create (overwritten if present).
        id_partenza: id preceding the first generated contract id.
        n_rows: number of data rows to write (default 500000).
    """
    # Town -> country code, built once.  setdefault keeps the first code for
    # a town, matching a first-match search over nation's values.
    country_by_town = {}
    for code, town in nation.items():
        country_by_town.setdefault(town, code)
    country_codes = list(nation)

    row = [''] * 96
    row[0] = id_partenza
    # Written with the platform default encoding; file_writer reads it back
    # the same way.
    with open(nomefile, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for i in range(n_rows):
            # One index per pool keeps related columns consistent.
            auth_idx = random.randrange(len(auth_country))
            oper_idx = random.randrange(len(operator_town))
            appeal_idx = random.randrange(len(appeal_body_country))
            behalf_idx = random.randrange(len(contract_on_behalf_country))

            row[0] += 1                                             # contract_id
            row[1] = random.choice(contract_doc_no_repo)            # contract_doc_no
            row[2] = random.choice(contract_activity_contractor_repo)
            row[3] = random.randint(1, 50)                          # contract_lot_number
            row[4] = auth_country[auth_idx]                         # contract_authority_country
            row[5] = random.randint(1, 12)                          # contract_contract_award_month
            row[6] = random.choice(contract_type_contract_repo)     # contract_type_contract
            row[7] = "Generated award title " + randomword(20)
            row[8] = auth_postal_code[auth_idx]                     # contract_authority_postal_code
            row[9] = auth_location_nuts[auth_idx]                   # contract_location_nuts
            row[10] = random.randint(1, 12)                         # contract_offers_received_meaning
            row[11] = "Generated operator slug " + operator_slug[oper_idx]
            row[12] = operator_town[oper_idx]                       # contract_operator_town
            row[13] = auth_list_address[auth_idx]                   # contract_authority_address
            row[14] = "Generated appeal_procedure " + randomword(40)
            row[15] = ""                                            # contract_concessionaire_contact
            row[16] = random.randint(1, 12)                         # contract_notice_dispatch_month
            row[17] = random.randint(0, 24)                         # contract_index
            row[18] = random.choice(electronic_auction_repo)        # contract_electronic_auction
            row[19] = operator_address[oper_idx]                    # contract_operator_address
            row[20] = random.randint(2012, 2014)                    # contract_notice_dispatch_year
            row[21] = ""                                            # contract_concessionaire_nationalid
            row[22] = random.randint(1, 30)                         # contract_contract_award_day
            row[23] = operator_name[oper_idx]                       # contract_operator_official_name
            row[24] = random.choice(contract_activity_type_other_repo)
            row[25] = operator_postal_code[oper_idx]                # contract_operator_postal_code
            row[26] = random.randint(1, 100)                        # contract_contract_number
            row[27] = auth_phone[auth_idx]                          # contract_authority_phone
            row[28] = ""                                            # contract_concessionaire_email
            row[29] = random.choice(contract_contract_type_supply_repo)
            row[30] = country_by_town[row[12]]                      # contract_operator_country
            row[31] = auth_list_url[auth_idx]                       # contract_authority_url_info
            row[32] = "Generated contract_relates_to_eu_proget " + str(random.randint(1, 1000000))
            row[33] = random.randint(1, 30)                         # contract_notice_dispatch_day
            row[34] = auth_email[auth_idx]                          # contract_authority_email
            row[35] = nation.get(row[4])                            # contract_authority_town
            row[36] = "Generated contract additional info: " + randomword(40)
            row[37] = "Generated contract auth attention " + randomword(15)
            row[38] = auth_slug[auth_idx]                           # contract_authority_slug
            row[39] = random.randint(2009, 2014)                    # contract_contract_award_year
            row[40] = random.choice(electronic_auction_repo)        # contract_gpa_covered
            row[41] = random.randint(1, 30)                         # contract_offers_received_num
            row[42] = "Generated contract title " + randomword(10)
            row[43] = "Generated contract location " + nation.get(random.choice(country_codes))
            row[44] = "Generated contract file reference: " + randomword(25)
            row[45] = "Generated contract description: " + randomword(100)
            # Columns 46-48 are each drawn independently of auth_idx.
            row[46] = random.choice(auth_list_name)                 # contract_authority_official_name
            row[47] = "Generated contract cpv code " + str(random.choice(auth_list_cpv_code))
            row[48] = random.choice(auth_list_url)                  # contract_authority_url
            row[49] = ""                                            # contract_activity_type
            row[50] = operator_phone[oper_idx]                      # contract_operator_phone
            row[51] = "EUR"                                         # contract_initial_value_currency
            row[52] = random.randint(100000, 800000000)             # contract_total_value_cost_eur
            # contract_authority_url_buyer: filled for roughly 1 row in 26.
            if random.randint(0, 25) == 1:
                row[53] = random.randint(0, 20000)
            else:
                row[53] = ""
            row[54] = random.choice(trueFalse)                      # contract_contract_value_vat_included
            row[55] = random.randint(100000, 20000000)              # contract_initial_value_cost
            row[56] = "EUR"                                         # contract_contract_value_currency
            row[57] = random.randint(100000, 20000000)              # contract_contract_value_cost
            row[58] = appeal_body_country[appeal_idx]               # contract_appeal_body_country
            row[59] = randomword(50)                                # contract_appeal_body_slug
            row[60] = appeal_body_phone[appeal_idx]                 # contract_appeal_body_phone
            row[61] = appeal_postal_code[appeal_idx]                # contract_appeal_body_postal_code
            row[62] = row[60]                                       # contract_appeal_body_fax (= phone)
            row[63] = appeal_official_name[appeal_idx]              # contract_appeal_body_official_name
            row[64] = row[63] + "@gmail.com"                        # contract_appeal_body_email
            row[65] = random.choice(trueFalse)                      # contract_initial_value_vat_included
            row[66] = random.randint(100000, 20000000)              # contract_contract_value_cost_eur
            row[67] = random.randint(100000, 20000000)              # contract_total_value_cost
            row[68] = nation.get(row[58])                           # contract_appeal_body_town
            row[69] = random.choice(trueFalse)                      # contract_total_value_vat_included
            row[70] = random.randint(100000, 20000000)              # contract_initial_value_cost_eur
            row[71] = appeal_address[appeal_idx]                    # contract_appeal_body_address
            row[72] = "EUR"                                         # contract_total_value_currency
            row[73] = operator_email[oper_idx]                      # contract_operator_email
            row[74] = auth_phone[auth_idx]                          # contract_authority_fax (= phone)
            row[75] = operator_url[oper_idx]                        # contract_operator_url
            # VAT rates: the total rate goes in column 76 and the contract
            # rate in column 77 mirrors it (previously column 76 was never
            # set and column 77 ended up holding the operator URL).
            row[76] = random.randint(15, 19)                        # contract_total_value_vat_rate
            row[77] = row[76]                                       # contract_contract_value_vat_rate
            # NOTE(review): the URL suffix is the numeric total value cost,
            # not a domain such as "com" - kept as is, confirm intent.
            row[78] = ("http://www." + row[63] + "." + str(row[67])).lower()
            row[79] = 19                                            # contract_initial_value_vat_rate
            row[80] = ""                                            # contract_authority_url_participate
            row[81] = operator_phone[oper_idx]                      # contract_operator_fax (= phone)
            # Value range columns: filled for roughly 1 row in 101.
            if random.randint(0, 100) == 1:
                row[82] = random.randint(2000, 2000000)             # contract_contract_value_low_eur
                # High bound derives from the low bound (column 82), not
                # from the operator fax number stored in column 81.
                row[83] = random.randint(row[82], row[82] * 2)      # contract_contract_value_high
            else:
                row[82] = ""
                row[83] = ""
            row[84] = row[83]                                       # contract_contract_value_high_eur
            row[85] = row[82]                                       # contract_contract_value_low
            row[86] = contract_on_behalf_address[behalf_idx]        # contract_on_behalf_address
            row[87] = contract_on_behalf_country[behalf_idx]        # contract_on_behalf_country
            # Town is looked up from the country code (column 87); the
            # address in column 86 is never a key of `nation`.
            row[88] = nation.get(row[87])                           # contract_on_behalf_town
            row[89] = contract_on_behalf_postal_code[behalf_idx]    # contract_on_behalf_postal_code
            row[90] = contract_on_behalf_slug[behalf_idx]           # contract_on_behalf_slug
            row[91] = contract_on_behalf_official_name[behalf_idx]  # contract_on_behalf_official_name
            row[92] = random.randint(60000, 10000000)               # contract_total_value_low
            row[93] = random.randint(row[92], int(row[92] * 1.5))   # contract_total_value_high
            row[94] = random.randint(60000, 10000000)               # contract_total_value_low_eur
            # EUR high bound derives from the EUR low bound (column 94).
            row[95] = random.randint(row[94], int(row[94] * 1.5))   # contract_total_value_high_eur

            writer.writerow(row)
            print(i)
# Generate two datasets with consecutive id ranges, then cut the sample files.
# Both arguments are passed by keyword: a positional argument may not follow
# a keyword argument in a call.
crea_ted_500k(nomefile="ted-5000001.csv", id_partenza=0)
crea_ted_500k(nomefile="ted-5000002.csv", id_partenza=500000)
file_writer()