· 7 years ago · Dec 05, 2018, 09:18 PM
1import csv
2import string
3import random
4import os
5import time
########################
# Name of this scheme: #
########################
ThisScheme = 'SNKFPenF'
########################

# Names of the directories this script writes into.
path0 = 'output'
path1 = 'output/dirty'
path2 = 'output/clean'
path3 = 'originals'

# Create the directories in order: path0 has to exist before path1 and path2
# can be created inside it. os.mkdir raises OSError both when the directory is
# already there and when it cannot be created; in either case the failure is
# only reported and the script carries on.
for _directory in (path0, path1, path2, path3):
    try:
        os.mkdir(_directory)
    except OSError:
        print("Creation of the directory %s failed, does it already exist?" % _directory)
    else:
        print("Successfully created the directory %s " % _directory)
46
47
# WARNING!
# When editing this file, be sure to use find and replace ONLY on the headers
# that come from the _INPUT FILE_!

# Files to import. They live in the working directory, so their names carry
# no directory and no leading '/'. Edit these if needed.
file0 = 'unit_holders.csv'
file8 = 'cashflow.csv'

# Files to export. Every name below is a destination directory (path1, path2
# or path3) followed by '/<file name>'; edit only the file-name part.
# Note that file4, file11 and file13 were removed as contact info was added to
# person, and ssnit was added to account.

# Exported to path3 (originals):
file1 = path3 + '/persons.csv'
file5 = path3 + '/personal_identification.csv'
file2 = path3 + '/accounts.csv'

# Exported to path1 (dirty):
file6 = path1 + '/persons.csv'
file12 = path1 + '/cashflow.csv'
file14 = path1 + '/personal_identification.csv'
file16 = path1 + '/accounts.csv'

# Exported to path2 (clean):
file3 = path2 + '/sponsors.csv'
file7 = path2 + '/persons.csv'
file10 = path2 + '/accounts.csv'
file9 = path2 + '/cashflow.csv'
file15 = path2 + '/personal_identification.csv'

# Log file: lives in path0; edit the file-name part if needed.
# "w+" creates the file or truncates an existing one, and allows reading back.
logFile = path0 + '/log.txt'
log = open(logFile, "w+")
101
# Duplicate bookkeeping, filled while the input rows are parsed:
#   dupes       - [personCode, fullPerson] pairs of rows flagged as duplicates
#   dupeCodes   - person codes of those rows
#   dupeUniques - the unique person keys of those rows
dupes, dupeCodes, dupeUniques = [], [], []

# [originalCode, personCode] pairs linking a repeated person's new row back to
# the person code that was seen first.
accountLinks = []

# newAccounts holds the person codes that appear as the second item of an
# accountLinks pair; accountUniques holds the unique account keys seen so far.
newAccounts, accountUniques = [], []

# List that stores completed entries.
completedLines = []

# persons stores one list per completed person (person code, unique person
# key, sponsor code); personUniques stores just the unique person keys so
# duplicates can be detected with a membership test.
persons, personUniques = [], []

# Sponsor bookkeeping used to avoid writing the same sponsor twice.
sponsorNames, sponsorCodes, sponsorWrongCodes = [], [], []
sponsorLinks, sponsorEntries = [], []

# Sponsor code/name of the row currently being processed; 'missing' until a
# row assigns them.
currentSponsorCode = currentSponsorName = 'missing'

# Used by the account parse to decide whether sponsorLinks must be consulted.
sponsorDupes = []
133
def AssignSponsorCode(code):
    """Store ``code`` as the module-level ``currentSponsorCode``.

    After storing, prints the stored value and the argument separated by
    '/', followed by 'updated!'.
    """
    global currentSponsorCode
    currentSponsorCode = code
    # Both printed values are the same object at this point; the line is a
    # console trace only.
    print(currentSponsorCode, '/', code, 'updated!')
141
def AssignSponsorName(name):
    """Store ``name`` as the module-level ``currentSponsorName``.

    After storing, prints the stored name followed by 'updated!'.
    """
    global currentSponsorName
    currentSponsorName = name
    print(currentSponsorName, 'updated!')
146
def ClearSponsorCode():
    """Reset the module-level ``currentSponsorCode`` to the marker 'twasreset'.

    Prints 'Sponsor Code Reset!' as a console trace.
    """
    global currentSponsorCode
    # The trace does not read the global, so its position relative to the
    # assignment does not matter.
    print('Sponsor Code Reset!')
    currentSponsorCode = 'twasreset'
151
def ClearSponsorName():
    """Reset the module-level ``currentSponsorName`` to the marker 'twasresettoo'.

    Prints 'Sponsor Name Reset!' as a console trace.
    """
    global currentSponsorName
    # The trace does not read the global, so its position relative to the
    # assignment does not matter.
    print('Sponsor Name Reset!')
    currentSponsorName = 'twasresettoo'
156
157
def CheckDuplicatePerson(unique):
    """Tell whether ``unique`` has already been recorded as a duplicate.

    Args:
        unique: Key to look up in the module-level ``dupeUniques`` list.

    Returns:
        bool: True if ``unique`` is in ``dupeUniques``, otherwise False.
    """
    return unique in dupeUniques
164
def CheckDuplicateAccount(unique):
    """Tell whether ``unique`` is an account key that has already been seen.

    Args:
        unique: Key to look up in the module-level ``accountUniques`` list.

    Returns:
        bool: True if ``unique`` is in ``accountUniques``, otherwise False.
    """
    return unique in accountUniques
171
def CheckNewAccount(code):
    """Return the original person code that ``code`` is linked to, if any.

    Args:
        code: Person code to look up.

    Returns:
        The first item of the first ``accountLinks`` entry whose second item
        equals ``code``. None when ``code`` is not in ``newAccounts`` or no
        entry matches.
    """
    # Only codes registered as new accounts can have a link.
    if code not in newAccounts:
        return None
    for link in accountLinks:
        if link[1] == code:
            return link[0]
    return None
182
def SponsorCodeDupeCheck(code):
    """Tell whether a sponsor with this code has already been recorded.

    Args:
        code: Sponsor code to look up in the module-level ``sponsorCodes``.

    Returns:
        bool: True if ``code`` is in ``sponsorCodes``, otherwise False.
    """
    return code in sponsorCodes
188
def SponsorNameDupeCheck(name):
    """Tell whether a sponsor with this name has already been recorded.

    Args:
        name: Sponsor name to look up in the module-level ``sponsorNames``.

    Returns:
        bool: True if ``name`` is in ``sponsorNames``, otherwise False.
    """
    return name in sponsorNames
194
195
def GetSponsorCode(code):
    """Look up the sponsor code that ``code`` is linked to.

    Args:
        code: Value compared against the second item of each entry in the
            module-level ``sponsorLinks`` list.

    Returns:
        The first item of the first matching entry, or None when no entry
        matches (including when ``sponsorLinks`` is empty).
    """
    for link in sponsorLinks:
        if link[1] == code:
            return link[0]
    return None
205
def GetSponsorName(code):
    """Look up the sponsor name recorded for ``code``.

    Args:
        code: Value compared against the first item of each entry in the
            module-level ``sponsorEntries`` list.

    Returns:
        The second item of the first matching entry, or None when no entry
        matches (including when ``sponsorEntries`` is empty).
    """
    for entry in sponsorEntries:
        if entry[0] == code:
            return entry[1]
    return None
215
def GetDuplicateByFullPerson(fullPerson):
    """Tell whether this unique person key has already been recorded.

    Args:
        fullPerson: Key to look up in the module-level ``personUniques``.

    Returns:
        bool: True if ``fullPerson`` is in ``personUniques``, otherwise False.
    """
    return fullPerson in personUniques
221
def GetDuplicateByAccount(fullAccount):
    """Tell whether this unique account key has already been recorded.

    Args:
        fullAccount: Key to look up in the module-level ``accountUniques``.

    Returns:
        bool: True if ``fullAccount`` is in ``accountUniques``, otherwise
        False.
    """
    return fullAccount in accountUniques
227
def GetFirstPersonCode(fullPerson):
    """Return the person code first recorded for a unique person key.

    Args:
        fullPerson: Value compared against the second item of each entry in
            the module-level ``persons`` list.

    Returns:
        The first item of the first matching entry, or None when no entry
        matches (including when ``persons`` is empty).
    """
    for person in persons:
        if person[1] == fullPerson:
            return person[0]
    return None
237
# Generator:
def id_generator(size=10):
    """Return a random string of ``size`` uppercase ASCII letters.

    Each letter is drawn independently, so letters may repeat and ``size``
    is not limited to the 26 letters of the alphabet.

    Args:
        size: Number of letters to generate (default 10).

    Returns:
        str: ``size`` characters taken from ``string.ascii_uppercase``; the
        empty string when ``size`` is 0.

    Raises:
        ValueError: If ``size`` is negative.
    """
    if size < 0:
        raise ValueError("size must be non-negative")
    return ''.join(random.choice(string.ascii_uppercase) for _ in range(size))
241
242
243def split_csv(input_path, output_path1, output_path2, output_path3, output_path5, output_path6):
244 start = time.time()
245
246 # open the input file in read-mode:
247 csv_input_file = open(input_path, 'r')
248
249 # load the headers in the file with DictReader:
250 csv_input = csv.DictReader(csv_input_file, delimiter=',')
251
252 # 0pen the output files in write-mode
253 csv_output_file1 = open(output_path1, 'w', newline='')
254 csv_output_file2 = open(output_path2, 'w', newline='')
255 csv_output_file3 = open(output_path3, 'w', newline='')
256 csv_output_file5 = open(output_path5, 'w', newline='')
257 csv_output_file6 = open(output_path6, 'w', newline='')
258 csv_output_file10 = open(file10, 'w', newline='')
259
260 # Create CSV writers using the opened files
261 csv_1 = csv.writer(csv_output_file1) # people.csv
262 csv_2 = csv.writer(csv_output_file2) # accounts.csv
263 csv_3 = csv.writer(csv_output_file3) # sponsors.csv
264 csv_5 = csv.writer(csv_output_file5) # ids.csv
265 csv_6 = csv.writer(csv_output_file6) # duplicates.csv
266 csv_10 = csv.writer(csv_output_file10) # clean accounts.csv
267
268 ##################################################################################
269 # DO NOT edit the headers being inserted to the new files, they are PAS specific!#
270 ##################################################################################
271
272 # Headers for people.csv
273 person_headers = [
274 'person_code',
275 'ssnit',
276 'employer_code',
277 'first_name',
278 'mid_name',
279 'other_names',
280 'maiden_name',
281 'last_name',
282 'last_modified_by',
283 'last_modified_date',
284 'deleted',
285 'employment_status',
286 'dob',
287 'phone1',
288 'phone2',
289 'email',
290 'address1',
291 'address2',
292 'city',
293 'country',
294 'marital_status',
295 'gender',
296 'father_first_name',
297 'father_last_name',
298 'father_address',
299 'father_city',
300 'father_country',
301 'father_phone',
302 'mother_first_name',
303 'mother_last_name',
304 'mother_maiden_name',
305 'mother_address',
306 'mother_city',
307 'mother_country',
308 'mother_phone',
309 'nationality'
310 ]
311 # Write the headers to file:
312 csv_1.writerow(person_headers)
313 #
314 # Headers for accounts.csv
315 account_headers = [
316 'person_code',
317 'sponsor_code',
318 'scheme_code',
319 'description',
320 'last_modified_by',
321 'last_modified_date',
322 'industry_code',
323 'income_structure',
324 'average_pay',
325 'ssno'
326 ]
327 # Write the headers to file:
328 csv_2.writerow(account_headers)
329 #
330 # Headers for sponsors.csv
331 sponsor_headers = [
332 'sponsor_code',
333 'sponsor_name',
334 'region_code',
335 'category',
336 'last_modified_by',
337 'last_modified_date',
338 'deleted', # (yes/no)
339 'email',
340 'telephone_1',
341 'telephone_2',
342 'fax',
343 'address',
344 'contact_person1',
345 'contact_person2'
346 ]
347 # Write the headers to file:
348 csv_3.writerow(sponsor_headers)
349
350 # Headers for ids.csv
351 id_headers = [
352 'person_code',
353 'id_type',
354 'id_number'
355 ]
356 # Write the headers to file:
357 csv_5.writerow(id_headers)
358
359 # Just a placemark variable to show the script user what row it's on (increased and shown to console):
360 thisLine = 1;
361
362 # Same as above, but counts number of duplicates:
363 duplicates = 0;
364
365 # For each row in our input file, do this:\
366 for input_row in csv_input:
367 # If there wasnt a surname or first name provided, replace it with 'missing'...
368 # the field is nullable, but it looks better to show that the system and/or PiSys
369 # is aware of the missing data
370
371 # Process data for Sankofa to PAS structure..
372 if input_row['surname'] == '':
373 input_row['surname'] = 'missing'
374
375 if input_row['first_name'] == '':
376 input_row['first_name'] = 'missing'
377
378 if input_row['maritalStatus'] == 'Married':
379 input_row['maritalStatus'] = '1'
380
381 if input_row['maritalStatus'] == 'Single':
382 input_row['maritalStatus'] = '2'
383
384 if input_row['maritalStatus'] == 'Divorced':
385 input_row['maritalStatus'] = '3'
386
387 if input_row['employmentStatus'] == 'Unemployed':
388 input_row['employmentStatus'] = '2'
389
390 if input_row['employmentStatus'] == 'Employed':
391 input_row['employmentStatus'] = '1'
392
393 if input_row['employmentStatus'] == 'Retired':
394 input_row['employmentStatus'] = '3'
395
396 if input_row['gender'] == 'Male':
397 input_row['gender'] = '1'
398
399 if input_row['gender'] == 'Female':
400 input_row['gender'] = '2'
401
402 if input_row['Nationality'] != '':
403 input_row['Nationality'] = '82'
404
405 if input_row['fatherCountry'] == 'GH':
406 input_row['fatherCountry'] = '82'
407
408 if input_row['motherCountry'] == 'GH':
409 input_row['motherCountry'] = '82'
410
411 if input_row['fatherCity'] == 'Navrongo':
412 input_row['fatherCity'] = '54'
413
414 if input_row['motherCity'] == 'Navrongo':
415 input_row['motherCity'] = '54'
416
417 ##############################
418 # Beginning of Sponsor Parse #
419 ##############################
420
421 personCode = input_row['unit_holder_id']
422 #SponsorCode = ''
423 #SponsorName = ''
424 if input_row['employer_code'] != '' and input_row['employer_name'] != '':
425 rawcode = input_row['employer_code']
426 codeType = type(rawcode)
427 if codeType == int:
428 print('################# integer! #################')
429 print('################# integer! #################')
430 print('################# integer! #################')
431 print('################# integer! #################')
432 print('################# integer! #################')
433 print('################# integer! #################')
434 if codeType == str:
435 print('string!')
436 #code = ''
437 #if codeType == 'int':
438 # code = str(code)
439 #else:
440 # code = code
441 name = input_row['employer_name']
442 codeCheck = SponsorCodeDupeCheck(rawcode)
443 nameCheck = SponsorNameDupeCheck(name)
444 if codeCheck == True and nameCheck == True:
445 print(personCode, 'Duplicate Sponsor (based on name and code) Found and Omitted!')
446 AssignSponsorCode(code)
447 AssignSponsorName(name)
448 print(currentSponsorCode, 'vs', code)
449 if codeCheck == True and nameCheck == False:
450 print(personCode, 'Duplicate Sponsor (based on code) Found and Omitted!')
451 AssignSponsorCode(code)
452 AssignSponsorName(name)
453 print(currentSponsorCode, 'vs', code)
454 if codeCheck == False and nameCheck == True:
455 firstCode = GetSponsorCode(name)
456 if firstCode != code:
457 appendValue = [firstCode, code]
458 sponsorLinks.append(appendValue)
459 sponsorWrongCodes.append(code)
460 AssignSponsorCode(firstCode)
461 AssignSponsorName(name)
462 print(personCode, 'Duplicate Sponsor Name with Different Code Found! Code recorded in Links.')
463 print(currentSponsorCode, 'vs', code)
464 if firstCode == code:
465 AssignSponsorCode(code)
466 AssignSponsorName(name)
467 print(personCode, 'Duplicate Sponsor Found with same Code! Omitted entry. ')
468 print(currentSponsorCode, 'vs', code)
469
470 if codeCheck == False and nameCheck == False:
471 csv_3_row = [
472 currentSponsorCode,
473 currentSponsorName,
474 '0', #not null, integer(11)
475 '-1', #not null, integer(11)
476 '',
477 '',
478 'no', #deleted
479 '', #email
480 '', #telephone_1
481 '', #telephone_2
482 '', #fax
483 '', #address
484 '', #contact_person1
485 '' #contact_person2
486 ]
487 # Write row to file:
488 csv_3.writerow(csv_3_row)
489 sponsorCodes.append(currentSponsorCode)
490 sponsorNames.append(currentSponsorName)
491 sponsorEntry = [currentSponsorCode, currentSponsorName]
492 sponsorEntries.append(sponsorEntry)
493 AssignSponsorCode(code)
494 AssignSponsorName(name)
495 print(currentSponsorCode, 'vs', code)
496 print(personCode, 'Contained full unique sponsor, added sponsor to file.')
497
498 if input_row['employer_code'] == '' and input_row['employer_name'] != '':
499 name = input_row['employer_name']
500 firstCode = GetSponsorCode(name)
501 if firstCode != '':
502 AssignSponsorCode(firstCode)
503 AssignSponsorName(name)
504 print(personCode, 'Excluded Duplicate Sponsor Entry with Name:', name, 'it linked with:', firstCode)
505 print(currentSponsorCode, 'vs', code)
506 if firstCode == '' or firstCode == None:
507 ClearSponsorCode()
508 AssignSponsorName(name)
509 print(personCode, 'Excluded Duplicate Sponsor Entry with Name:', name, 'as it had no identifying code.')
510 print(currentSponsorCode, 'vs', code)
511
512 if input_row['employer_code'] != '' and input_row['employer_name'] == '':
513 rawcode = input_row['employer_code']
514 codeType = type(rawcode)
515 code = ''
516 if codeType == 'int':
517 code = str(code)
518 else:
519 code = code
520 codeCheck = SponsorCodeDupeCheck(code)
521 # Get generated code:
522 randomString = id_generator()
523 # Add a cap which makes reporting easy in MySQL:
524 cap = 'MISS'
525 # Concatenate cap and generated name:
526 name = cap + randomString
527 if codeCheck == False:
528 if code == '1006' or code == 1006:
529 logEntry = 'KER-SEE-1006', '\n'
530 logString = ''.join(logEntry)
531 log.write(logString)
532
533 csv_3_row = [
534 code,
535 name,
536 '0', #not null, integer(11)
537 '-1', #not null, integer(11)
538 '',
539 '',
540 'no', #deleted
541 '', #email
542 '', #telephone_1
543 '', #telephone_2
544 '', #fax
545 '', #address
546 '', #contact_person1
547 '' #contact_person2
548 ]
549 # Write row to file:
550 csv_3.writerow(csv_3_row)
551 sponsorCodes.append(code)
552 AssignSponsorCode(code)
553 AssignSponsorName(name)
554 print(currentSponsorCode, 'vs', code)
555 else:
556 AssignSponsorCode(code)
557 AssignSponsorName(name)
558 print(personCode, 'Excluded Sponsor Entry without Name, as its code is a duplicate')
559 print(currentSponsorCode, 'vs', code)
560
561 if input_row['employer_code'] == '' and input_row['employer_name'] == '':
562 ClearSponsorCode()
563 ClearSponsorName()
564 print(personCode, 'No sponsor here! Ignoring...')
565
566 #########################
567 # End of Sponsor Parse! #
568 #########################
569
570 ##############################
571 # Beginning of Persons Parse #
572 ##############################
573
574 # Row Identity for People
575 # A lot of information will be missing.. but insert anyway.
576 # The person_code will tie things together, so DO NOT fail to insert
577 # EVEN IF a person_code is ALL you have! - they STILL EXIST!
578 #global CurrentSponsorCode
579 csv_1_row = [
580 input_row['unit_holder_id'],
581 input_row['ssnit_no'],
582 currentSponsorCode,
583 input_row['first_name'],
584 input_row['midName'],
585 input_row['other_names'],
586 '', #maiden name
587 input_row['surname'],
588 '', #last_modified_by
589 '', #last_modified_date
590 'no', #deleted
591 input_row['employmentStatus'],
592 input_row['DOB'],
593 input_row['phone'],
594 input_row['phone2'],
595 input_row['email'],
596 input_row['userAddress'],
597 input_row['userAddress1'],
598 input_row['userCity'],
599 input_row['userCountry'],
600 input_row['maritalStatus'],
601 input_row['gender'],
602 input_row['fatherFName'],
603 input_row['fatherLName'],
604 input_row['fatherAdd'],
605 input_row['fatherCity'],
606 input_row['fatherCountry'],
607 input_row['fatherPhone'],
608 input_row['motherFName'],
609 input_row['motherLName'],
610 '', #maiden name (mother)
611 input_row['motherAdd'],
612 input_row['motherCity'],
613 input_row['motherCountry'],
614 input_row['motherPhone'],
615 input_row['Nationality']
616 ]
617
618 # Remove spaces for insertion in to persons array, not output to CSV:
619 first_name = input_row['first_name'].replace(" ", "")
620 surname = input_row['surname'].replace(" ", "")
621
622 # Create a unique identifier based on these values:
623 fullPerson = input_row['ssnit_no'] + surname + first_name
624 # Extract the person_code:
625 personCode = input_row['unit_holder_id']
626 unverifiedCode = currentSponsorCode
627 sponsorName = currentSponsorName
628
629 #if unverifiedCode == '' or unverifiedCode == None:
630 # code = 'missing'
631 # AssignSponsorCode(code)
632 #else:
633 # AssignSponsorCode(unverifiedCode)
634 # #sponsorCode = unverifiedCode
635
636 print(currentSponsorCode)
637
638 # Check array of persons for unique:
639 # for pcode, ucode in persons.items():
640
641 # If this is a duplicate, mark it as so and write to duplicates file.
642 fullAccount = currentSponsorCode + surname + first_name
643 personDupeCheck = GetDuplicateByFullPerson(fullPerson) # this checks to see if another entry with the same ssnit, first name, and last name exists already
644 accountDupeCheck = GetDuplicateByAccount(fullAccount) # this checks to see if another entry with the same sponsor, first name, and last name exists already
645
646 # If the person and the account are duplicates:
647 if personDupeCheck == True and accountDupeCheck == True:
648 logEntry = 'Duplicate Person and Account: personCode: ', personCode, 'schemeCode:', currentSponsorCode, 'unique:', fullPerson, '\n'
649 logString = ''.join(logEntry)
650 log.write(logString)
651 #print("Duplicate: personCode: ", lastCode, "/", personCode, "- details logged.")
652 appendValue = [personCode, fullPerson]
653 dupes.append(appendValue)
654 dupeCodes.append(personCode)
655 dupeCodes.append(personDupeCheck)
656 dupeUniques.append(fullPerson)
657 # Write to People.csv:
658 csv_1.writerow(csv_1_row)
659
660 # If this is a duplicate person, but the account is new:
661 if personDupeCheck == True and accountDupeCheck == False:
662 OriginalCode = GetFirstPersonCode(fullPerson)
663 logEntry = 'New Account found for a duplicate person: personCode: ', personCode, 'vs', OriginalCode, 'SponsorsCode:', currentSponsorCode, 'unique:', fullPerson, '\n'
664 logString = ''.join(logEntry)
665 log.write(logString)
666 appendValue = [OriginalCode, personCode]
667 accountLinks.append(appendValue)
668 # Add the personCode from this row to an array(list) of new accounts:
669 newAccounts.append(personCode)
670 accountUniques.append(fullAccount)
671 # Write to People.csv:
672 csv_1.writerow(csv_1_row)
673
674 # If this is an original person, with an original account:
675 if personDupeCheck == False and accountDupeCheck == False:
676 logEntry = 'Original Person and account found for: personCode: ', personCode, 'schemeCode:', currentSponsorCode, 'unique:', fullPerson, '\n'
677 logString = ''.join(logEntry)
678 log.write(logString)
679 appendValue = [personCode, fullPerson, currentSponsorCode]
680 persons.append(appendValue)
681 personUniques.append(fullPerson)
682 accountUniques.append(fullAccount)
683 # Write to People.csv:
684 csv_1.writerow(csv_1_row)
685
686 # If this is an original person, with a duplicate account:
687 if personDupeCheck == False and accountDupeCheck == True:
688 logEntry = 'Original Person with duplicate account found for: personCode: ', personCode, 'schemeCode:', currentSponsorCode, 'unique:', fullPerson, '\n'
689 logString = ''.join(logEntry)
690 log.write(logString)
691 appendValue = [personCode, fullPerson]
692 dupes.append(appendValue)
693 dupeCodes.append(personCode)
694 dupeUniques.append(fullPerson)
695 # Write to People.csv:
696 csv_1.writerow(csv_1_row)
697
698
699
700 ########################
701 # End of Persons Parse #
702 ########################
703
704
705
706
707
708 ###############################
709 # Beginning of Account Parse! #
710 ###############################
711
712 unit_holder = input_row['unit_holder_id']
713 unverifiedCode = input_row['employer_code']
714 sponsorName = input_row['employer_name']
715
716 sponsorCode = ''
717
718 if unverifiedCode in sponsorDupes:
719 for entry in sponsorLinks:
720 entryCode = entry[0]
721 entryName = entry[1]
722 if entryName == sponsorName and entryCode != unverifiedCode:
723 sponsorCode = entryCode
724 logEntry = 'Account Sponsor Code Updated! (from): Sponsors Code: ', unverifiedCode, ' to: ', entryCode, ' as: ', sponsorName, ' = ', entryName, '\n'
725 logString = ''.join(logEntry)
726 log.write(logString)
727 # Row identity for Accounts:
728 csv_2_row = [
729 unit_holder,
730 sponsorCode,
731 ThisScheme, # this is the scheme_code!! replace with input_row if supplied.
732 '',
733 '',
734 '',
735 input_row['IndustryCode'],
736 input_row['payRate'],
737 input_row['averagePay'],
738 input_row['ssnit_no']
739 ]
740
741 # Write (data) to Accounts.csv:
742 csv_2.writerow(csv_2_row)
743
744 if entryName == sponsorName and entryCode == unverifiedCode:
745 sponsorCode = unverifiedCode
746 # Row identity for Accounts:
747 csv_2_row = [
748 unit_holder,
749 sponsorCode,
750 ThisScheme, # this is the scheme_code!! replace with input_row if supplied.
751 '',
752 '',
753 '',
754 input_row['IndustryCode'],
755 input_row['payRate'],
756 input_row['averagePay'],
757 input_row['ssnit_no']
758 ]
759
760 # Write (data) to Accounts.csv:
761 csv_2.writerow(csv_2_row)
762 else:
763 continue
764
765 else:
766 verifiedCode = unverifiedCode # it is not in duplicates, so it is automatically 'verified'
767 logEntry = 'Original sponsor found and added to account: Sponsors Code: ', unverifiedCode, ' Name: ', sponsorName, '\n'
768 logString = ''.join(logEntry)
769 log.write(logString)
770 # Row identity for Accounts:
771 csv_2_row = [
772 unit_holder,
773 verifiedCode,
774 ThisScheme, # this is the scheme_code!! replace with input_row if supplied.
775 '',
776 '',
777 '',
778 input_row['IndustryCode'],
779 input_row['payRate'],
780 input_row['averagePay'],
781 input_row['ssnit_no']
782 ]
783
784 # Write (data) to Accounts.csv:
785 csv_2.writerow(csv_2_row)
786 ####################################################
787 ####################################################
788 # KR _ NOTE: this needs to check for sponsorLinks!!
789 # find link (if exists) and insert it instead if found!
790 ####################################################
791 ####################################################
792
793
794 # Row identity for Sponsors
795 # Region code is replaced with a value for 'unknown' in the database if empty
796
797
798
799 # Start of ID data parse. As IDType is a different column than ssnit, both are checked separately, and inserted separately.
800 if input_row['IDType'] == 'Drivers License':
801 input_row['IDType'] = '2'
802
803 if input_row['IDType'] == 'Passport':
804 input_row['IDType'] = '1'
805
806 if input_row['IDType'] == 'Voters ID':
807 input_row['IDType'] = '4'
808
809 # Create row for ID:
810 id_row = [
811 input_row['unit_holder_id'],
812 input_row['IDType'],
813 input_row['IDNum']
814 ]
815
816 # Write row to ID file:
817 csv_5.writerow(id_row)
818 ClearSponsorCode()
819 ClearSponsorName()
820
821 # Close the people_with_dupes.csv file so it can be opened for reference to duplicates:
822 csv_output_file1.close()
823 csv_output_file2.close()
824 csv_output_file5.close()
825
826 # Open 'original' files created in read mode:
827 input_persons = open(file1, 'r')
828 input_accounts = open(file2, 'r')
829 input_cashflow = open(file8, 'r')
830 input_ids = open(file5, 'r')
831
832 # Open the new files in /clean and /dirty in write mode:
833 persons_output = open(file7, 'w', newline='')
834 cashflow_clean = open(file9, 'w', newline='')
835 cashflow_dirty = open(file12, 'w', newline='')
836 ids_clean = open(file15, 'w', newline='')
837 ids_dirty = open(file14, 'w', newline='')
838 accounts_clean = open(file10, 'w', newline='')
839 accounts_dirty = open(file16, 'w', newline='')
840
841 # Create a reader for each of the input files:
842 csv_input_persons = csv.DictReader(input_persons, delimiter=',')
843 csv_input_cashflow = csv.DictReader(input_cashflow, delimiter=',')
844 csv_input_ids = csv.DictReader(input_ids, delimiter=',')
845 csv_input_accounts = csv.DictReader(input_accounts, delimiter=',')
846
847 # Create a writer for each of the new files:
848 persons_csv = csv.writer(persons_output)
849 cash_csv_c = csv.writer(cashflow_clean)
850 cash_csv_d = csv.writer(cashflow_dirty)
851 ids_csv_c = csv.writer(ids_clean)
852 ids_csv_d = csv.writer(ids_dirty)
853 accounts_csv_c = csv.writer(accounts_clean)
854 accounts_csv_d = csv.writer(accounts_dirty)
855
856 # Headers for Accounts:
857 account_headers = [
858 'person_code',
859 'sponsor_code',
860 'scheme_code',
861 'description',
862 'last_modified_by',
863 'last_modified_date',
864 'income_structure',
865 'average_pay',
866 'ssno'
867 ]
868 # Write the headers to files:
869 accounts_csv_c.writerow(account_headers)
870 accounts_csv_d.writerow(account_headers)
871
872 # For each row in the accounts table:
873 for input_row in csv_input_accounts:
874 # Get the person_code:
875 thisPerson = input_row['person_code']
876 # Write the row:
877 if thisPerson in newAccounts:
878 #for original, new in accountLinks.items():
879 for link in accountLinks:
880 # Old refers to the original person_code
881 old = link[0]
882 # New refers to a person/account to be replaced by original:
883 new = link[1]
884 # If the new code is equal to the current one:
885 if new == input_row['person_code']:
886 # Create the row:
887 account_row = [
888 old,
889 input_row['sponsor_code'],
890 input_row['scheme_code'],
891 input_row['description'],
892 input_row['last_modified_by'],
893 input_row['last_modified_date'],
894 input_row['income_structure'],
895 input_row['average_pay'],
896 input_row['ssno']
897 ]
898 # Identify the sponsor/employer (for logging purposes):
899 sponsor = input_row['sponsor_code']
900
901 # If they're a duplicate:
902 if thisPerson in dupeCodes:
903 # Write the row:
904 accounts_csv_d.writerow(account_row)
905 # Show the user what we've done:
906 print('New Account for Duplicate: |', thisPerson, '| moved to dirty file.' )
907 # Create and append a log entry:
908 logEntry = 'Found new account for a Duplicate: PersonCode (old/new): ', old, '/', new, 'SponsorCode:', sponsor, ', moved to Dirty file.', '\n'
909 logString = ''.join(logEntry)
910 log.write(logString)
911 # If they aren't a duplicate:
912 else:
913 # Write the row:
914 accounts_csv_c.writerow(account_row)
915 # Show the user what we've done:
916 print('New Account for Original: |', thisPerson, '| moved to clean file.' )
917 # Create and append a log entry:
918 logEntry = 'Found new account for a person: PersonCode (old/new): ', old, '/', new, 'SponsorCode:', sponsor, ', moved to Clean file.', '\n'
919 logString = ''.join(logEntry)
920 log.write(logString)
921 # This continues the loop around the array
922 else:
923 continue
924 # If this is not a new account:
925 else:
926 # Create the row:
927 account_row = [
928 input_row['person_code'],
929 input_row['sponsor_code'],
930 input_row['scheme_code'],
931 input_row['description'],
932 input_row['last_modified_by'],
933 input_row['last_modified_date'],
934 input_row['income_structure'],
935 input_row['average_pay'],
936 input_row['ssno']
937 ]
938 # If they're a duplicate:
939 if thisPerson in dupeCodes:
940 # Write the row to dirty file:
941 accounts_csv_d.writerow(account_row)
942 # Show the user what we've done:
943 print('Single account for Duplicate: |', thisPerson, '| moved to dirty file.' )
944 # Create and append a log entry:
945 logEntry = 'Found single account for a duplicate: PersonCode: ', thisPerson, 'SchemeCode:', sponsorCode, ', moved to Dirty file.', '\n'
946 logString = ''.join(logEntry)
947 log.write(logString)
948 # If they aren't a duplicate:
949 else:
950 # Write the row to clean file:
951 accounts_csv_c.writerow(account_row)
952 # Show the user what we've done:
953 print('Single account for Original: |', thisPerson, '| moved to clean file.' )
954 # Create and append a log entry:
955 logEntry = 'Found single account for an individual: ', thisPerson, 'SchemeCode:', sponsorCode, ', moved to Clean file.' ,'\n'
956 logString = ''.join(logEntry)
957 log.write(logString)
958
959 # Headers for ID file
960 id_headers = [
961 'person_code',
962 'id_type',
963 'id_number'
964 ]
965 # Write the headers to files:
966 ids_csv_c.writerow(id_headers)
967 ids_csv_d.writerow(id_headers)
968
969 # For each row in the IDs.csv:
970 for input_row in csv_input_ids:
971 # Get the person code and ID number:
972 thisPerson = input_row['person_code']
973 thisNumber = input_row['id_number']
974
975 # If the ID number is not empty, continue:
976 if thisNumber != '':
977 if thisPerson in newAccounts:
978 #for original, new in accountLinks.items():
979 for link in accountLinks:
980 old = link[0]
981 new = link[1]
982 if new == input_row['person_code']:
983 # Create the row:
984 id_row = [
985 old,
986 input_row['id_type'],
987 input_row['id_number']
988 ]
989 # If this is a duplicate, write to dirty file:
990 if thisPerson in dupeCodes:
991 ids_csv_d.writerow(id_row)
992 print('ID Info for a Duplicate with Person Code w/New Account: |', thisPerson, '| moved to dirty file.' )
993 # If this is not a duplicate, write to clean file:
994 else:
995 ids_csv_c.writerow(id_row)
996 print('ID Info for an Individual w/ Person Code w/New Account: |', thisPerson, '| moved to clean file.' )
997 else:
998 continue
999 else:
1000 # Create the row:
1001 id_row = [
1002 input_row['person_code'],
1003 input_row['id_type'],
1004 input_row['id_number']
1005 ]
1006 if thisPerson in dupeCodes:
1007 ids_csv_d.writerow(id_row)
1008 print('ID Info for a Duplicate with Person Code: |', thisPerson, '| moved to dirty file.' )
1009 # If this is not a duplicate, write to clean file:
1010 else:
1011 ids_csv_c.writerow(id_row)
1012 print('ID Info for an Individual w/ Person Code: |', thisPerson, '| moved to clean file.' )
1013 # If the ID number is empty, continue process, don't insert:
1014 else:
1015 continue
1016
1017
1018 # Headers for the Cashflow files:
1019 cashflow_headers = [
1020 'person_code',
1021 'sponsor_code',
1022 'scheme_code',
1023 'cashflow_type',
1024 'ledger_date',
1025 'value_date',
1026 'amount',
1027 'deleted',
1028 'last_modified_by',
1029 'last_modified_date'
1030 ]
1031 # Write the headers:
1032 cash_csv_c.writerow(cashflow_headers)
1033 cash_csv_d.writerow(cashflow_headers)
1034 # For each row in the cashflow file:
1035 for input_row in csv_input_cashflow:
1036 # Get the person_code:
1037 thisPerson = input_row['person_code']
1038 # Create the row:
1039 if thisPerson in newAccounts:
1040 #for original, new in accountLinks.items():
1041 for link in accountLinks:
1042 old = link[0]
1043 new = link[1]
1044 if new == input_row['person_code']:
1045 # Create the row:
1046 cashrow = [
1047 old,
1048 input_row['sponsor_code'],
1049 input_row['scheme_code'],
1050 input_row['cashflow_type'],
1051 input_row['ledger_date'],
1052 input_row['value_date'],
1053 input_row['amount'],
1054 'no', # deleted
1055 '', #last_modified_by
1056 '' #last_modified_date
1057 ]
1058 # If this is a duplicate, write to dirty file:
1059 if thisPerson in dupeCodes:
1060 cash_csv_d.writerow(cashrow)
1061 print('Cashflow Info for Duplicate w/new account: |', thisPerson, '| moved to dirty file.' )
1062 break
1063 # If this is not a duplicate, write to clean file:
1064 else:
1065 cash_csv_c.writerow(cashrow)
1066 print('Cashflow Info for Original w/new account: |', thisPerson, '| moved to clean file.' )
1067 break
1068 else:
1069 continue
1070
1071 else:
1072 cashrow = [
1073 input_row['person_code'],
1074 input_row['sponsor_code'],
1075 input_row['scheme_code'],
1076 input_row['cashflow_type'],
1077 input_row['ledger_date'],
1078 input_row['value_date'],
1079 input_row['amount'],
1080 'no', # deleted
1081 '', #last_modified_by
1082 '' #last_modified_date
1083 ]
1084 # If this is a duplicate, write to dirty file:
1085 if thisPerson in dupeCodes:
1086 cash_csv_d.writerow(cashrow)
1087 print('Cashflow Info for Duplicate w/one account: |', thisPerson, '| moved to dirty file.' )
1088 # If this is not a duplicate, write to clean file:
1089 else:
1090 cash_csv_c.writerow(cashrow)
1091 print('Cashflow Info for Original w/one account: |', thisPerson, '| moved to clean file.' )
1092
1093 # Headers for Persons.csv:
1094 person_headers = [
1095 'person_code',
1096 'first_name',
1097 'mid_name',
1098 'other_names',
1099 'maiden_name',
1100 'last_name',
1101 'last_modified_by',
1102 'last_modified_date',
1103 'deleted',
1104 'employment_status',
1105 'dob',
1106 'phone1',
1107 'phone2',
1108 'email',
1109 'address1',
1110 'address2',
1111 'city',
1112 'country',
1113 'marital_status',
1114 'gender',
1115 'father_first_name',
1116 'father_last_name',
1117 'father_address',
1118 'father_city',
1119 'father_country',
1120 'father_phone',
1121 'mother_first_name',
1122 'mother_last_name',
1123 'mother_maiden_name',
1124 'mother_address', # he has address and maiden name place reversed
1125 'mother_city',
1126 'mother_country',
1127 'mother_phone',
1128 'nationality'
1129 ]
1130 # Write the headers:
1131 persons_csv.writerow(person_headers)
1132 csv_6.writerow(person_headers)
1133 # For each row in the persons_with_dupes file:
1134 for input_row in csv_input_persons:
1135 # Remove spaces for insertion in to persons array, not output to CSV:
1136 if input_row['last_name'] == '':
1137 input_row['last_name'] = 'missing'
1138
1139 if input_row['first_name'] == '':
1140 input_row['first_name'] = 'missing'
1141
1142 first_name = input_row['first_name'].replace(" ", "")
1143 surname = input_row['last_name'].replace(" ", "")
1144
1145 # Create a unique identifier based on these values:
1146 fullPerson = input_row['ssnit'] + surname + first_name
1147 thisPerson = input_row['person_code']
1148 # If this is a duplicate, create row and write to dirty file:
1149 if thisPerson in newAccounts:
1150 #for original, new in accountLinks.items():
1151 for link in accountLinks:
1152 old = link[0]
1153 new = link[1]
1154 if new == input_row['person_code']:
1155 if fullPerson in dupeUniques:
1156 # Write the duplicate to the duplicates.csv file:
1157 print('Duplicate Person Omitted, recorded new account with link to:', old )
1158 logEntry = 'Omitted entry of person with duplicates for:', new, ' as it exists in:', old, '\n'
1159 logString = ''.join(logEntry)
1160 log.write(logString)
1161 break
1162 # If this is not a duplicate, write to clean file:
1163 else:
1164 print('Person record omitted, new account recorded with link to:', old )
1165 logEntry = 'Omitted entry of person with multiple accounts for:', new, ' as it exists in:', old, '\n'
1166 logString = ''.join(logEntry)
1167 log.write(logString)
1168 break
1169 else:
1170 continue
1171 else:
1172 person_row = [
1173 input_row['person_code'],
1174 input_row['first_name'],
1175 input_row['mid_name'],
1176 input_row['other_names'],
1177 input_row['maiden_name'],
1178 input_row['last_name'],
1179 input_row['last_modified_by'],
1180 input_row['last_modified_date'],
1181 input_row['deleted'],
1182 input_row['employment_status'],
1183 input_row['dob'],
1184 input_row['phone1'],
1185 input_row['phone2'],
1186 input_row['email'],
1187 input_row['address1'],
1188 input_row['address2'],
1189 input_row['city'],
1190 input_row['country'],
1191 input_row['marital_status'],
1192 input_row['gender'],
1193 input_row['father_first_name'],
1194 input_row['father_last_name'],
1195 input_row['father_address'],
1196 input_row['father_city'],
1197 input_row['father_country'],
1198 input_row['father_phone'],
1199 input_row['mother_first_name'],
1200 input_row['mother_last_name'],
1201 input_row['mother_maiden_name'],
1202 input_row['mother_address'],
1203 input_row['mother_city'],
1204 input_row['mother_country'],
1205 input_row['mother_phone'],
1206 input_row['nationality']
1207 ]
1208 if fullPerson in dupeUniques:
1209 # Write the duplicate to the duplicates.csv file:
1210 csv_6.writerow(person_row)
1211 print('Duplicate Person w/single account using ref# |', input_row['person_code'], '| moved to dirty file.' )
1212 # If this is not a duplicate, write to clean file:
1213 else:
1214 persons_csv.writerow(person_row)
1215 print('Single Person w/single account using ref# |', input_row['person_code'], '| moved to clean file.' )
1216
1217 # When done, close all of our files:
1218 csv_output_file1.close()
1219 csv_output_file2.close()
1220 csv_output_file3.close()
1221 csv_output_file5.close()
1222 csv_output_file6.close()
1223 persons_output.close()
1224 csv_input_file.close()
1225 cashflow_dirty.close()
1226 cashflow_clean.close()
1227 ids_dirty.close()
1228 ids_clean.close()
1229 accounts_dirty.close()
1230 accounts_clean.close()
1231 log.close()
1232 done = time.time()
1233 elapsed = done - start
1234 print("Completed in:", elapsed)
1235
1236 return "File splice completed. Proceed to import stage."
1237
1238
if __name__ == '__main__':
    # Script entry point: run the splitter with the input file and the
    # output paths configured at the top of this file. Only these six paths
    # are passed explicitly here.
    success = split_csv(
        input_path=file0,
        output_path1=file1,
        output_path2=file2,
        output_path3=file3,
        output_path5=file5,
        output_path6=file6,
    )
    # split_csv returns a human-readable completion message; echo it so the
    # operator knows the run finished.
    print(success)