· 6 years ago · Jun 14, 2019, 10:00 AM
1#Kopirajt LBLZR_ lmao
2import os.path, codecs, sys
3
# Input files, processed in the order listed.
date_files = [
    "dates1.txt",
    "dates2.txt",
    "dates3.txt",
    "dates4.txt",
]
# Destination file that receives the combined, normalized dates.
output_file = "dates_out.txt"
6
def save_list(listt, filename):
    """Write every item of *listt* to *filename*, one item per line.

    The file is encoded as UTF-8, each line is terminated with CRLF, and any
    previous content of the file is replaced.

    Args:
        listt: Iterable of items; each one is formatted with ``str()``.
        filename: Path of the file to create or overwrite.
    """
    # Mode "w" already empties an existing file, so no explicit truncate() is
    # needed. newline="" disables newline translation so the "\r\n" written
    # below reaches the file exactly as-is on every platform.
    with open(filename, "w", encoding="utf-8", newline="") as fp:
        fp.writelines(f"{item}\r\n" for item in listt)
13
def load_list(filename):
    """Read *filename* and return its lines with surrounding whitespace removed.

    Args:
        filename: Path of a UTF-8 text file.

    Returns:
        A list with one string per line, stripped of leading and trailing
        whitespace (including the line terminator). An empty list is returned
        when *filename* does not name an existing regular file.
    """
    # A missing input is treated as "no entries" rather than as an error.
    if not os.path.isfile(filename):
        return []

    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
    # mark, which would otherwise stay glued to the first entry.
    with open(filename, "r", encoding="utf-8-sig") as fp:
        return [line.strip() for line in fp]
23
def _split_date(date):
    """Split one date string on '.', '/' or whitespace into a tuple of three ints.

    Returns an empty tuple for a blank line. Raises ValueError when a
    non-blank line does not consist of exactly three integer components.
    """
    parts = date.replace(".", " ").replace("/", " ").split()
    if not parts:
        return ()
    if len(parts) != 3:
        raise ValueError(f"expected three date components, got {date!r}")
    return tuple(int(part) for part in parts)


def _classify_positions(parsed_dates):
    """Return the type code (0 unknown, 1 day, 2 month, 3 year) of each position."""
    types = [0, 0, 0]
    if not parsed_dates:
        return types

    # The largest value ever seen at a position is the strongest evidence:
    # above 31 it can only be a year, above 12 it cannot be a month.
    peaks = [max(column) for column in zip(*parsed_dates)]
    candidates = {
        3: [i for i, peak in enumerate(peaks) if peak > 31],
        1: [i for i, peak in enumerate(peaks) if 12 < peak <= 31],
        2: [i for i, peak in enumerate(peaks) if peak <= 12],
    }
    # Only accept a type when exactly one position qualifies for it.
    for kind, positions in candidates.items():
        if len(positions) == 1:
            types[positions[0]] = kind

    # With two positions settled, the third is decided by elimination.
    unknown = [i for i, kind in enumerate(types) if kind == 0]
    if len(unknown) == 1:
        types[unknown[0]] = ({1, 2, 3} - set(types)).pop()
    return types


def _build_lookup(types):
    """Map each output slot (day, month, year) to the input position feeding it."""
    lookup = [None, None, None]
    for position, kind in enumerate(types):
        if kind:
            lookup[kind - 1] = position

    # Best effort for an undetermined format: the unclassified positions fill
    # the still-empty output slots in their original relative order.
    spare = [position for position, kind in enumerate(types) if not kind]
    for slot, source in enumerate(lookup):
        if source is None:
            lookup[slot] = spare.pop(0)
    return lookup


def process_dates(dates):
    """Detect the component order of *dates* and rewrite them as day/month/year.

    Every entry holds three integers separated by '.', '/' or whitespace. The
    order (which position is day, month, year) is assumed to be the same for
    the whole batch and is inferred from the largest value seen at each
    position. Blank entries are skipped. Progress is printed to stdout.

    Args:
        dates: Iterable of date strings.

    Returns:
        A list of "day/month/year" strings without leading zeros, in input
        order. When the order cannot be fully determined (for example every
        day is 12 or lower), unresolved components keep their original
        relative order.

    Raises:
        ValueError: If a non-blank entry does not contain exactly three
            integer components.
    """
    print("Classifying date format")

    type_string = ["unknown", "day", "month", "year"]  # 1 = day, 2 = month, 3 = year

    parsed_dates = [parts for parts in map(_split_date, dates) if parts]
    types = _classify_positions(parsed_dates)

    print(f"Date format: {type_string[types[0]]}/{type_string[types[1]]}/{type_string[types[2]]}")
    print("Changing to standard format (day/month/year)...")

    # type_lookup[k] is the input position that supplies output slot k.
    type_lookup = _build_lookup(types)
    print(f"Lookup table: {type_lookup}")

    return [
        f"{parts[type_lookup[0]]}/{parts[type_lookup[1]]}/{parts[type_lookup[2]]}"
        for parts in parsed_dates
    ]
66
# Normalize the dates of every input file, in order, into one combined list.
dates_out = []
for filename in date_files:
    print(f"Processing file {filename}")
    raw_dates = load_list(filename)
    dates_out += process_dates(raw_dates)

# Write the combined result in a single pass.
print(f"Saving dates to {output_file}")
save_list(dates_out, output_file)