· 7 years ago · Dec 24, 2018, 07:36 PM
1# -*- coding: utf-8 -*-
2
3import rarfile
4import os
5import shutil
6import time
7import pymysql
8import sys
9import hashlib
10import zipfile
11
# Working directories, all relative to the current working directory.
ARCHTIVE_DIR = 'archive'      # scanned for incoming archives
TARGET_DIR = 'target'         # extraction output, one sub-directory per archive
PROTECTED_DIR = 'protected'   # copies of password-protected archives
DAMAGED_DIR = 'damaged'       # created here; not referenced by the visible code
DUPLICATE_DIR = 'duplicate'   # copies of archives whose hash is already known

# Delete archives after unpacking?
REMOVE = False
# Delete (instead of copy aside) archives already registered in the database?
REMOVE_DUPLICATE = False

# Make sure every working directory exists before any file is processed.
for _workdir in (ARCHTIVE_DIR, TARGET_DIR, PROTECTED_DIR, DAMAGED_DIR, DUPLICATE_DIR):
    if not os.path.isdir(_workdir):
        os.mkdir(_workdir)

# Run-wide counters, updated through ``global`` by the processing functions.
TOTAL_FILES = 0
TOTAL_PROCCESSED = 0
DUPLICATED = DAMAGED = PROTECTED = 0
# A progress report is printed each time the completed percentage has grown
# by at least PERC_DIFF points since the last report (CURRENT_PERC).
PERC_DIFF = 50
CURRENT_PERC = 0
40
41'''
42CREATE TABLE IF NOT EXISTS `hash_arc` (
43 `id` int(11) NOT NULL AUTO_INCREMENT,
44 `date` date NOT NULL,
45 `filename` varchar(255) NOT NULL,
46 `hash` varchar(32) NOT NULL,
47 `bad` tinyint(1) DEFAULT '0',
48 PRIMARY KEY(id)
49) ENGINE=InnoDB DEFAULT CHARSET=utf8;
50'''
51
52
def file_as_bytes(file):
    """Return the full contents of an already opened file and close it.

    ``file`` is used as a context manager, so it is closed when the read
    finishes, whether or not the read raised.
    """
    with file:
        contents = file.read()
    return contents
56
def process_rar(rar_name, mysql_cursor, password=None):
    """Register a RAR archive in ``hash_arc`` and unpack it under TARGET_DIR.

    The archive is identified by the MD5 digest of its contents:

    * Digest already in ``hash_arc``: the file is a duplicate. It is deleted
      when REMOVE_DUPLICATE is set, otherwise copied into DUPLICATE_DIR; the
      row's date is refreshed and nothing is extracted.
    * Otherwise a new row is inserted. An archive that reports
      ``needs_password()`` is copied into PROTECTED_DIR under a
      ``dd.mm__<name>.rar`` name; any other archive is extracted into
      ``TARGET_DIR/<archive name without extension>``.

    When REMOVE is set, the source file is deleted after a non-duplicate
    archive has been handled, including when it turned out to be damaged.

    Args:
        rar_name: Path of the archive file.
        mysql_cursor: Database cursor accepting ``%s`` placeholders with a
            parameter tuple (pymysql style).
        password: Forwarded to ``RarFile.extractall`` as ``pwd``.

    Returns:
        None. Failures are printed and counted in the module-level counters
        rather than raised.
    """
    global TOTAL_PROCCESSED, CURRENT_PERC, DUPLICATED, DAMAGED, PROTECTED

    arch_name = os.path.basename(rar_name)

    TOTAL_PROCCESSED += 1
    # TOTAL_FILES is 0 until the caller has counted the files; skip the
    # percentage instead of dividing by zero (which used to be reported as a
    # damaged archive).
    perc = 100.0 * TOTAL_PROCCESSED / TOTAL_FILES if TOTAL_FILES else 0.0
    if perc - CURRENT_PERC >= PERC_DIFF:
        CURRENT_PERC = perc
        print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
        print("Защищенных архивов: %s" % (PROTECTED))
        print("Поврежденных архивов %s" % (DAMAGED))
        print("Дубликатов %s" % (DUPLICATED))

    damaged = False
    try:
        # Hash in chunks so large archives are never held in memory whole.
        digest = hashlib.md5()
        with open(rar_name, 'rb') as stream:
            for chunk in iter(lambda: stream.read(1 << 20), b''):
                digest.update(chunk)
        file_hash = digest.hexdigest()

        # Values are bound by the driver: archive names come from the file
        # system and may contain quotes.
        is_duplicate = False
        try:
            mysql_cursor.execute(
                "SELECT 1 FROM hash_arc WHERE hash = %s LIMIT 1", (file_hash,))
            is_duplicate = mysql_cursor.fetchone() is not None
        except Exception as e:
            # A failed lookup is treated as "not seen before".
            print(e)

        if is_duplicate:
            DUPLICATED += 1
            if REMOVE_DUPLICATE:
                os.remove(rar_name)
            else:
                shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже существует в БД')
            try:
                mysql_cursor.execute(
                    "UPDATE hash_arc SET date = date(NOW()) WHERE hash = %s",
                    (file_hash,))
            except Exception as e:
                print(e)
            return

        try:
            mysql_cursor.execute(
                "INSERT INTO hash_arc(date, filename, hash) "
                "VALUES(date(NOW()), %s, %s)",
                (arch_name, file_hash))
        except Exception as e:
            print(e)

        dirname = os.path.splitext(arch_name)[0]
        target_path = os.path.join(TARGET_DIR, dirname)
        with rarfile.RarFile(rar_name) as archive:
            # NOTE(review): a protected archive is set aside even when
            # `password` is supplied - confirm that this is intended.
            if archive.needs_password():
                PROTECTED += 1
                protected_name = time.strftime("%d.%m__") + dirname + '.rar'
                shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, protected_name))
            else:
                try:
                    os.mkdir(target_path)
                except OSError:
                    print(dirname + " exists")
                try:
                    archive.extractall(target_path, pwd=password)
                except Exception as e:
                    print(e)
                    damaged = True
    except Exception as e:
        print(e)
        damaged = True

    # Counted once per file, no matter how many steps failed.
    if damaged:
        DAMAGED += 1

    # The source is deleted only here, after the archive object is closed.
    if REMOVE:
        if damaged:
            print("Removing " + rar_name)
        try:
            os.remove(rar_name)
        except OSError as e:
            print(e)
152
153
def process_zip(rar_name, mysql_cursor, password=None):
    """Register a ZIP archive in ``hash_arc`` and unpack it under TARGET_DIR.

    The archive is identified by the MD5 digest of its contents:

    * Digest already in ``hash_arc``: the file is a duplicate. It is deleted
      when REMOVE_DUPLICATE is set, otherwise copied into DUPLICATE_DIR; the
      row's date is refreshed and nothing is extracted.
    * Otherwise a new row is inserted. An archive with at least one member
      whose encryption flag (bit 0 of ``flag_bits``) is set is copied into
      PROTECTED_DIR under a ``dd.mm__<name>.zip`` name; any other archive is
      extracted into ``TARGET_DIR/<archive name without extension>``.

    When REMOVE is set, the source file is deleted after a non-duplicate
    archive has been handled, including when it turned out to be damaged.

    Args:
        rar_name: Path of the ZIP file (the parameter name is shared with
            ``process_rar``).
        mysql_cursor: Database cursor accepting ``%s`` placeholders with a
            parameter tuple (pymysql style).
        password: Forwarded to ``ZipFile.extractall`` as ``pwd``; text is
            encoded to UTF-8 because ``zipfile`` only accepts bytes.

    Returns:
        None. Failures are printed and counted in the module-level counters
        rather than raised.
    """
    global TOTAL_PROCCESSED, CURRENT_PERC, DUPLICATED, DAMAGED, PROTECTED

    arch_name = os.path.basename(rar_name)

    TOTAL_PROCCESSED += 1
    # TOTAL_FILES is 0 until the caller has counted the files; skip the
    # percentage instead of dividing by zero (which used to be reported as a
    # damaged archive).
    perc = 100.0 * TOTAL_PROCCESSED / TOTAL_FILES if TOTAL_FILES else 0.0
    if perc - CURRENT_PERC >= PERC_DIFF:
        CURRENT_PERC = perc
        print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
        print("Защищенных архивов: %s" % (PROTECTED))
        print("Поврежденных архивов %s" % (DAMAGED))
        print("Дубликатов %s" % (DUPLICATED))

    # zipfile raises TypeError for a non-bytes password.
    if password is not None and not isinstance(password, bytes):
        password = password.encode('utf-8')

    damaged = False
    try:
        # Hash in chunks so large archives are never held in memory whole.
        digest = hashlib.md5()
        with open(rar_name, 'rb') as stream:
            for chunk in iter(lambda: stream.read(1 << 20), b''):
                digest.update(chunk)
        file_hash = digest.hexdigest()

        # Values are bound by the driver: archive names come from the file
        # system and may contain quotes.
        is_duplicate = False
        try:
            mysql_cursor.execute(
                "SELECT 1 FROM hash_arc WHERE hash = %s LIMIT 1", (file_hash,))
            is_duplicate = mysql_cursor.fetchone() is not None
        except Exception as e:
            # A failed lookup is treated as "not seen before".
            print(e)

        if is_duplicate:
            DUPLICATED += 1
            if REMOVE_DUPLICATE:
                os.remove(rar_name)
            else:
                shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже существует в БД')
            try:
                mysql_cursor.execute(
                    "UPDATE hash_arc SET date = date(NOW()) WHERE hash = %s",
                    (file_hash,))
            except Exception as e:
                print(e)
            return

        try:
            mysql_cursor.execute(
                "INSERT INTO hash_arc(date, filename, hash) "
                "VALUES(date(NOW()), %s, %s)",
                (arch_name, file_hash))
        except Exception as e:
            print(e)

        dirname = os.path.splitext(arch_name)[0]
        target_path = os.path.join(TARGET_DIR, dirname)
        with zipfile.ZipFile(rar_name) as archive:
            # Bit 0 of the general-purpose flags marks an encrypted member.
            if any(info.flag_bits & 0x1 for info in archive.infolist()):
                PROTECTED += 1
                protected_name = time.strftime("%d.%m__") + dirname + '.zip'
                shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, protected_name))
            else:
                try:
                    os.mkdir(target_path)
                except OSError:
                    print(dirname + " exists")
                try:
                    archive.extractall(target_path, pwd=password)
                except Exception as e:
                    print(e)
                    damaged = True
    except Exception as e:
        print(e)
        damaged = True

    # Counted once per file, no matter how many steps failed.
    if damaged:
        DAMAGED += 1

    # ZipFile keeps the file open, so the source is deleted only after the
    # ``with`` block has closed it (deleting an open file fails on Windows).
    if REMOVE:
        if damaged:
            print("Removing " + rar_name)
        try:
            os.remove(rar_name)
        except OSError as e:
            print(e)
261
262
263
# Connection settings for the database that holds the `hash_arc` table.
DB_HOST = 'localhost'
DB_NAME = 'test'
DB_USER = 'root'
DB_PASS = ''

conn = pymysql.connect(host=DB_HOST,
                       database=DB_NAME,
                       user=DB_USER,
                       password=DB_PASS)
if conn.open:
    print('Connected to MySQL database')
else:
    print("Can't connect to database")
    # Non-zero status so callers can tell the run did not happen.
    sys.exit(1)

# In pymysql `autocommit` is a method; assigning to it would only overwrite
# the method instead of changing the mode.
conn.autocommit(False)
cursor = conn.cursor()

try:
    # Collect every file below ARCHTIVE_DIR first so the total is known
    # before processing starts (the progress reports depend on it).
    files_path = []
    for root, dirs, files in os.walk(ARCHTIVE_DIR):
        for name in files:
            files_path.append(os.path.join(root, name))
    TOTAL_FILES = len(files_path)

    for file in files_path:
        print("Processing " + file)
        # Dispatch on the real extension: a substring search would also match
        # directory names and names such as "notes.rar.txt".
        extension = os.path.splitext(file)[1].lower()
        if extension == '.rar':
            process_rar(file, cursor)
        elif extension == '.zip':
            process_zip(file, cursor)
        else:
            print("Not archive")

    # Report the actual share of processed files; CURRENT_PERC only holds the
    # percentage of the last intermediate report.
    final_perc = int(100.0 * TOTAL_PROCCESSED / TOTAL_FILES) if TOTAL_FILES else 0
    print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, final_perc))
    print("Защищенных архивов: %s" % (PROTECTED))
    print("Поврежденных архивов %s" % (DAMAGED))
    print("Дубликатов %s" % (DUPLICATED))
    conn.commit()
finally:
    # Release the database resources even if the run is interrupted.
    cursor.close()
    conn.close()
303'''
304for root, dirs, files in os.walk(ARCHTIVE_DIR):
305 for name in files:
306 try:
307 print("Processing " + os.path.join(root, name))
308 process_rar(os.path.join(root, name))
309 except Exception as e:
310 print(e)
311 continue
312'''