# -*- coding: utf-8 -*-
# Pasted on Feb 03, 2019, 08:28 PM.

import hashlib
import os
import shutil
import sys
import time
import zipfile

import pymysql
import rarfile

# Working directories, all relative to the current working directory.
# NOTE: "ARCHTIVE" is a misspelling of "ARCHIVE"; the name is kept because
# other code in this file refers to it.
ARCHTIVE_DIR = 'archive'              # walked for input archives
TARGET_DIR = 'target'                 # extraction destination, one sub-directory per archive
PROTECTED_DIR = 'protected'           # copies of password-protected archives
BAD_PROTECTED_DIR = 'bad_protected'   # protected archives with a BAD_EXTENSIONS member
DAMAGED_DIR = 'damaged'               # created here; not referenced by the visible code
DUPLICATE_DIR = 'duplicate'           # copies of archives whose hash is already in the DB

# Member extensions that send a password-protected archive to BAD_PROTECTED_DIR.
BAD_EXTENSIONS = ['.txt']
# "exe","doc","cfg"

# Delete archives after unpacking?
REMOVE = False
# Delete duplicates instead of copying them to DUPLICATE_DIR?
REMOVE_DUPLICATE = False

# Create every working directory up front; exist_ok avoids the
# check-then-create race of a separate isdir() test.
for _directory in (ARCHTIVE_DIR, TARGET_DIR, PROTECTED_DIR,
                   DAMAGED_DIR, DUPLICATE_DIR, BAD_PROTECTED_DIR):
    os.makedirs(_directory, exist_ok=True)

# Run-wide counters, updated by the archive-processing functions.
TOTAL_FILES = 0        # number of files found; set by the driver code
TOTAL_PROCCESSED = 0   # archives handed to a process_* function so far
DUPLICATED = 0         # archives whose hash was already registered
DAMAGED = 0            # archives that failed to open or extract
PROTECTED = 0          # password-protected archives
PERC_DIFF = 50         # minimum gap, in percent points, between progress reports
CURRENT_PERC = 0       # percentage shown by the last progress report

# Schema of the `hash_arc` table that the queries in this file expect:
'''
CREATE TABLE IF NOT EXISTS `hash_arc` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `date` date NOT NULL,
  `filename` varchar(255) NOT NULL,
  `hash` varchar(32) NOT NULL,
  `bad` tinyint(1) DEFAULT '0',
  PRIMARY KEY(id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''


def file_as_bytes(file):
    """Return the full contents of an open file object and close it.

    *file* must be usable as a context manager (for example the result of
    ``open(path, 'rb')``); it is closed when this function returns.
    """
    with file:
        return file.read()

def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _report_progress():
    """Count one processed archive and print a progress report when one is due."""
    global TOTAL_PROCCESSED, CURRENT_PERC
    TOTAL_PROCCESSED += 1
    # TOTAL_FILES stays 0 until the driver code sets it; do not divide by zero.
    perc = TOTAL_PROCCESSED / TOTAL_FILES * 100 if TOTAL_FILES else 0.0
    if perc - CURRENT_PERC >= PERC_DIFF:
        CURRENT_PERC = perc
        print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
        print("Защищенных архивов: %s" % (PROTECTED))
        print("Поврежденных архивов %s" % (DAMAGED))
        print("Дубликатов %s" % (DUPLICATED))


def _execute_logged(mysql_cursor, query, args):
    """Run a parameterized query; print the error and return False on failure."""
    try:
        mysql_cursor.execute(query, args)
    except Exception as e:
        # Best effort: a database error must not stop archive processing.
        print(e)
        return False
    return True


def _hash_is_registered(mysql_cursor, digest):
    """Return True when *digest* already has a row in ``hash_arc``.

    A failed lookup is printed and treated as "not registered".
    """
    try:
        mysql_cursor.execute("SELECT id FROM hash_arc WHERE hash = %s LIMIT 1", (digest,))
        return mysql_cursor.fetchone() is not None
    except Exception as e:
        print(e)
        return False


def _process_archive(path, mysql_cursor, password, open_archive, is_protected, suffix):
    """Shared implementation behind process_rar() and process_zip().

    open_archive -- callable opening *path* as a context-managed archive object
    is_protected -- callable(archive) telling whether the archive is encrypted
    suffix       -- extension given to the quarantined copy of a protected archive
    """
    global DUPLICATED, PROTECTED, DAMAGED
    try:
        _report_progress()
        arch_name = os.path.basename(path)
        digest = _md5_of_file(path)

        if _hash_is_registered(mysql_cursor, digest):
            DUPLICATED += 1
            if REMOVE_DUPLICATE:
                os.remove(path)
            else:
                shutil.copyfile(path, os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже существует в БД')
            _execute_logged(
                mysql_cursor,
                "UPDATE hash_arc SET date = date(NOW()) WHERE hash = %s",
                (digest,))
            return

        # Values are passed as parameters so quotes in a file name can neither
        # break nor inject into the statement.
        _execute_logged(
            mysql_cursor,
            "INSERT INTO hash_arc(date, filename, hash) VALUES(date(NOW()), %s, %s)",
            (arch_name, digest))

        stem = os.path.splitext(arch_name)[0]
        quarantine_dir = None
        extract_error = None
        with open_archive(path) as archive:
            if is_protected(archive):
                # Protected archives are never extracted; they are quarantined,
                # and one member with a listed extension picks the "bad" folder.
                has_bad_member = any(
                    os.path.splitext(info.filename)[-1] in BAD_EXTENSIONS
                    for info in archive.infolist())
                quarantine_dir = BAD_PROTECTED_DIR if has_bad_member else PROTECTED_DIR
            else:
                target = os.path.join(TARGET_DIR, stem)
                try:
                    os.makedirs(target)
                except FileExistsError:
                    print(stem + " exists")
                try:
                    archive.extractall(target, pwd=password)
                except Exception as e:
                    extract_error = e

        # The source file is copied/removed only after the archive is closed:
        # deleting a file that is still open fails on some platforms.
        if quarantine_dir is not None:
            PROTECTED += 1
            quarantined_name = time.strftime("%d.%m__") + stem + suffix
            shutil.copyfile(path, os.path.join(quarantine_dir, quarantined_name))
            if REMOVE:
                os.remove(path)
            return

        if extract_error is not None:
            print(extract_error)
            DAMAGED += 1
            if REMOVE:
                print("Removing " + path)
                os.remove(path)
            return

        if REMOVE:
            os.remove(path)
    except Exception as e:
        # Per-archive boundary: any other failure counts the archive as damaged.
        DAMAGED += 1
        print(e)
        if REMOVE and os.path.exists(path):
            try:
                os.remove(path)
                print("Removing " + path)
            except OSError as remove_error:
                # A failed cleanup must not abort the caller's loop.
                print(remove_error)


def process_rar(rar_name, mysql_cursor, password=None):
    """Register, de-duplicate and extract one RAR archive.

    rar_name     -- path of the archive
    mysql_cursor -- DB-API cursor used for the ``hash_arc`` queries
    password     -- passed to ``extractall``; archives reporting
                    ``needs_password()`` are quarantined, not extracted

    Updates the module-level counters, prints progress and errors, and
    returns None. Errors are reported by printing; none are raised on purpose.
    """
    _process_archive(
        rar_name, mysql_cursor, password,
        open_archive=rarfile.RarFile,
        is_protected=lambda archive: archive.needs_password(),
        suffix='.rar')


def process_zip(rar_name, mysql_cursor, password=None):
    """Register, de-duplicate and extract one ZIP archive.

    Same contract as process_rar(); *rar_name* is the path of the ZIP file.
    An archive is treated as protected when any member has the encryption
    flag (bit 0 of ``flag_bits``) set. A ``str`` password is encoded to
    UTF-8 because ``zipfile`` accepts only bytes.
    """
    if isinstance(password, str):
        password = password.encode('utf-8')
    _process_archive(
        rar_name, mysql_cursor, password,
        open_archive=zipfile.ZipFile,
        is_protected=lambda archive: any(
            info.flag_bits & 0x1 for info in archive.infolist()),
        suffix='.zip')


# Connection settings for the MySQL database that holds the `hash_arc` table.
DB_HOST = 'localhost'
DB_NAME = 'test'
DB_USER = 'root'
DB_PASS = ''

conn = pymysql.connect(host=DB_HOST,
                       database=DB_NAME,
                       user=DB_USER,
                       password=DB_PASS)
if conn.open:
    print('Connected to MySQL database')
else:
    print("Can't connect to database")
    sys.exit(1)

try:
    # autocommit is a method on the connection; assigning to it would only
    # overwrite the method. Changes are committed once, after the run.
    conn.autocommit(False)
    cursor = conn.cursor()

    # Collect every file below the archive directory.
    files_path = []
    for root, dirs, files in os.walk(ARCHTIVE_DIR):
        for name in files:
            files_path.append(os.path.join(root, name))
    TOTAL_FILES = len(files_path)

    for file in files_path:
        print("Processing " + file)
        # Match on the file's extension only, so ".rar"/".zip" inside a
        # directory name or the middle of a file name is not dispatched.
        lowered = file.lower()
        if lowered.endswith('.rar'):
            process_rar(file, cursor)
        elif lowered.endswith('.zip'):
            process_zip(file, cursor)
        else:
            print("Not archive")

    # Report the real final percentage, not the last progress threshold.
    final_perc = int(TOTAL_PROCCESSED / TOTAL_FILES * 100) if TOTAL_FILES else 0
    print("Всего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, final_perc))
    print("Защищенных архивов: %s" % (PROTECTED))
    print("Поврежденных архивов %s" % (DAMAGED))
    print("Дубликатов %s" % (DUPLICATED))
    conn.commit()
finally:
    conn.close()