· 7 years ago · Dec 02, 2018, 04:44 PM
1import rarfile
2import os
3import shutil
4import time
5import pymysql
6import sys
7import hashlib
8
# Working directories, all relative to the current working directory.
ARCHTIVE_DIR = 'archive'      # scanned recursively for archives to process
TARGET_DIR = 'target'         # one sub-directory per extracted archive
PROTECTED_DIR = 'protected'   # copies of password-protected archives
DAMAGED_DIR = 'damaged'
DUPLICATE_DIR = 'duplicate'   # copies of archives whose hash is already known

# Delete archives after unpacking?
REMOVE = False
# Delete archives whose hash is already in the database instead of copying
# them to DUPLICATE_DIR?
REMOVE_DUPLICATE = False

# Create every working directory up front. exist_ok=True makes this a no-op
# for directories that already exist and avoids the check-then-create race of
# a separate isdir()/mkdir() pair.
for _dir in (ARCHTIVE_DIR, TARGET_DIR, PROTECTED_DIR, DAMAGED_DIR,
             DUPLICATE_DIR):
    os.makedirs(_dir, exist_ok=True)

# Run-wide counters.
TOTAL_FILES = 0
TOTAL_PROCCESSED = 0
DAMAGED = 0
PROTECTED = 0
# Minimum progress step, in percent, between two progress reports.
PERC_DIFF = 10
# Percentage at which the last progress report was printed.
CURRENT_PERC = 0

# Schema of the table that stores the hashes of processed archives
# (kept as a no-op string literal for reference).
'''
CREATE TABLE IF NOT EXISTS `hash_arc` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `date` date NOT NULL,
    `filename` varchar(255) NOT NULL,
    `hash` varchar(32) NOT NULL,
    `bad` tinyint(1) DEFAULT '0',
    PRIMARY KEY(id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''
47
48
def file_as_bytes(file):
    """Return the full contents of an open file object and close it.

    Args:
        file: an open file-like object that supports the context-manager
            protocol and ``read()``.

    Returns:
        Whatever ``file.read()`` returns (bytes for a file opened in binary
        mode). The file is closed when the function returns.
    """
    with file:
        return file.read()
52
def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of the file at *path*, read chunk by chunk."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def process_rar(rar_name, mysql_cursor, password=None):
    """Record one archive in ``hash_arc`` and unpack it below TARGET_DIR.

    Steps:
      1. Bump the progress counters and print a report each time progress
         has advanced by at least PERC_DIFF percent.
      2. Hash the archive (MD5) and look the hash up in ``hash_arc``.
         If it is already known, the archive is deleted (REMOVE_DUPLICATE)
         or copied to DUPLICATE_DIR, the row's date is refreshed, and the
         function returns without extracting.
      3. Otherwise a new row is inserted. Password-protected archives are
         copied to PROTECTED_DIR with a ``dd.mm__`` name prefix; all others
         are extracted to ``TARGET_DIR/<archive name without extension>``.

    Errors are caught, printed and counted in DAMAGED rather than raised.

    Args:
        rar_name: path of the archive to process.
        mysql_cursor: DB-API cursor that uses ``%s`` placeholders (as
            PyMySQL does). Nothing is committed here; the caller commits.
        password: optional password forwarded to ``extractall``.

    Returns:
        None.
    """
    global TOTAL_PROCCESSED, DAMAGED, PROTECTED, CURRENT_PERC
    try:
        arch_name = os.path.basename(rar_name)

        TOTAL_PROCCESSED += 1
        # Guard against TOTAL_FILES == 0: a ZeroDivisionError here would be
        # caught below and wrongly counted as a damaged archive.
        perc = TOTAL_PROCCESSED / TOTAL_FILES * 100 if TOTAL_FILES else 0.0
        if perc - CURRENT_PERC >= PERC_DIFF:
            CURRENT_PERC = perc
            print("Всего обработано файлов: %s из %s (%s%%)"
                  % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
            print("Защищенных архивов: %s" % (PROTECTED))
            print("Поврежденных архивов %s" % (DAMAGED))

        digest = _md5_of_file(rar_name)

        # Values are passed as query parameters so that quotes in a file
        # name can neither break nor inject into the SQL statement.
        rows = ()
        try:
            mysql_cursor.execute(
                "SELECT * FROM hash_arc where hash = %s", (digest,))
            rows = mysql_cursor.fetchall()
        except Exception as e:
            # Best effort: a failed lookup is treated as "not seen before".
            print(e)

        if rows:
            # The same content has been processed before.
            if REMOVE_DUPLICATE:
                os.remove(rar_name)
            else:
                shutil.copyfile(rar_name,
                                os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже существует в БД')
            try:
                mysql_cursor.execute(
                    "UPDATE hash_arc SET date = date(NOW()) where hash = %s",
                    (digest,))
            except Exception as e:
                print(e)
            return

        # First time this content is seen: remember its hash.
        try:
            mysql_cursor.execute(
                "INSERT INTO hash_arc(date, filename, hash) "
                "VALUES(date(NOW()), %s, %s)",
                (arch_name, digest))
        except Exception as e:
            print(e)

        with rarfile.RarFile(rar_name) as archive:
            dirname = os.path.splitext(arch_name)[0]

            if archive.needs_password():
                PROTECTED += 1
                protected_name = time.strftime("%d.%m__") + dirname + '.rar'
                shutil.copyfile(rar_name,
                                os.path.join(PROTECTED_DIR, protected_name))
                if REMOVE:
                    os.remove(rar_name)
                return

            target = os.path.join(TARGET_DIR, dirname)
            try:
                os.mkdir(target)
            except FileExistsError:
                print(dirname + " exists")
            except OSError as e:
                # Any other failure is reported as what it is; extraction
                # is still attempted, as before.
                print(e)

            try:
                archive.extractall(target, pwd=password)
            except Exception as e:
                print(e)
                DAMAGED += 1
                if REMOVE:
                    print("Removing " + rar_name)
                    os.remove(rar_name)
                return

            if REMOVE:
                os.remove(rar_name)
    except Exception as e:
        # NOTE(review): with REMOVE enabled, any unexpected error deletes
        # the archive, not only errors caused by a corrupt file.
        DAMAGED += 1
        print(e)
        # The archive may already be gone if the error happened after a
        # removal; do not let the cleanup itself raise.
        if REMOVE and os.path.isfile(rar_name):
            os.remove(rar_name)
            print("Removing " + rar_name)
146
# Database connection settings.
DB_HOST = 'localhost'
DB_NAME = 'test'
DB_USER = 'root'
DB_PASS = ''

conn = pymysql.connect(host=DB_HOST,
                       database=DB_NAME,
                       user=DB_USER,
                       password=DB_PASS)
if conn.open:
    print('Connected to MySQL database')
else:
    print("Can't connect to database")
    # Non-zero status so that callers can detect the failure.
    sys.exit(1)

# autocommit is a method on a PyMySQL connection; assigning to it would only
# overwrite the method. All changes are committed once, at the end of the run.
conn.autocommit(False)
cursor = conn.cursor()

try:
    # Collect every file below ARCHTIVE_DIR first so that the total is known
    # before processing starts (process_rar uses it for progress reports).
    files_path = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(ARCHTIVE_DIR)
        for name in files
    ]
    TOTAL_FILES = len(files_path)

    for file in files_path:
        print("Processing " + file)
        process_rar(file, cursor)

    # Report the real completion percentage; CURRENT_PERC only holds the
    # value at which the last intermediate report was printed.
    final_perc = int(TOTAL_PROCCESSED / TOTAL_FILES * 100) if TOTAL_FILES else 0
    print("Всего обработано файлов: %s из %s (%s%%)"
          % (TOTAL_PROCCESSED, TOTAL_FILES, final_perc))
    print("Защищенных архивов: %s" % (PROTECTED))
    print("Поврежденных архивов %s" % (DAMAGED))
finally:
    # Persist the hashes recorded so far even if the run was interrupted,
    # then always release the connection.
    try:
        conn.commit()
    finally:
        cursor.close()
        conn.close()