# Paste metadata: 7 years ago, Dec 06, 2018, 05:38 PM
# -*- coding: utf-8 -*-

import hashlib
import os
import shutil
import sys
import time

import pymysql
import rarfile

# Working directories, relative to the current working directory.
ARCHTIVE_DIR = 'archive'      # walked recursively for input archives
TARGET_DIR = 'target'         # each archive is extracted into a sub-directory here
PROTECTED_DIR = 'protected'   # copies of password-protected archives
DAMAGED_DIR = 'damaged'       # created here; not written to by the code in this file
DUPLICATE_DIR = 'duplicate'   # copies of archives whose hash is already in the DB

# Delete archives after extraction?
REMOVE = False
# Delete duplicate archives instead of copying them to DUPLICATE_DIR?
REMOVE_DUPLICATE = False

# Make sure every working directory exists before any archive is processed.
for _directory in (ARCHTIVE_DIR, TARGET_DIR, PROTECTED_DIR,
                   DAMAGED_DIR, DUPLICATE_DIR):
    if not os.path.isdir(_directory):
        os.mkdir(_directory)

# Run-wide counters, updated by process_rar() through ``global``.
TOTAL_FILES = 0        # number of files found under ARCHTIVE_DIR
TOTAL_PROCCESSED = 0   # number of files handed to process_rar() so far
DUPLICATED = 0         # archives whose hash was already registered
DAMAGED = 0            # archives that failed to open or extract
PROTECTED = 0          # archives that require a password
PERC_DIFF = 10         # minimum progress (percentage points) between reports
CURRENT_PERC = 0       # progress percentage at the time of the last report

'''
CREATE TABLE IF NOT EXISTS `hash_arc` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `date` date NOT NULL,
  `filename` varchar(255) NOT NULL,
  `hash` varchar(32) NOT NULL,
  `bad` tinyint(1) DEFAULT '0',
  PRIMARY KEY(id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''


def file_as_bytes(file):
    """Return the entire content of an open file object and close it.

    *file* must support the context-manager protocol and ``read()``;
    it is closed when this function returns, even if reading fails.
    """
    with file:
        return file.read()

def _md5_of_file(path, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of the file at *path*, reading it in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def process_rar(rar_name, mysql_cursor, password=None):
    """Register one archive in ``hash_arc`` and extract it under TARGET_DIR.

    Steps:
      1. Bump TOTAL_PROCCESSED and print a progress report once progress
         has advanced by at least PERC_DIFF percentage points.
      2. Compute the archive's MD5 and look it up in ``hash_arc``.
         * unknown hash: insert a new row;
         * known hash: count a duplicate, delete the archive
           (REMOVE_DUPLICATE) or copy it to DUPLICATE_DIR, refresh the
           row's date and return without extracting.
      3. A password-protected archive is copied to PROTECTED_DIR with a
         ``dd.mm__`` prefix and is not extracted.
      4. Otherwise the archive is extracted into
         ``TARGET_DIR/<archive name without extension>``.

    :param rar_name: path of the archive to process.
    :param mysql_cursor: DB-API cursor used for all ``hash_arc`` queries
        (``%s`` placeholders, as used by PyMySQL). Nothing is committed here.
    :param password: password passed to ``extractall``; ``None`` for none.
    :return: ``None``. Failures are printed and counted in DAMAGED
        rather than raised.
    """
    # NOTE(review): the original indentation was lost in the paste; the
    # nesting below (which branch each ``return`` belongs to, and the final
    # removal running after the archive is closed) is reconstructed from
    # the statement order - confirm against the original script.
    global TOTAL_FILES, TOTAL_PROCCESSED, DAMAGED, PROTECTED, PERC_DIFF, \
        CURRENT_PERC, REMOVE_DUPLICATE, DUPLICATED
    try:
        arch_name = os.path.basename(rar_name)

        TOTAL_PROCCESSED += 1
        # Guard against TOTAL_FILES == 0 so that a missing total is not
        # misreported as a damaged archive by the handler below.
        perc = 100.0 * TOTAL_PROCCESSED / TOTAL_FILES if TOTAL_FILES else 0.0
        if perc - CURRENT_PERC >= PERC_DIFF:
            CURRENT_PERC = perc
            print("Ð’Ñего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, int(perc)))
            print("Защищенных архивов: %s" % (PROTECTED))
            print("Поврежденных архивов %s" % (DAMAGED))
            print("Дубликатов %s" % (DUPLICATED))

        file_hash = _md5_of_file(rar_name)

        # Look the hash up through the cursor that was passed in, with a
        # parameterized query so the value needs no manual quoting.
        already_known = False
        try:
            mysql_cursor.execute(
                "SELECT id FROM hash_arc WHERE hash = %s LIMIT 1",
                (file_hash,))
            already_known = mysql_cursor.fetchone() is not None
        except Exception as e:
            # Best effort: a failed lookup is treated as "not a duplicate".
            print(e)

        if not already_known:
            # No duplicate in the database: register the archive.
            try:
                mysql_cursor.execute(
                    "INSERT INTO hash_arc(date, filename, hash) "
                    "VALUES(date(NOW()), %s, %s)",
                    (arch_name, file_hash))
            except Exception as e:
                print(e)
        else:
            # Duplicate: set the file aside, refresh the row date, stop here.
            DUPLICATED += 1
            if REMOVE_DUPLICATE:
                os.remove(rar_name)
            else:
                shutil.copyfile(rar_name, os.path.join(DUPLICATE_DIR, arch_name))
            print(arch_name + ' уже ÑущеÑтвует в БД')
            try:
                mysql_cursor.execute(
                    "UPDATE hash_arc SET date = date(NOW()) WHERE hash = %s",
                    (file_hash,))
            except Exception as e:
                print(e)
            return

        with rarfile.RarFile(rar_name) as archive:
            stem = os.path.splitext(arch_name)[0]

            if archive.needs_password():
                PROTECTED += 1
                protected_name = time.strftime("%d.%m__") + stem + '.rar'
                shutil.copyfile(rar_name, os.path.join(PROTECTED_DIR, protected_name))
                if REMOVE:
                    os.remove(rar_name)
                return

            target = os.path.join(TARGET_DIR, stem)
            try:
                os.mkdir(target)
            except OSError:
                # Extraction continues into the existing directory.
                print(stem + " exists")

            try:
                archive.extractall(target, pwd=password)
            except Exception as e:
                print(e)
                DAMAGED += 1
                if REMOVE:
                    print("Removing " + rar_name)
                    os.remove(rar_name)
                return

        if REMOVE:
            os.remove(rar_name)
    except Exception as e:
        DAMAGED += 1
        print(e)
        # The archive may already be gone (removed above before the failure);
        # removing it again must not raise out of the error handler.
        if REMOVE and os.path.exists(rar_name):
            os.remove(rar_name)
            print("Removing " + rar_name)

# Database connection settings.
# NOTE(review): credentials are hard-coded (root with an empty password);
# consider reading them from the environment or a config file.
DB_HOST = 'localhost'
DB_NAME = 'test'
DB_USER = 'root'
DB_PASS = ''

conn = pymysql.connect(host=DB_HOST,
                       database=DB_NAME,
                       user=DB_USER,
                       password=DB_PASS)
if conn.open:
    print('Connected to MySQL database')
else:
    print("Can't connect to database")
    # Non-zero status so callers can tell the run failed.
    sys.exit(1)

# autocommit is a method on a PyMySQL connection; assigning to it would
# replace the method with a bool instead of changing the mode.
conn.autocommit(False)
cursor = conn.cursor()

try:
    # Collect every file below ARCHTIVE_DIR first so the total is known
    # before processing starts (process_rar reports progress against it).
    files_path = []
    for root, dirs, files in os.walk(ARCHTIVE_DIR):
        files_path.extend(os.path.join(root, name) for name in files)
    TOTAL_FILES = len(files_path)

    for file in files_path:
        print("Processing " + file)
        process_rar(file, cursor)

    # Report the real final percentage: CURRENT_PERC only holds the value
    # at the last progress report and can lag behind the actual total.
    final_perc = int(100.0 * TOTAL_PROCCESSED / TOTAL_FILES) if TOTAL_FILES else 0
    print("Ð’Ñего обработано файлов: %s из %s (%s%%)" % (TOTAL_PROCCESSED, TOTAL_FILES, final_perc))
    print("Защищенных архивов: %s" % (PROTECTED))
    print("Поврежденных архивов %s" % (DAMAGED))
    print("Дубликатов %s" % (DUPLICATED))

    # All hash_arc changes made by process_rar() are committed in one go.
    conn.commit()
finally:
    cursor.close()
    conn.close()
'''
for root, dirs, files in os.walk(ARCHTIVE_DIR):
    for name in files:
        try:
            print("Processing " + os.path.join(root, name))
            process_rar(os.path.join(root, name))
        except Exception as e:
            print(e)
            continue
'''