#!/usr/bin/env python3
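"""AutoTranscode: mirror a music library into a lossy copy.

Scans a source directory, transcodes lossless/PCM audio (FLAC/WAV/PCM) to MP3
or Opus with ffmpeg, optionally copies all other files alongside, and can track
changes between runs in a SQLite database so only new or modified files are
processed (and, with --remove, deletions are mirrored too).
"""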

import argparse
import errno
import glob
import logging
import mimetypes
import multiprocessing
import os
import shutil
import sqlite3
import subprocess
import sys

import tqdm

# Fixed size for album covers in px. Some players have issues with overly large album art in MP3s
ALBUM_ARTSIZE = 1200
# List of MIME-Types to encode
TRANSCODE_MIMES = [
    "audio/flac",
    "audio/x-flac",
    "audio/wav",
    "audio/x-wav",
    "audio/pcm",
    "audio/x-pcm"
]
# Script version. Do not change
VERSION = "1.0"


def compare_dict(dict1, dict2):
    """Compare two dicts and return the sets of keys unique to each"""
    d1_keys = set(dict1.keys())
    d2_keys = set(dict2.keys())
    d1 = d1_keys - d2_keys
    d2 = d2_keys - d1_keys
    return d1, d2


def copyfile(src, dest):
    """Copy a file, making sure the directory exists. Return True on success, False on failure"""
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    logging.debug(f"Copying file {os.path.basename(src)} to {os.path.dirname(dest)}")
    try:
        shutil.copy2(src, dest)
    except Exception:
        logging.warning(f"Error during copying of file {src}")
        return False
    return True


def encode_mp3(src, dest, quality):
    """Encode an MP3 file with ffmpeg. Return True on success, False on failure"""
    try:
        subprocess.run(["ffmpeg", "-i", src,
                        "-q:a", str(quality),
                        # Set album art to fixed size
                        "-vf", "scale=" + str(ALBUM_ARTSIZE) + ":" + str(ALBUM_ARTSIZE),
                        "-y", dest],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                       check=True)
    except (OSError, subprocess.CalledProcessError):
        logging.error(f"Could not encode file {src}")
        return False
    return True


def encode_opus(src, dest, quality):
    """Encode an Opus file with ffmpeg. Return True on success, False on failure"""
    # Map the 0-9 quality scale onto Opus bitrates
    opus_map = {
        0: "256k", 1: "225k", 2: "190k", 3: "175k", 4: "165k",
        5: "130k", 6: "115k", 7: "100k", 8: "85k", 9: "65k"
    }
    try:
        subprocess.run(["ffmpeg", "-i", src, "-b:a", str(opus_map[quality]), "-y", dest],
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                       check=True)
    except (OSError, subprocess.CalledProcessError):
        logging.error(f"Could not encode file {src}")
        return False
    return True


def encode_worker(args):
    """Encode a single file. Return a tuple of (success, source path of the failed file or None)"""
    os.makedirs(os.path.dirname(args["target_path"]), exist_ok=True)
    logging.debug(f"Encoding {os.path.basename(args['source_path'])}")
    if args["output"] == "mp3":
        if not encode_mp3(args["source_path"], args["target_path"], args["quality"]):
            logging.error(f"Could not encode file {os.path.basename(args['source_path'])}")
            return (False, args["source_path"])
    elif args["output"] == "opus":
        if not encode_opus(args["source_path"], args["target_path"], args["quality"]):
            logging.error(f"Could not encode file {os.path.basename(args['source_path'])}")
            return (False, args["source_path"])
    logging.info(f"Encoded file {os.path.basename(args['source_path'])}")
    return (True, None)


def get_args():
    """Parse and return command line arguments"""
    parser = argparse.ArgumentParser()
    parser.add_argument("source_path")
    parser.add_argument("target_path")
    parser.add_argument("-C", "--copy_files",
                        action="store_true",
                        help="Copy all files incapable of being encoded to the target path")
    parser.add_argument("-D", "--database",
                        help="Track changes using a database at the specified path")
    parser.add_argument("-O", "--output",
                        choices=["mp3", "opus"], default="mp3",
                        help="Set output format")
    parser.add_argument("-Q", "--quality",
                        type=int, choices=range(0, 10), default=0,
                        help="Set output quality. 0 = Best -> 9 = Worst")
    parser.add_argument("-R", "--remove",
                        action="store_true",
                        help="Track removal of files (only available with -D set)")
    parser.add_argument("-l", "--loglevel",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="WARNING",
                        help="Set loglevel to report")
    parser.add_argument("-p", "--progress",
                        action="store_true",
                        help="Enable fancy progress bars and other user-friendly output")
    parser.add_argument("-t", "--threads",
                        type=int,
                        default=multiprocessing.cpu_count(),
                        help="Set amount of threads used for encoding")
    parser.add_argument("--version",
                        action="store_true",
                        help="Show AutoTranscode version and exit")
    args = parser.parse_args()
    return args

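# Illustrative invocation (paths and the script filename are examples only, not
# taken from the original source): transcode ~/Music to Opus at quality 3, copy
# non-audio files, track changes in a database and show progress bars:
#   python3 autotranscode.py ~/Music ~/MusicPortable -O opus -Q 3 -C -p -D ~/.autotranscode.db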

def init_db(args):
    """Open (or create) the tracking database and return the previously known files"""
    new_settings = {
        "source_path": args.source_path,
        "target_path": args.target_path,
        "quality": str(args.quality),  # Prevents type mismatch with DB
        "art_size": str(ALBUM_ARTSIZE),
        "output": args.output
    }
    with sqlite3.connect(args.database) as con:
        cur = con.cursor()
        try:
            cur.execute("SELECT Name FROM Settings")
        except sqlite3.OperationalError:
            # Table doesn't exist yet, db is new -> create tables
            cur.execute("CREATE TABLE Settings("
                        "Name TEXT NOT NULL PRIMARY KEY,"
                        "Value TEXT NOT NULL)"
                        )
            cur.execute("CREATE TABLE Files("
                        "Path TEXT NOT NULL PRIMARY KEY,"
                        "Hash TEXT)"
                        )
            files = {}
            logging.info("Created new database")
        else:
            cur.execute("SELECT Name, Value FROM Settings")
            old_settings = dict(cur.fetchall())
            if old_settings == new_settings:
                # Only load the files if the settings are identical;
                # all files will be rehashed if the settings are different
                cur.execute("SELECT Path, Hash FROM Files")
                files = dict(cur.fetchall())
            else:
                logging.warning("Settings mismatch - paths or encode settings have changed "
                                "since the script was last run. Reinitializing DB...")
                files = {}
    return files


def rm(path):
    """Remove a file, tolerating files that are already gone"""
    logging.debug(f"Removing file {path}")
    try:
        os.remove(path)
    except OSError as e:
        if e.errno == errno.ENOENT:
            logging.warning(f"Tried removing file that no longer exists: {path}")
        else:
            raise


def validate_args(args):
    """Sanity-check arguments and the environment before doing any work"""
    # if not args.source_path.endswith(path_char[os.name]):
    #     args.source_path = args.source_path + path_char[os.name]
    # if not args.target_path.endswith(path_char[os.name]):
    #     args.target_path = args.target_path + path_char[os.name]

    if args.remove and not args.database:
        raise ValueError("Cannot use --remove without specifying a database (-D)")
    if not os.path.isdir(args.source_path):
        raise FileNotFoundError(f"Could not access {args.source_path}")
    if not os.path.isdir(args.target_path):
        raise FileNotFoundError(f"Could not access {args.target_path}")
    if args.database:
        try:
            # open("*", "a+") will open a file if it exists and create it if it doesn't
            with open(args.database, "a+"):
                pass
        except Exception:
            raise FileNotFoundError(f"Could not access {args.database}")
    if not shutil.which("ffmpeg"):
        raise RuntimeError(
            "ffmpeg not found. Please make sure that ffmpeg is installed and can be found in $PATH")
    logging.info("Done parsing arguments")

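# Overall flow of main(): scan the source tree, compare against the previous run
# if a database is used, copy non-audio files, transcode audio in a worker pool,
# persist the new database state, and finally mirror deletions when --remove is set.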

def main():
    if sys.version_info < (3, 6):
        raise RuntimeError("This script requires Python 3.6 or higher to run")

    to_check = []
    to_copy = []
    to_encode = []
    to_delete = []
    new_db = {}
    fails = []

    args = get_args()
    # Setup logging ASAP
    logging.basicConfig(level=getattr(logging, args.loglevel), format="%(levelname)s: %(message)s")
    if args.version:
        print("AutoTranscode Version:", VERSION)
        sys.exit()

    validate_args(args)
    logging.debug("Arguments parsed")
    if args.database:
        current_db = init_db(args)
        logging.debug(f"Read database at {args.database}")
    logging.info("Read configuration")

    logging.debug(f"Start scanning {args.source_path}")
    for path in glob.iglob(args.source_path + "/**", recursive=True):
        if os.path.isfile(path):
            if args.database:
                # With a database, queue files for hashing first; the
                # encode/copy decision is made after comparing to the last run
                if args.copy_files:
                    logging.debug(f"Queueing up {path} for hashing")
                    to_check.append(path)
                elif mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                    logging.debug(f"Queueing up {path} for hashing")
                    to_check.append(path)
            else:
                if mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                    to_encode.append(path)
                elif args.copy_files:
                    to_copy.append(path)
    logging.info(f"Done scanning {args.source_path}")

    # Hash all queued files and put them into the encode/copy queues.
    # Successful hashes are also added to the new db; failures are removed later.
    # Note: the "hash" is the file's mtime, which is cheap and good enough to detect changes.
    if args.database:
        logging.debug("Start comparing files to previous run")
        for path in to_check:
            logging.debug(f"Creating hash for {path}")
            file_hash = str(os.path.getmtime(path))
            try:
                # See if the file exists in our current database
                previous_hash = current_db[path]
            except KeyError:
                logging.debug(f"{os.path.basename(path)} not found in db - adding as new file")
                new_db[path] = file_hash
                if mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                    to_encode.append(path)
                elif args.copy_files:
                    to_copy.append(path)
                continue
            if file_hash != previous_hash:
                logging.debug(f"Hash mismatch for {path} - queuing for encode/copy")
                new_db[path] = file_hash
                if mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                    to_encode.append(path)
                elif args.copy_files:
                    to_copy.append(path)
            else:
                # Unchanged file - just carry it over to the new db
                new_db[path] = file_hash
        logging.info("Done comparing files to previous run")

    # Copy non-encode files if --copy_files is set
    if to_copy:
        if args.progress:
            # Using print() so that we get some output no matter the loglevel.
            # --progress is on, so that's fine
            print("Copying files")
            for path in tqdm.tqdm(to_copy):
                target = path.replace(args.source_path, args.target_path)
                copyfile(path, target)
        else:
            logging.info(f"Copying {len(to_copy)} files")
            for path in to_copy:
                target = path.replace(args.source_path, args.target_path)
                copyfile(path, target)
                logging.debug(f"Copied file {path}")
        logging.info("Done copying files")

    if to_encode:
        transcode_args = []
        for path in to_encode:
            # Create a list of dicts to map to the encode workers
            transcode_args.append({
                "source_path": path,
                "target_path": path.replace(args.source_path, args.target_path)
                                   .replace(os.path.splitext(path)[1], "." + args.output),
                "output": args.output,
                "quality": args.quality,
            })
        with multiprocessing.Pool(args.threads) as transcode_pool:
            if args.progress:
                print("Encoding files")
                for i in tqdm.tqdm(transcode_pool.imap(encode_worker, transcode_args), total=len(to_encode)):
                    # Collect failed transcodes
                    if not i[0]:
                        fails.append(i[1])
            else:
                logging.info(f"Encoding {len(transcode_args)} files")
                for i in transcode_pool.imap(encode_worker, transcode_args):
                    if not i[0]:
                        fails.append(i[1])
            transcode_pool.close()
            transcode_pool.join()
        logging.info("Done encoding files")

    if args.database:
        logging.debug("Writing new database")
        # Remove failed encodes from the new db. Since they are not recorded,
        # any leftover target file will be removed and the source re-encoded on the next run
        for item in fails:
            new_db.pop(item)
        # Commit our new DB
        new_settings = {
            "source_path": args.source_path,
            "target_path": args.target_path,
            "quality": str(args.quality),  # Prevents type mismatch with DB
            "art_size": str(ALBUM_ARTSIZE),
            "output": args.output
        }
        with sqlite3.connect(args.database) as con:
            con.execute("DELETE FROM Files")
            for key in new_db:
                con.execute("REPLACE INTO Files VALUES (?,?)", (key, new_db[key]))
            for key in new_settings:
                con.execute("REPLACE INTO Settings VALUES (?,?)", (key, new_settings[key]))
        logging.info("Saved changes to database")

    if args.database:
        # All hashed files are in new_db. Any file that exists only in current_db was not
        # found during this run and must therefore have been removed from the source.
        # With --remove set, the corresponding file in the target path is removed as well.
        to_delete = compare_dict(current_db, new_db)[0]
        if to_delete and args.remove:
            if args.progress:
                print("Removing files")
                for path in tqdm.tqdm(to_delete):
                    target_file = path.replace(args.source_path, args.target_path)
                    if mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                        target_file = target_file.replace(os.path.splitext(path)[1], "." + args.output)
                    # dirname() doesn't work if the file has been deleted already,
                    # so we get the file's directory here
                    target_dir = os.path.dirname(target_file)
                    rm(target_file)
                    if not os.listdir(target_dir):
                        logging.debug(f"Removing directory {target_dir}")
                        os.removedirs(target_dir)
            else:
                logging.info(f"Removing {len(to_delete)} files")
                for path in to_delete:
                    target_file = path.replace(args.source_path, args.target_path)
                    if mimetypes.guess_type(path)[0] in TRANSCODE_MIMES:
                        target_file = target_file.replace(os.path.splitext(path)[1], "." + args.output)
                    # dirname() doesn't work if the file has been deleted already,
                    # so we get the file's directory here
                    target_dir = os.path.dirname(target_file)
                    rm(target_file)
                    if not os.listdir(target_dir):
                        logging.debug(f"Removing directory {target_dir}")
                        os.removedirs(target_dir)
            logging.info("Done removing files")

    # Print failed actions
    if fails:
        logging.warning("Some files could not be processed. Run the script again to retry\n"
                        f"List of files:\n{fails}")


if __name__ == "__main__":
    main()