# Archived paste — Mar 29, 2020, 11:08 AM
1import os
2import re
3import csv
4import urllib.request
5import requests
6import pickle
7
def slugify(value):
    """Return *value* reduced to a filesystem-safe slug.

    Unicode is NFKD-normalized, anything that is not a word character,
    whitespace or hyphen is dropped, and runs of hyphens/whitespace are
    collapsed to single underscores.
    """
    import unicodedata
    value = unicodedata.normalize('NFKD', value)
    # Raw strings: '\w'/'\s' in a plain literal are invalid escape
    # sequences and warn (eventually error) on modern Python.
    value = re.sub(r'[^\w\s-]', '', value).strip()
    value = re.sub(r'[-\s]+', '_', value)
    return value
14
# Build a mapping from image identifier (column 3) to the slugified
# title folder (column 2), creating rip/<folder> for each title.
titles = {}
with open('nma_images_titles.csv', newline='') as csvfile:  # newline='' per csv docs
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    for row in reader:
        folder = slugify(row[2])
        titles[row[3]] = folder
        path = 'rip/%s' % folder
        if not os.path.exists(path):
            print('making %s' % path)
            # exist_ok=True avoids a crash if another run created the
            # directory between the exists() check and here.
            os.makedirs(path, exist_ok=True)
# Resume support: load the list of already-downloaded photo ids, if any.
# NOTE(review): pickle.load on an untrusted file executes arbitrary code;
# fine here since progress.pickle is written by this script itself.
if os.path.exists('progress.pickle'):
    # Context manager closes the handle (the original leaked an open file).
    with open('progress.pickle', 'rb') as f:
        progress = pickle.load(f)
else:
    progress = []
30
# Download every original-size image listed in nma_images.csv, resuming
# from progress.pickle so an interrupted run skips completed files.
API_KEY = '[PUT YOUR API KEY HERE]'  # Flickr API key for flickr.photos.getInfo

count = 0
with open('nma_images.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip header
    # First pass just counts data rows so the progress display has a total.
    total = sum(1 for row in reader)
    csvfile.seek(0)
    reader = csv.reader(csvfile)
    next(reader)
    for row in reader:
        # The last URL path segment looks like <photo_id>_<secret>_<size>.<ext>.
        identifier, secret, _ = row[-1].split('/')[-1].split('_')
        if identifier in progress:
            print('[%s/%s] skipping already downloaded image' % (count, total))
            count += 1
            continue
        # Ask the Flickr API for the original-file secret and format.
        data = requests.get(
            'https://www.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=%s&photo_id=%s&secret=%s&format=json&nojsoncallback=1'
            % (API_KEY, identifier, secret)).json()
        original = 'https://live.staticflickr.com/%s/%s_%s_o.%s' % (
            data['photo']['server'], identifier,
            data['photo']['originalsecret'], data['photo']['originalformat'])
        title = '%s.%s' % (slugify(data['photo']['title']['_content']),
                           data['photo']['originalformat'])
        folder = titles[row[3]]
        path = 'rip/%s/%s' % (folder, title)

        print('[%s/%s] downloading (%s/%s) %s' % (count, total, folder, title, original))
        # URLopener is deprecated since Python 3.3; urlretrieve does the
        # same fetch-to-file without instantiating the legacy class.
        urllib.request.urlretrieve(original, path)
        progress.append(identifier)
        count += 1
        # Persist progress after every file so a crash loses at most one
        # download; 'with' closes the handle the original left open.
        with open('progress.pickle', 'wb') as f:
            pickle.dump(progress, f)