XXmLCJ43

· 6 years ago · May 06, 2020, 04:56 AM
# e6py v2.0 4-9-2020 by OregonTrail on e621
# modify it, redistribute it, do whatever.
3
import json
import os
import sys
import time
from urllib.parse import urlsplit

import requests
5
print("e6py v2.0 4-3-2020 by OregonTrail on e621")

### CREDENTIALS ###

def load_credentials(path="credentials.txt"):
    """Read a username and API key from a two-line text file.

    The first line of the file is the username and the second line is the
    API key. Surrounding whitespace on either line is ignored.

    Returns a two-item list ``[username, api_key]`` when the file exists and
    passes validation, otherwise ``None``. A missing file is silent; every
    other failure prints the reason.
    """
    if not os.path.exists(path):
        return None

    with open(path, "r") as cf:
        lines = cf.read().splitlines()

    if len(lines) < 2:
        print("Failed to load credentials: not enough lines!")
        return None

    username = lines[0].strip()
    api_key = lines[1].strip()

    # splitlines() already drops the line terminators, so a blank line shows
    # up as an empty string (never as '\n').
    if not username or not api_key:
        print("Failed to load credentials: one or more lines is blank!")
        return None

    # Same bounds the script has always enforced: 2-20 characters for the
    # username and exactly 24 for the API key.
    if not 2 <= len(username) <= 20 or len(api_key) != 24:
        print("Failed to load credentials: Username or API key have incorrect lengths!")
        return None

    print("Loaded username and API key from file.")
    return [username, api_key]


# ca holds [username, api_key] when credentials were loaded; creds tells the
# rest of the script whether ca may be used for authentication.
ca = load_credentials()
creds = ca is not None

###################
27
print("\nEnter tags to crawl for posts.\n")

# Strip surrounding whitespace so that an entry consisting only of spaces is
# treated as empty instead of being sent to the API as an unfiltered search
# (and later used as a blank-looking folder name).
query = input("Search: ").strip()

if not query:
    sys.exit()

baseURL = 'https://e621.net/posts.json'
headers = {'User-Agent': 'e6Py/2.0 (by OregonTrail on e621)'}
# 'limit' is the number of posts per batch (won't go higher); 'page' is
# filled in by the crawl loop once the first batch has been read.
params = {'tags': query, 'limit': '320', 'page': None}
urls = []  # direct file URLs of every downloadable post found
minDelta = 1.0  # minimum time in seconds between API calls, do not set this to anything lower than 0.5.

# Running counters shared by the crawl and download sections.
requestsMade = 0
addedPosts = 0
ignoredPosts = 0
totalPosts = 0
downloads = 0
46
### PROCESS ###

# Crawl the search results one batch at a time, collecting file URLs into
# `urls` until the API returns an empty batch.
#
# A single Session is shared by every API call so the underlying connection
# can be reused instead of being re-established for each batch.
with requests.Session() as s:
    while True:
        tick = time.perf_counter()
        try:
            r = s.get(
                baseURL,
                headers=headers,
                params=params,
                auth=(ca[0], ca[1]) if creds else None,
                timeout=30,  # without a timeout a stalled connection would hang forever
            )
            r.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Covers HTTP error statuses, connection failures and timeouts.
            raise SystemExit(e)

        requestsMade += 1
        print("\n[" + str(requestsMade) + "] " + r.url)
        posts = r.json()['posts']

        if not posts:  # if there are no posts to process
            if not urls and not ignoredPosts:  # if there weren't any posts in the first place
                print("\nNo posts matched your search.")
                input("Press Enter to exit.")
                sys.exit()

            # or if there aren't any more posts to process
            print("\nTotal API calls made: " + str(requestsMade))
            print("Added posts: " + str(addedPosts))
            print("Ignored posts: " + str(ignoredPosts))
            print("Total posts processed: " + str(totalPosts))

            if ignoredPosts:  # if not every post found is downloadable without an account
                print("\nSome posts were ignored. To download these, supply your username")
                print("and API key on the first and second lines of a text file called")
                print("\"credentials.txt\" and restart this script.")
                print("\nTo generate an API key, log in to e621.net, and")
                print("go to Account > My Profile > API Key > Generate.")
            break

        for p in posts:
            if not p['file']['url']:  # if post is not downloadable
                ignoredPosts += 1
            else:
                urls.append(p['file']['url'])
                addedPosts += 1

        # The next call asks for posts "before" the lowest ID in this batch.
        # Taking the minimum explicitly keeps that true whatever order the
        # batch was returned in.
        params['page'] = 'b' + str(min(p['id'] for p in posts))

        totalPosts += len(posts)
        print("Posts so far: " + str(totalPosts))
        tock = time.perf_counter()

        if (tock - tick) < minDelta:  # API call rate limiting
            time.sleep(minDelta - (tock - tick))

###############
103
### DOWNLOAD ###

def sanitize_folder_name(name):
    """Return `name` made safe to use as a folder name.

    Characters that Windows forbids in file names are replaced with '_',
    and a name equal to a reserved device name (compared without regard to
    case, as Windows does) is wrapped in underscores.
    """
    invalidCharacters = ['\"', '*', '<', '>', '?', '\\', '|', '/', ':']
    invalidFilenames = ["con", "prn", "aux", "nul", "com0", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "lpt0", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"]

    for c in invalidCharacters:
        name = name.replace(c, '_')

    if name.lower() in invalidFilenames:
        name = '_' + name + '_'

    return name


if addedPosts:
    choice = input("\nDownload these posts? [y/N] ")

    # The search string doubles as the destination folder name.
    query = sanitize_folder_name(query)

    if choice.strip().lower() == 'y':
        os.makedirs(query, exist_ok=True)

        with requests.Session() as s:
            for u in urls:
                downloads += 1
                progress = "[" + str(downloads) + "/" + str(addedPosts) + "] "

                # Use the last path segment of the URL as the file name
                # rather than relying on a fixed number of '/' separators.
                filename = urlsplit(u).path.rsplit('/', 1)[-1]
                target = os.path.join(query, filename)

                if os.path.exists(target):
                    print(progress + "file already exists, skipping.")
                    continue

                print(progress + u)
                try:
                    d = s.get(u, headers=headers, timeout=60)
                    # Never save an HTTP error page under the media file's
                    # name; it would be skipped as "already exists" next run.
                    d.raise_for_status()
                except requests.exceptions.RequestException as e:
                    print("Download failed, skipping: " + str(e))
                    continue

                with open(target, "wb") as f:
                    f.write(d.content)
else:
    print("\nNothing to download.")
    input("Press Enter to exit.")

################