# Paste metadata: 5 years ago · May 06, 2020, 04:56 AM
# e6py v2.0 4-9-2020 by OregonTrail on e621
# modify it, redistribute it, do whatever.
3
import json
import os
import sys
import time
from urllib.parse import urlsplit

import requests
5
print("e6py v2.0 4-3-2020 by OregonTrail on e621")

### CREDENTIALS ###


def load_credentials(path="credentials.txt"):
    """Read a username and API key from the text file at *path*.

    The username is expected on the first line and the API key on the
    second. Surrounding whitespace on either line is ignored.

    Returns ``[username, api_key]`` when the file passes validation. Returns
    ``None`` when the file does not exist (silently) or when validation
    fails (after printing the reason).
    """
    try:
        with open(path, "r") as cf:
            # Strip each line so a stray space or carriage return neither
            # trips the length checks nor ends up in the auth credentials.
            lines = [line.strip() for line in cf.read().splitlines()]
    except FileNotFoundError:
        # Credentials are optional; a missing file is not an error.
        return None

    if len(lines) < 2:
        print("Failed to load credentials: not enough lines!")
        return None

    username, api_key = lines[0], lines[1]

    # splitlines() drops the line terminators, so a blank line is an empty
    # string here and never a literal '\n'.
    if not username or not api_key:
        print("Failed to load credentials: one or more lines is blank!")
        return None

    if not 2 <= len(username) <= 20 or len(api_key) != 24:
        print("Failed to load credentials: Username or API key have incorrect lengths!")
        return None

    print("Loaded username and API key from file.")
    return [username, api_key]


# `ca` holds [username, api_key] (empty when nothing usable was loaded) and
# `creds` says whether API calls should authenticate with it.
ca = load_credentials() or []
creds = bool(ca)

###################
27
print("\nEnter tags to crawl for posts.\n")

# Strip the input so a whitespace-only answer counts as "no query" instead of
# being sent to the API as a blank tag search.
query = input("Search: ").strip()

if not query:
    sys.exit()

# Request settings shared by the crawl and download stages.
baseURL = 'https://e621.net/posts.json'
headers = {'User-Agent': 'e6Py/2.0 (by OregonTrail on e621)'}
# 'limit' is the number of posts per batch, won't go higher. 'page' starts as
# None and is filled in by the crawl loop once the first batch has been read.
params = {'tags': query, 'limit': '320', 'page': None}
urls = []  # file URLs of every downloadable post found so far
minDelta = 1.0  # minimum time in seconds between API calls, do not set this to anything lower than 0.5.

# Running totals, updated by the crawl and download stages.
requestsMade = 0
addedPosts = 0
ignoredPosts = 0
totalPosts = 0
downloads = 0
46
### PROCESS ###

# Crawl the search results one batch at a time, collecting the file URL of
# every downloadable post into `urls` until the API returns an empty batch.

requestTimeout = 30  # seconds to wait on the API before giving up on a call

# One session is shared by every API call so the connection can be reused
# instead of being set up again for each batch.
with requests.Session() as s:
    while True:
        tick = time.perf_counter()
        try:
            r = s.get(
                baseURL,
                headers=headers,
                params=params,
                auth=(ca[0], ca[1]) if creds else None,
                timeout=requestTimeout,
            )
            r.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Covers HTTP errors, connection failures and timeouts alike:
            # exit with the error message rather than a traceback.
            raise SystemExit(e)

        requestsMade += 1
        print("\n[" + str(requestsMade) + "] " + r.url)
        posts = json.loads(r.text)['posts']

        if not posts:  # if there are no posts to process
            if not urls and not ignoredPosts:  # if there weren't any posts in the first place
                print("\nNo posts matched your search.")
                input("Press Enter to exit.")
                sys.exit()

            # or if there aren't any more posts to process
            print("\nTotal API calls made: " + str(requestsMade))
            print("Added posts: " + str(addedPosts))
            print("Ignored posts: " + str(ignoredPosts))
            print("Total posts processed: " + str(totalPosts))

            if ignoredPosts:  # if not every post found is downloadable without an account
                print("\nSome posts were ignored. To download these, supply your username")
                print("and API key on the first and second lines of a text file called")
                print("\"credentials.txt\" and restart this script.")
                print("\nTo generate an API key, log in to e621.net, and")
                print("go to Account > My Profile > API Key > Generate.")
            break

        for p in posts:
            fileURL = p['file']['url']
            if fileURL:
                urls.append(fileURL)
                addedPosts += 1
            else:  # post is not downloadable
                ignoredPosts += 1

        # Cursor for the next API call: the lowest post ID in this batch.
        # Taking the minimum explicitly (rather than the ID of whichever post
        # came last) keeps the cursor correct even if the batch is not sorted
        # by descending ID.
        params['page'] = 'b' + str(min(p['id'] for p in posts))

        totalPosts += len(posts)
        print("Posts so far: " + str(totalPosts))

        # API call rate limiting: make each iteration last at least minDelta.
        elapsed = time.perf_counter() - tick
        if elapsed < minDelta:
            time.sleep(minDelta - elapsed)

###############
103
### DOWNLOAD ###

# Offer to save every collected file into a folder named after the search.

if addedPosts:
    choice = input("\nDownload these posts? [y/N] ")

    # sanitization: the search text becomes the folder name, so replace
    # characters that are not allowed in file names and sidestep the reserved
    # device names.
    invalidCharacters = ['\"', '*', '<', '>', '?', '\\', '|', '/', ':']
    invalidFilenames = ["con", "prn", "aux", "nul", "com0", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "lpt0", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"]

    for c in invalidCharacters:
        query = query.replace(c, '_')

    # Compare case-insensitively so e.g. "CON" is caught as well as "con".
    if query.lower() in invalidFilenames:
        query = '_' + query + '_'

    if choice.strip().lower() == 'y':
        os.makedirs(query, exist_ok=True)

        with requests.Session() as s:
            for downloads, u in enumerate(urls, start=1):
                progress = "[" + str(downloads) + "/" + str(addedPosts) + "] "

                # Use the last component of the URL path as the file name
                # instead of assuming a fixed number of '/' separators.
                filename = os.path.basename(urlsplit(u).path)
                target = os.path.join(query, filename)

                if os.path.exists(target):
                    print(progress + "file already exists, skipping.")
                    continue

                print(progress + u)
                try:
                    d = s.get(u, headers=headers, timeout=60)
                    d.raise_for_status()
                except requests.exceptions.RequestException as e:
                    # One bad file should not abort the whole batch, and an
                    # error response must not be saved as if it were media.
                    print("Download failed, skipping: " + str(e))
                    continue

                # Write under a temporary name and rename when complete, so an
                # interrupted write never leaves a truncated file that a later
                # run would skip as "already exists".
                partial = target + ".part"
                with open(partial, "wb") as f:
                    f.write(d.content)
                os.replace(partial, target)
else:
    print("\nNothing to download.")
    input("Press Enter to exit.")

################
141################