· 6 years ago · Dec 04, 2018, 06:02 AM
1'''
2This library is a collection of Python 3.7.1 functions to download and archive images
3from tumblr.com, before it is too late. You can grab all the images from a specific
4tumblr, or archive your own Likes. Pic files are saved with a filename formatted like
5mySFWtumblr-1234567-kitty-cat-cute.jpg, where 1234567 is the post ID and kitty-cat-cute
6are tags. It may or may not work with Python versions earlier than Python 3.7.1.
7
80. Get the latest version of Python and have some hazy understanding of how it works.
9 https://www.python.org/
10
111. Get the pytumblr library and install somewhere in your python path https://github.com/tumblr/pytumblr.
12 You want the pytumblr directory there not the pytumblr-master.
13
142. Get OAuth credentials/keys here: https://www.tumblr.com/docs/en/api/v2
15 The four keys are strings of 50 chars, consumer_key, consumer_secret, oauth_token, oauth_token_secret
16 in that order. Each tumblr you have has its own set of keys. You do this while logged in on a browser
17 to the particular tumblr in question. This is a requirement to call the tumblr API.
18
19 Edit this file, or preferably, create a separate file to contain your actual keys below.
20
213. Save this file as alboget.py to a directory in your Python path.
22
234. Start a Python console and import os. Navigate to the directory where you want your incoming pics
24 to be saved, e.g. >>>os.chdir('C:\\TUMBLR'), and only then import alboget: it records the CWD at import time as its save root.
25
265. For your tumblr at mySFWtumblr.tumblr.com, you can save your Liked pics like this
27
28 >>>alboget.updateLikes('mySFWtumblr', 0)
29
30 or
31
32 >>>alboget.updateLikes('mySFWtumblr', 3560)
33
34 where 3560 was the number of Likes you had when you last did an update.
35
366. To get the post pics from your own or any other tumblr, use
37
38 >>>alboget.picScrape('mySFWtumblr', 0, 'mySFWtumblr')
39
40 or
41
42 >>>alboget.picScrape('female-presenting-nipples', 0, 'mySFWtumblr')
43
44 where 0 will get all available pics (careful) and another number will get pics back to
45 the post count specified. You have to use YOUR OWN tumblr name in the third field, which tells
46 the function which keys to use.
47
48 This backwards seeming behaviour was put in to make it easier to
49 do incremental backups of both Likes and Posts, and the directories created for the saved data have
50 names like female-presenting-nipples-44560, where 44560 was the most recent post count when called.
51
527. Tumblr Limitations. In practice, tumblr has changed the way the API works so that it will only
53 effectively fetch the most recent ~1000 posts from one's Likes. This is not documented. The
54 console at https://api.tumblr.com/console also is not 100% faithful to reality. It does seem that the
55 posts function still works backward into the arbitrary past. So when calling the updateLikes function,
56 you should specify the count field as a number about 1000 less than your current Likes count. Also, the
57 API allows only 1000 calls per hour, and 5000 calls per day, however each call fetches 20 posts, so you can
58 get up to 20,000 pics/hour. The pic file download from tumblr's servers is slow, and I suspect
59 that they throttle this speed once they detect that a single IP is grabbing many pics per minute. If you
60 have multiple tumblrs and multiple key sets, that will help to get around this limit.
61
628. Happy downloading.
63
64'''
65
66import os
67import re
68import urllib
69from urllib.parse import urlparse
70import requests
71import pytumblr
72
# Edit the strings below to put in your actual keys and tumblr names. These are dummies.
# You can create keys for as many tumblrs as you own.
# Each value is [consumer_key, consumer_secret, oauth_token, oauth_token_secret], in that
# order: initclient() unpacks the list positionally into pytumblr.TumblrRestClient.
# Do not publish this file once real credentials have been filled in.
keys = {
    'mySFWtumblr': [
        'XMKyiiypJP0Kz5EkUOKVn0dmqGBBSezxSFEgJCBghRUqizGtu3',
        'lmoBq8zLwe99YqGHn0rJYilQHiwKcPkXJQwLJxQzWiRR5zj8xV',
        'NiPZMaurMnhAEinHEiXS7ncguRkgCb36Asu3C9IJpgX8LrZ0Dv',
        'mu0vgW6qQEfMqS42kAthbex9Eq54sDSM6ME6YxTLMfLJPJ9brg',
    ],
    'myPORNtumblr': [
        'izljviYTtjTZJc9ftvB0W1y66fVQC2O7IHuce8GPH5U4fCb4FX',
        'YEGXrrigdSpQbwE1GrgwBWodlWJ3fifYScBVkpUSSCTwxLW6yR',
        'kWpNZ5v60CtYbOHIE6aNUogw2UgcIJ1jnpaHn2GoQZQg1YInnK',
        'x675q63ESyypmXJgjMIyWokvStEHriVMruQ9dIWElFcD5UDRmt',
    ],
}


# Root directory under which updateLikes()/picScrape() create their download directories.
# It is captured once, when this module is imported; you can hard code your preferred
# dir here if you like.
tumblrrootdir = os.getcwd()

# Shared API client. It stays None until initclient() builds one for a chosen key set.
# (A module-level `global client` statement does nothing, so the name is bound here.)
client = None

# Lower-case aliases for the Python constants.
# NOTE(review): presumably so a JSON API response pasted into the console evaluates
# as Python -- nothing in this file uses them; confirm before removing.
false = False
true = True
null = None
88
def cleanFN(filename):
    """Return *filename* with every unsafe character removed.

    Only ASCII letters, digits, '-' and '_' are kept. Dots are removed as
    well, so a file extension must be appended after cleaning, not before.
    """
    # The hyphen sits last in the class so it is unambiguously a literal
    # rather than part of a range.
    return re.sub(r"[^a-zA-Z0-9_-]+", "", filename)
91
def initclient(tname):
    """Create the shared pytumblr client from the credentials stored under *tname*.

    The four strings in ``keys[tname]`` are passed positionally to
    ``pytumblr.TumblrRestClient`` and the result is bound to the module-level
    ``client`` that the fetch functions in this module use.

    Raises:
        KeyError: if *tname* has no entry in ``keys``.
    """
    global client
    client = pytumblr.TumblrRestClient(*keys[tname])
95
def namesandURLsFromPosts(postlist):
    """Build ``[url, filename]`` pairs for every image in the photo posts of *postlist*.

    Posts whose ``'type'`` is not ``'photo'`` are skipped. Each filename is
    ``<blog_name>-<post id>[-<tags>][-NN]`` passed through ``cleanFN`` and
    followed by the extension taken from the image URL. The ``-NN`` counter
    (01, 02, ...) is added only when a post holds more than one photo.

    Args:
        postlist: list of post dicts as returned by the tumblr API.

    Returns:
        A list of two-element lists ``[original_size_url, filename]``.
    """
    namesURLs = []
    for apost in postlist:
        if apost['type'] != 'photo':
            continue
        filepre = apost['blog_name'] + '-' + str(apost['id'])
        # Tags become part of the filename; spaces inside a tag turn into '_' and
        # the whole tag string is capped at 50 characters.
        atag = '-'.join(apost.get('tags') or []).replace(' ', '_')[:50]
        if atag:
            atag = '-' + atag
        photos = apost.get('photos') or []
        numbered = len(photos) > 1
        for j, photo in enumerate(photos, start=1):
            urlfetch = photo['original_size']['url']
            # The extension comes from the URL path only; if the url has no file
            # extension the saved file has none either.
            aext = os.path.splitext(urlparse(urlfetch).path)[1]
            suffix = '-' + str(j).zfill(2) if numbered else ''
            # Clean before appending the extension: cleanFN would strip the dot.
            namesURLs.append([urlfetch, cleanFN(filepre + atag + suffix) + aext])
    return namesURLs
122
def getPosts(tumblrname, mylimit, myoffset):
    """Fetch one page of posts from a blog and return its image URL/filename pairs.

    Args:
        tumblrname: blog name without the ``.tumblr.com`` suffix.
        mylimit: number of posts to request in this call.
        myoffset: offset of the first post to request.

    Returns:
        The ``[url, filename]`` list built by ``namesandURLsFromPosts``, or an
        empty list when the response has no ``'posts'`` entry (the response
        is printed in that case).
    """
    tumresp = client.posts(tumblrname + '.tumblr.com', limit=mylimit, offset=myoffset)
    if 'posts' not in tumresp:
        print('tumblr call fail at getPosts(' + tumblrname + ', myoffset = ' + str(myoffset) + ')')
        print(tumresp)
        return []
    return namesandURLsFromPosts(tumresp['posts'])
133
def getSelfLiked(mylimit, myoffset):
    """Fetch one page of Likes via ``client.likes`` and return its image URL/filename pairs.

    Args:
        mylimit: number of liked posts to request in this call.
        myoffset: offset of the first liked post to request.

    Returns:
        The ``[url, filename]`` list built by ``namesandURLsFromPosts``, or an
        empty list when the response has no ``'liked_posts'`` entry (the
        response is printed in that case). A note is also printed when the
        page yielded no images at all.
    """
    tumresp = client.likes(limit=mylimit, offset=myoffset)
    if 'liked_posts' not in tumresp:
        print('tumblr call fail at getSelfLiked at myoffset = ' + str(myoffset))
        print(tumresp)
        return []
    out = namesandURLsFromPosts(tumresp['liked_posts'])
    if not out:
        print('empty list returned from getSelfLiked at (mylimit, myoffset) ' + str(mylimit) + ' ' + str(myoffset))
    return out
146
def getNUSincePrevious(tumblrname, previousCount):
    """Collect image URL/filename pairs for posts added since *previousCount*.

    The blog's current post count is read with ``client.blog_info`` and the
    newest ``current - previousCount`` posts are walked in pages of up to 20.

    Args:
        tumblrname: blog name without the ``.tumblr.com`` suffix.
        previousCount: post count at the previous run; 0 gets them all.

    Returns:
        A list of ``[url, filename]`` pairs; empty when there is nothing new.
    """
    currentPostCount = client.blog_info(tumblrname + '.tumblr.com')['blog']['posts']
    print('For ' + tumblrname + ' current post count = ' + str(currentPostCount))
    numtoget = currentPostCount - previousCount
    nulist = []
    for offs in range(0, numtoget, 20):
        # Trim the last page so the walk stops exactly at previousCount instead
        # of re-fetching up to 19 posts that an earlier run already covered.
        nulist.extend(getPosts(tumblrname, min(20, numtoget - offs), offs))
    return nulist
157
def getLikedNUSincePrevious(previousCount):
    """Collect image URL/filename pairs for Likes added since *previousCount*.

    The current Likes count is read with ``client.info`` and the newest
    ``current - previousCount`` liked posts are walked in pages of up to 20.

    Args:
        previousCount: Likes count at the previous run; 0 gets them all.

    Returns:
        A list of ``[url, filename]`` pairs; empty when there is nothing new.
    """
    cinfo = client.info()
    currentLikedCount = cinfo['user']['likes']
    print('For ' + cinfo['user']['name'] + ' current liked count = ' + str(currentLikedCount))
    numtoget = currentLikedCount - previousCount
    nulist = []
    for offs in range(0, numtoget, 20):
        # Trim the last page so the walk stops exactly at previousCount instead
        # of re-fetching up to 19 Likes that an earlier run already covered.
        nulist.extend(getSelfLiked(min(20, numtoget - offs), offs))
    return nulist
169
def fetchPicsToCWD(NUlist):
    """Download every ``[url, filename]`` pair in *NUlist* into the current directory.

    Each URL is requested with a 5 second timeout. A pair whose request times
    out, raises any other exception, or returns a non-OK status is reported
    and collected so the caller can retry it.

    Args:
        NUlist: list of ``[url, filename]`` pairs.

    Returns:
        The list of pairs that could not be downloaded in this pass.
    """
    badcalls = []
    for a in NUlist:
        url, fname = a[0], a[1]
        try:
            r = requests.get(url, timeout=5.0)
        except requests.exceptions.Timeout:
            badcalls.append(a)
            print('requests.exceptions.Timeout at getting ' + url)
            continue
        except Exception as e:
            # Best effort: keep going, but record the failure so that the
            # caller's retry pass sees it too.
            badcalls.append(a)
            print('Caught an exception as some sub call of fetchPicsToCWD ')
            print(e)
            continue
        if r.status_code != requests.codes.ok:
            badcalls.append(a)
            print('Bad call made with r.status_code = ' + str(r.status_code) + '\n while getting ' + url)
            continue
        # TODO: need to check if the filename already exists; it is overwritten for now.
        with open(fname, 'wb') as f:
            f.write(r.content)
    print(str(len(badcalls)) + ' bad calls made in this pass of pic gets')
    return badcalls
192
def updateLikes(mytumblrname, previous):
    """Download the images from the Likes added since the count was *previous*.

    The files go into ``<mytumblrname>-LIKES-<current like count>`` under
    ``tumblrrootdir``. Failed downloads are retried once. The working
    directory is set back to ``tumblrrootdir`` on exit, even after an error.

    Args:
        mytumblrname: a name present in ``keys``; its credentials are used.
        previous: Likes count at the previous update; 0 gets them all.
    """
    if mytumblrname not in keys:
        print('No keys in tumblrkeys.py available for ', mytumblrname)
        return
    initclient(mytumblrname)
    currentLikedCount = client.info()['user']['likes']
    newLikesDir = mytumblrname + '-LIKES-' + str(currentLikedCount)
    os.chdir(tumblrrootdir)
    try:
        try:
            os.mkdir(newLikesDir)
        except FileExistsError:
            print('directory already exists, and it is fine')
        # fetchPicsToCWD writes into the current directory, so step into the target.
        os.chdir(newLikesDir)
        print('starting to get ' + mytumblrname + ' photolinks likes ' + str(currentLikedCount) + ' down to ' + str(previous))
        nu = getLikedNUSincePrevious(previous)
        print('starting to download ' + str(len(nu)) + ' photo files to ' + newLikesDir)
        bc = fetchPicsToCWD(nu)
        if bc:
            print('Starting second pass on bad calls')
            fetchPicsToCWD(bc)
        print('updateLikes completed.')
    finally:
        # Do not leave the process inside the download directory after a failure.
        os.chdir(tumblrrootdir)
217
def picScrape(tumblrname, previous, myKeyName):
    """Download the images from a blog's posts added since the count was *previous*.

    The files go into ``<tumblrname>-<current post count>`` under
    ``tumblrrootdir``. Failed downloads are retried once. The working
    directory is set back to ``tumblrrootdir`` on exit, even after an error.

    Args:
        tumblrname: blog to scrape, without the ``.tumblr.com`` suffix.
        previous: post count at the previous scrape; 0 gets them all.
        myKeyName: a name present in ``keys``; your blog credentials to use.
    """
    if myKeyName not in keys:
        print('No keys in tumblrkeys.py available for ', myKeyName)
        return
    initclient(myKeyName)
    currentPostCount = client.blog_info(tumblrname + '.tumblr.com')['blog']['posts']
    newPostsDir = tumblrname + '-' + str(currentPostCount)
    os.chdir(tumblrrootdir)
    try:
        try:
            os.mkdir(newPostsDir)
        except FileExistsError:
            print('directory already exists, and it is fine')
        # fetchPicsToCWD writes into the current directory, so step into the target.
        os.chdir(newPostsDir)
        print('starting to get ' + tumblrname + ' photolinks ' + str(currentPostCount) + ' down to ' + str(previous))
        nu = getNUSincePrevious(tumblrname, previous)
        print('starting to download ' + str(len(nu)) + ' photo files to ' + newPostsDir)
        bc = fetchPicsToCWD(nu)
        if bc:
            print('Starting second pass on bad calls')
            fetchPicsToCWD(bc)
        print('picScrape completed.')
    finally:
        # Do not leave the process inside the download directory after a failure.
        os.chdir(tumblrrootdir)