cUxadPsu

· 6 years ago · Apr 21, 2020, 07:32 PM
1# this lacks a TON of exception handling, I'll be updating it as I go along
2import flickrapi
3import json
4import urllib.request
5
6
def _parse_album_url(url):
    """Return ``(user, photoset_id)`` extracted from a Flickr album URL.

    The expected shape is ``https://www.flickr.com/photos/<user>/albums/<id>``.
    The scheme, a ``www.`` prefix, a trailing slash, extra path segments after
    the album id, a query string and a fragment are all tolerated.

    Raises:
        ValueError: if the user or the album id cannot be found in ``url``.
    """
    _, marker, tail = url.strip().partition("flickr.com/photos/")
    user, albums, rest = tail.partition("/albums/")
    # Keep only the album id itself; anything after it would otherwise be
    # sent to the API as part of photoset_id.
    photoset = rest.split("/", 1)[0].split("?", 1)[0].split("#", 1)[0]
    if not (marker and albums and user and photoset):
        raise ValueError(
            "expected a URL like https://www.flickr.com/photos/XXXXXXXXX/albums/YYYYYYY, got: "
            + repr(url)
        )
    return user, photoset


def _load_flickr_json(raw):
    """Decode a raw JSON API response, raising if Flickr reported a failure.

    Raises:
        RuntimeError: if the decoded response carries ``"stat": "fail"``.
    """
    payload = json.loads(raw)
    # Without this check a failed call only shows up later as a KeyError on
    # whichever key the caller happens to read first.
    if payload.get("stat") == "fail":
        raise RuntimeError(
            "Flickr API error {}: {}".format(payload.get("code"), payload.get("message"))
        )
    return payload


def _pick_original_url(sizes):
    """Return the source URL of the original-size entry, or ``None`` if ``sizes`` is empty.

    The entry labelled ``"Original"`` is preferred. When no entry has that
    label, the last entry is used, which is what the previous implementation
    always did.
    """
    if not sizes:
        return None
    for size in sizes:
        if size.get("label") == "Original":
            return size["source"]
    return sizes[-1]["source"]


def _build_filename(photo_info, photo_id, source_url, used_names):
    """Build a local file name for a photo and record it in ``used_names``.

    Args:
        photo_info: the ``"photo"`` object of a ``photos.getInfo`` response.
        photo_id: the photo's id, used when the title is empty or already taken.
        source_url: URL the picture is downloaded from; supplies the file
            extension when ``originalformat`` is absent from ``photo_info``.
        used_names: set of file names already produced during this run.
    """
    title = photo_info["title"]["_content"].strip()
    # Path separators would make the title point into another directory, and
    # the other characters are rejected by some file systems.
    for forbidden in '\\/:*?"<>|':
        title = title.replace(forbidden, "_")
    if not title:
        # An untitled photo would otherwise be saved as just ".<extension>".
        title = photo_id

    extension = photo_info.get("originalformat")
    if not extension:
        # Fall back to whatever extension the download URL itself carries.
        extension = source_url.split("?", 1)[0].rsplit(".", 1)[-1]

    name = title + "." + extension
    if name in used_names:
        # Two photos sharing a title would silently overwrite each other.
        name = title + "_" + photo_id + "." + extension
    used_names.add(name)
    return name


def downloader():
    """Download every picture of a Flickr album that was taken on a given date.

    Prompts on stdin for the API key, the API secret, the album URL and the
    date (``YYYY-MM-DD``). The album is walked from its last page to its first.
    Each photo whose "taken" date starts with the requested date is saved to
    the current working directory; for every other photo its index within the
    page and its taken date are printed.

    Raises:
        ValueError: if the album URL cannot be parsed.
        RuntimeError: if Flickr answers an API call with a failure status.
    """
    # Get these from Flickr
    api_key = input("what is your api key?").strip()
    api_secret = input("what is your api secret key?").strip()

    # This is the album's URL, needs to be in the following format:
    # https://www.flickr.com/photos/XXXXXXXXX/albums/YYYYYYY
    url = input("please enter the URL")
    user, photoset = _parse_album_url(url)

    # e.g. 2020-04-16
    as_of_date = input("as of what picture_date are you trying to download the pictures? Please use the following "
                       "format YYYY-MM-DD").strip()

    flickr = flickrapi.FlickrAPI(api_key, api_secret)

    # First call is only used to learn how many pages the photoset has
    first_page = _load_flickr_json(
        flickr.photosets.getPhotos(api_key=api_key, photoset_id=photoset, user_id=user, format='json')
    )
    # int() guards against the count arriving as a string
    total_pages = int(first_page["photoset"]["pages"])

    used_names = set()

    # Walk the pages from the last one down to the first one
    for current_page in range(total_pages, 0, -1):
        # flickr's api has a limitation of 500 photos per page
        photo_list_json = _load_flickr_json(
            flickr.photosets.getPhotos(api_key=api_key, photoset_id=photoset, user_id=user,
                                       format='json', page=current_page)
        )

        for photo_index, photo in enumerate(photo_list_json["photoset"]["photo"], 1):
            photo_id = photo["id"]

            # getInfo carries the "taken" date needed for filtering
            photo_info = _load_flickr_json(
                flickr.photos.getInfo(api_key=api_key, photo_id=photo_id, format='json')
            )["photo"]
            taken_date = photo_info["dates"]["taken"]

            if not taken_date.startswith(as_of_date):
                print(str(photo_index) + " " + taken_date[:len("YYYY-MM-DD")])
                continue

            print(photo_info)

            # The download URL lives in getSizes; only ask for it once the
            # photo is known to match, saving one API call per skipped photo.
            sizes = _load_flickr_json(
                flickr.photos.getSizes(api_key=api_key, photo_id=photo_id, format='json')
            )["sizes"]["size"]
            og_pic_url = _pick_original_url(sizes)
            if og_pic_url is None:
                print(str(photo_index) + " no downloadable size for photo " + photo_id)
                continue

            fullname = _build_filename(photo_info, photo_id, og_pic_url, used_names)
            # this actually downloads the picture
            urllib.request.urlretrieve(og_pic_url, fullname)