· 6 years ago · Dec 12, 2019, 08:18 PM
1import requests
2from requests import get
3from bs4 import BeautifulSoup
4import sys
5import shutil
6import os
7import json
8import configparser
9
def properName(field_name):
    """Normalise a scraped country/distributor name.

    Sweden, Finland and Norway are collapsed to the shared code ``'NOR'``.
    Matching ignores case and surrounding whitespace. Every other value
    (including non-string values such as ``None``) is returned unchanged.

    Keyword arguments:
    field_name -- name as scraped from the page
    """
    # Keys are stored lower-case so the lower-cased lookup below can
    # actually match; capitalised keys never matched a lower-cased name.
    countryDict = {
        'sweden': 'NOR',
        'finland': 'NOR',
        'norway': 'NOR',
    }

    # A missing attribute on the page yields None; there is nothing to map.
    if not isinstance(field_name, str):
        return field_name

    return countryDict.get(field_name.strip().lower(), field_name)
22
def getBluray(link):
    """Scrape title, country, distributor and year from a Blu-ray.com page.

    Keyword arguments:
    link -- URL of the Blu-ray.com movie page

    Returns a dict with the keys 'title', '4k', 'country',
    'movie_distributor' and 'year'.
    """
    tempDict = {}

    # Query the Blu-ray.com page. The `link` argument is used (not the
    # module-level blurayLink) so the function works for any caller.
    bluray_soup = BeautifulSoup(requests.get(link).text, 'html.parser')
    title_raw = bluray_soup.find("div", id="movie_info")

    # Get the film title from the raw block
    title = title_raw.contents[1].get_text()

    # Check if the page is for a 4K film, currently not used
    is_4k = "4K" in title
    if is_4k:
        # Only drop the marker when it really is a suffix; blindly cutting
        # two characters would mangle titles that contain "4K" elsewhere.
        trimmed = title.rstrip()
        if trimmed.endswith("4K"):
            title = trimmed[:-2].strip()
    tempDict['title'] = title
    tempDict['4k'] = is_4k

    # The country name is read from the 'title' attribute of the node that
    # follows the first "black noline" anchor.
    country_raw = bluray_soup.find("a", class_="black noline")
    tempDict['country'] = properName(country_raw.next_sibling.get('title'))

    # Subheading is split on '|': first field is used as the distributor,
    # second as the year.
    movie_info_raw = bluray_soup.find("span", class_="subheading grey")
    info_fields = movie_info_raw.get_text().split("|")
    tempDict['movie_distributor'] = properName(info_fields[0].strip())
    tempDict['year'] = info_fields[1].strip()

    return tempDict
48
def getIMDB(bluray_title, bluray_year):
    """Look the film up on IMDb and scrape the data needed for the thread.

    Keyword arguments:
    bluray_title -- film title as scraped from Blu-ray.com
    bluray_year -- release year (string) used to narrow the search

    Returns a dict with the keys 'movie_url', 'year', 'plot_summary',
    'director' and 'thread_movie_title'.
    """
    tempDict = {}
    search_endpoint = "https://www.imdb.com/find"
    base_url = "https://www.imdb.com"

    # Let requests build the query string so titles containing characters
    # such as '&' or '#' are encoded instead of corrupting the URL.
    search_terms = " ".join(bluray_title.split() + [bluray_year])
    search_response = requests.get(
        search_endpoint,
        params={'ref_': 'nv_sr_fn', 'q': search_terms},
    )

    # IMDb search page soup: take the link of the first result row
    imdb_search_soup = BeautifulSoup(search_response.text, 'html.parser')
    first_result = imdb_search_soup.find("tr", class_="findResult odd")
    result_href = first_result.contents[3].contents[1].get('href')

    # Drop the tracking query string once and reuse the clean URL for every
    # request; appending "plotsummary" after "?ref_=..." would only extend
    # the query value and never reach the summary page.
    movie_url = (base_url + result_href).split("?")[0]
    tempDict['movie_url'] = movie_url
    movie_code = movie_url.split("/")[4]
    movie_path = movie_url if movie_url.endswith("/") else movie_url + "/"

    imdb_soup = BeautifulSoup(requests.get(movie_url).text, 'html.parser')
    # Full summary is sometimes cut off on the main page
    summary_soup = BeautifulSoup(requests.get(movie_path + "plotsummary").text, 'html.parser')

    # More trusted than the Blu-ray year. The first and last characters are
    # dropped (presumably the surrounding parentheses).
    tempDict['year'] = imdb_soup.find("span", id="titleYear").get_text()[1:-1]

    plot_summary_raw = summary_soup.find("ul", class_="ipl-zebra-list", id="plot-summaries-content")
    tempDict['plot_summary'] = plot_summary_raw.contents[1].contents[1].get_text().strip()

    # Third text line of the first credit block is used as the director
    director_raw = imdb_soup.find("div", class_="credit_summary_item")
    tempDict['director'] = director_raw.get_text().split("\n")[2].strip()

    # akas.imdb.com to get the foreign title, if different
    akas_link = "http://akas.imdb.com/title/" + movie_code
    akas_soup = BeautifulSoup(requests.get(akas_link).text, 'html.parser')
    foreign_title_raw = akas_soup.find("div", class_="title_wrapper")
    # The trailing six characters are removed (presumably "(YYYY)").
    foreign_title_formatted = foreign_title_raw.contents[1].get_text().strip()[:-6].strip()

    # Combine the foreign title with the English title
    if foreign_title_formatted != bluray_title:
        tempDict['thread_movie_title'] = foreign_title_formatted + " (AKA: " + bluray_title + ")"
    else:
        tempDict['thread_movie_title'] = bluray_title
    return tempDict
92
93
def getRottenTomatoes(bluray_title, imdb_year):
    """Work out the Rotten Tomatoes URL for the film.

    The "<title>_<year>" page is requested first; when the returned page
    carries the generic site title (treated here as "not found"), the URL
    without the year is used instead.

    Keyword arguments:
    bluray_title -- film title as scraped from Blu-ray.com
    imdb_year -- release year (string)

    Returns a dict with 'bluray_title_formatted' (title with underscores)
    and 'movie_url'.
    """
    rt_root = "https://www.rottentomatoes.com/m/"
    slug = "_".join(bluray_title.split())
    url_without_year = rt_root + slug
    url_with_year = url_without_year + "_" + imdb_year

    response = requests.get(url_with_year)
    soup = BeautifulSoup(response.text, 'html.parser')

    # The generic site title is what the year-suffixed URL yields when no
    # such movie page exists.
    not_found = soup.find('title').text == "Rotten Tomatoes: Movies - Rotten Tomatoes"

    return {
        'bluray_title_formatted': slug,
        'movie_url': url_without_year if not_found else url_with_year,
    }
109
def getMoviePoster(bluray_title, bluray_title_underscored, imdb_year):
    """Download the first poster listed on themoviedb.org for the film.

    Keyword arguments:
    bluray_title -- film title used for the search
    bluray_title_underscored -- title with underscores, used as file name
    imdb_year -- release year (string) used to narrow the search

    Returns a one-element list holding the absolute path of the saved
    '<bluray_title_underscored>.jpg' file.
    """
    site_url = "https://www.themoviedb.org"
    poster_search_url = "/images/posters"
    # URL pieces for searching
    search_url_start = "/search/movie?query="
    search_url_end = "&language=en-US"
    poster_search_url_end = "?language=en-US"
    # Change the title into the form A%20Star%20Is%20Born
    bluray_title_formatted = "%20".join(bluray_title.split())
    # Used to search for the year (URL-encoded " y:")
    year_search_string = "%20y%3A"
    search_url = (site_url + search_url_start + bluray_title_formatted
                  + year_search_string + imdb_year + search_url_end)

    page_results = BeautifulSoup(requests.get(search_url).text, 'html.parser')
    # Get the first movie card's content
    movie_soup = page_results.find("div", class_="image_content")
    # Get the link to the movie page
    movie_link = movie_soup.a.get("href").split("?")[0]
    movie_url = site_url + movie_link + poster_search_url + poster_search_url_end

    # Open up the movie's poster listing and take the first poster link
    movie_page_soup = BeautifulSoup(requests.get(movie_url).text, 'html.parser')
    poster_url = movie_page_soup.find("div", class_="image_content").a.get("href")

    file_name = bluray_title_underscored + '.jpg'
    # Context managers guarantee the connection is released and the image
    # is flushed and closed before the path is handed to the uploader.
    with requests.get(poster_url, stream=True) as resp:
        # Set decode_content to True, otherwise the downloaded image
        # file's size will be zero.
        resp.raw.decode_content = True
        with open(file_name, 'wb') as local_file:
            # Copy the raw response stream to the local image file.
            shutil.copyfileobj(resp.raw, local_file)

    return [os.path.abspath(file_name)]
145
def readFiles(files, key = 'image[]'):
    """ Build the multipart 'files' payload for Requests
    Keyword arguments:
    files -- list of files (full paths)
    key -- Requests payload form field name

    Each entry is (key, (base file name, file object opened in binary
    read mode)). The file objects are returned open.
    """
    return [
        (key, (os.path.basename(path), open(path, 'rb')))
        for path in files
    ]
156
def largestFiles(files, limit = 8):
    """ Keep only the `limit` largest files, returned sorted by path
    Thanks to LeFirstTimer for the suggestion and sample code!
    Keyword arguments:
    files -- list of files (full paths)
    limit -- Maximum number of files

    When the list already fits within the limit it is returned as-is.
    """
    if not len(files) > limit:
        return files

    # Biggest first, keep the top `limit`, then restore path order.
    by_size_desc = sorted(files, key=os.path.getsize, reverse=True)
    return sorted(by_size_desc[:limit])
174
def uploadToAIMG(bluray_title, imdb_year, files, api_key, gallery_name):
    """ Upload images to a new AIMG gallery
    Keyword arguments:
    bluray_title -- unused, kept for caller compatibility
    imdb_year -- unused, kept for caller compatibility
    files -- list of images (full paths)
    api_key -- AIMG API key
    gallery_name -- Gallery title

    Returns the decoded JSON response.
    Raises ValueError when AIMG answers with a non-200 status code.
    """
    payload = {
        'apikey': api_key,
        'galleryid': 'new',
        'gallerytitle': gallery_name,
    }

    attachments = readFiles(files)
    try:
        with requests.Session() as s:
            req = requests.Request(
                'POST',
                'https://img.awesome-hd.me/api/upload',
                data=payload,
                files=attachments
            )
            resp = s.send(req.prepare())
    finally:
        # readFiles hands back open file objects; close them whether or
        # not the upload succeeded so no descriptors are leaked.
        for _, (_, handle) in attachments:
            handle.close()

    if resp.status_code != 200:
        raise ValueError('Error code from AIMG: ' + str(resp.status_code))
    return resp.json()
200
def aimgLink(response):
    """Return the 'directimage' value of the first entry under 'files'."""
    first_file = response['files'][0]
    return first_file['directimage']
203
204
def generateTemplate(thread_movie_title, director, imgLink, imdb_link, rt_link, bluray_link, plot_summary, movie_distributor, country, imdb_year, movie_title):
    """Fill in the BBCode thread template and write it to a text file.

    The output file is '<movie_title> (<imdb_year>).txt' in the current
    working directory; an existing file of that name is overwritten.

    Keyword arguments:
    thread_movie_title -- heading shown at the top of the thread
    director -- director name
    imgLink -- URL of the poster image
    imdb_link, rt_link, bluray_link -- URLs for the link bar
    plot_summary -- plot text
    movie_distributor, country -- shown on the "Edition" line
    imdb_year, movie_title -- used only to build the output file name
    """
    template = """
[quote][align=center][size=7][b]%s[/b][/size]
[size=5]by %s[/size][/align][/quote]
[align=center][img]%s[/img][/align]
[align=center][quote][size=4][b][url=%s][color=#008b46]IMDb[/color][/url] | [url=%s][color=#BF0000]Rotten[/color][/url] | [url=%s][color=#00468b]BluRay[/color][/url] | [url=][color=#f3e800]Discuss[/color][/url][/b][/size][/quote][/align]
[align=center][size=3][b][color=#8b0000]Plot[/color][/b][/size][quote] %s
[/quote][/align]
[align=center][size=3][b][color=#8b0000]Remux Information[/color][/b][/align][quote][*][b]Name: [/b]
[*][b]Source: [/b]
[*][b]Edition: [/b] %s | %s
[*][b]Video: [/b] TBD
[*][b]Audio: [/b] TBD
[*][b]Subtitles: [/b] TBD
[*][b]Chapters: [/b]TBD (TBD)[/quote]
[align=center][size=3][b][color=#8b0000]Notes[/color][/b][/align][quote][/quote]

[align=center][size=3][b][color=#8b0000]Additional Information[/color][/b][/size][/align][quote][*][b]BDInfo:[/b]
[spoiler][align=left]

[/align][/spoiler][*][b]Logs:[/b]
[spoiler][align=left]

[/align][/spoiler][*][b]Mediainfo:[/b]
[spoiler][align=left]

[/align][/spoiler][/quote]
[align=center][size=3][b][color=#8b0000]Screenshots[/color][/b][/size][/align]
[align=center][spoiler]Screenshots[/align]""" % (thread_movie_title, director, imgLink, imdb_link, rt_link, bluray_link, plot_summary, movie_distributor, country)

    output_name = movie_title + " (" + imdb_year + ").txt"
    # Explicit UTF-8: foreign titles and plot text may contain characters
    # the platform default encoding cannot represent. The context manager
    # closes the file even if the write fails.
    with open(output_name, "w", encoding="utf-8") as f:
        f.write(template)
238
239
240
# Example links for manual testing:
# blurayLink = "https://www.blu-ray.com/movies/Vampire-Girl-vs-Frankenstein-Girl-Blu-ray/13951/"
# blurayLink = "https://www.blu-ray.com/movies/A-Star-Is-Born-Blu-ray/217109/"

# The Blu-ray.com page URL is taken from the first command line argument.
if len(sys.argv) < 2:
    sys.exit("usage: python <script> <blu-ray.com movie URL>")
blurayLink = sys.argv[1]

# User settings are read from the [user_settings] section of conf.txt.
config = configparser.ConfigParser()
config.read("conf.txt")
api_key = config['user_settings']['api_key']
gallery_name = config['user_settings']['gallery_name']
# getboolean parses "yes"/"no", "true"/"false", "1"/"0"; a raw string such
# as "False" is truthy and would always trigger the delete below.
delete_poster = config.getboolean('user_settings', 'delete_poster', fallback=False)


# Scrape each site, upload the poster, then write the thread template.
blurayDict = getBluray(blurayLink)
bluray_title = blurayDict['title']
imdbDict = getIMDB(bluray_title, blurayDict['year'])
imdb_year = imdbDict['year']
rtDict = getRottenTomatoes(bluray_title, imdb_year)
file_name = getMoviePoster(bluray_title, rtDict['bluray_title_formatted'], imdb_year)
testResult = largestFiles(file_name)
aimgresponse = uploadToAIMG(bluray_title, imdb_year, testResult, api_key, gallery_name)
imgLink = aimgLink(aimgresponse)
generateTemplate(imdbDict['thread_movie_title'], imdbDict['director'], imgLink, imdbDict['movie_url'], rtDict['movie_url'], blurayLink, imdbDict['plot_summary'], blurayDict['movie_distributor'], blurayDict['country'], imdb_year, bluray_title)

# Remove the locally downloaded poster once it has been uploaded.
if delete_poster:
    os.remove(str(file_name[0]))