· 7 years ago · Jan 26, 2019, 04:48 PM
1# -*- coding: utf-8 -*-
2# import config file
3import config
4
5# import libraries
6from bs4 import BeautifulSoup
7import datetime
8import urllib.request as urllib
9import sys
10import time
11import re
12import sqlite3
13
14# webdriver libraries
15from selenium import webdriver
16from selenium.webdriver.firefox.options import Options
17from selenium.webdriver.support.ui import WebDriverWait
18from selenium.webdriver.support import expected_conditions as EC
19from selenium.webdriver.common.by import By
20
21# spotipy library
22import spotipy
23import spotipy.util as util
24
25# import pylast
26import pylast
27
# Song bundles what is kept for each scraped track: the artist name, the song
# title and the spotify uri the track was resolved to.
class Song:
    # class-level defaults; every instance overrides them in __init__
    artist = None
    song = None
    spotify_uri = None

    def __init__(self, artist, song, spotify_uri):
        self.artist, self.song, self.spotify_uri = artist, song, spotify_uri

    # write artist, song and uri of this song to stdout on a single line
    def printSong(self):
        fields = (self.artist, '-', self.song, ', Uri:', self.spotify_uri)
        print(*fields)
41
##------------------------------------------------------------------------------
## Get date of the latest sunday
##
## @param today, optional datetime.date used as reference day; defaults to
##        the current local date (mainly useful for testing and backfills)
## @return formatted date of the last sunday as yyyymmdd; when the reference
##         day is a sunday, that day itself is returned
#
def getSundayDate(today=None):
    if today is None:
        today = datetime.date.today()

    # weekday() is 0 for monday ... 6 for sunday, so this is the number of
    # days that passed since the most recent sunday (0 on a sunday)
    days_since_sunday = (today.weekday() - 6) % 7
    sunday_of_week = today - datetime.timedelta(days=days_since_sunday)
    return sunday_of_week.strftime('%Y%m%d')
53
##------------------------------------------------------------------------------
## URL Pattern
##
## Example: https://fm4.orf.at/player/20190120/SSU
## URL pattern:
##   /yyyymmdd/SSU
##   /20190120/SSU
## SSU is Sunny Side Up, the show from 10am till 1pm.
## The date part of the URL changes every day; it is rebuilt on every call
## from the date of the latest sunday, so only sunday shows are requested.
##
## @return concatenated URL of website
def getURLPattern():
    player_base = 'https://fm4.orf.at/player/'
    show_suffix = '/SSU'
    return player_base + getSundayDate() + show_suffix
68
##------------------------------------------------------------------------------
## Get html source of the page returned by getURLPattern(), rendered in a
## headless firefox
##
## @return html source as beautiful soup object
## @raises whatever selenium raises when the page cannot be loaded or the
##         'broadcast-items-list' element does not show up within 3 seconds
#
def getHtmlFromPage():
    page_URL = getURLPattern()

    options = Options()
    options.headless = True
    profile = webdriver.FirefoxProfile()
    # presumably keeps the web player silent while the page is open
    profile.set_preference("media.volume_scale", "0.0")
    driver = webdriver.Firefox(options=options, firefox_profile=profile)

    # from here on the browser process exists, so it has to be shut down on
    # every exit path, including a failed page load or a wait timeout
    try:
        driver.get(page_URL)

        wait = WebDriverWait(driver, 3)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME,
                                                   'broadcast-items-list')))
        # NOTE(review): extra second after the list container is present,
        # presumably so its entries are rendered too - confirm
        time.sleep(1)

        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        driver.quit()
93
##------------------------------------------------------------------------------
## remove bad characters from list, in place
##
## Every element is converted to a string. Characters other than ascii
## letters, whitespace, digits, german umlauts / sharp s and '-' are dropped,
## then a standalone "ft" (featuring) and everything after it on the same
## line is cut off.
##
## @param strList, list with elements to check; modified in place
#
def sanitize(strList):
    # NOTE(review): the previous pattern read '[^A-zsdäöüÄÖÜß-]'. The
    # redundant 's' and 'd' inside 'A-z' suggest lost '\s' / '\d' escapes;
    # without them all whitespace and digits were stripped from the names.
    # 'A-z' also let '[', '\', ']', '^', '_' and '`' through, hence 'A-Za-z'.
    regex_remove = re.compile(r'[^A-Za-z\s\däöüÄÖÜß-]')
    # word boundaries keep names that merely contain "ft" (e.g. "Daft")
    # intact; '.' does not match a newline, so the cut ends at end of line
    regex_ft = re.compile(r'\bft\b.*')

    for index, element in enumerate(strList):
        cleaned = regex_remove.sub("", str(element))
        strList[index] = regex_ft.sub("", cleaned)
107
##------------------------------------------------------------------------------
## print music, one "interpreter : title" line per entry
##
## @param interpreter_list, list of interpreter names; determines how many
##        lines are printed
## @param title_list, list of titles, read at the same positions
#
def printMusic(interpreter_list, title_list):
    for position, interpreter in enumerate(interpreter_list):
        line = interpreter + " : " + title_list[position]
        print(line)
116
##------------------------------------------------------------------------------
## parse html
##
## Fetches the page via getHtmlFromPage() and collects the text of every
## div with class "interpreter" and every div with class "title".
##
## @param interpreter_list, list the interpreter texts are appended to
## @param title_list, list the title texts are appended to
## @raises Exception if the number of interpreters and titles differ, or if
##         the page contains none at all
#
def parseHtml(interpreter_list, title_list):
    soup = getHtmlFromPage()

    # find all interpreter in playlist
    interpreter = soup.find_all("div", {"class": "interpreter"})

    # find all titles in playlist
    title = soup.find_all("div", {"class": "title"})

    # Check for errors
    if len(interpreter) != len(title):
        raise Exception("The amount of interpreters don't correspond "
                        "to the amount of titles.")
    if not interpreter:
        raise Exception("No FM4 music playlist found in given url")

    interpreter_list.extend(tag.text for tag in interpreter)
    title_list.extend(tag.text for tag in title)
141
##------------------------------------------------------------------------------
## create Token with the credentials from the config module
##
## @return authentication token
## @raises Exception if spotipy does not hand back a token
#
def getToken():
    # authentication token
    token = util.prompt_for_user_token(
        config.USERNAME,
        config.SCOPE,
        config.CLIENT_ID,
        config.CLIENT_SECRET,
        config.REDIRECT_URI,
    )

    if not token:
        raise Exception("Could not get authentication token from spotify!")
    return token
156
##------------------------------------------------------------------------------
## search track and get spotify uri
##
## @param spotify_Obj, object with a search(q=...) method (a spotipy client
##        in this script)
## @param interpreter && title, strings containing track info; they are
##        joined with a single space to form the search query
## @return uri string of the first search hit, or None if the search returned
##         nothing or no tracks
#
def getUri(spotify_Obj, interpreter, title):
    result = spotify_Obj.search(q=interpreter + ' ' + title)
    if result is None:
        return None

    items = result['tracks']['items']
    if not items:
        return None

    # only the first hit is used
    return str(items[0]['uri'])
172
##------------------------------------------------------------------------------
## correct artist name and track title with lastFm api
##
## The artist correction is requested first; the track correction is then
## requested for the corrected artist together with the given track name.
##
## @param1 artist_name, name of artist to correct
## @param2 track_name, title name to correct
## @return pylast.Track built from both get_correction() results
#
def getTrackInfo(artist_name, track_name):
    # network authentication
    network = getLastFmNetworkAuth()

    corrected_artist = network.get_artist(artist_name).get_correction()
    corrected_title = network.get_track(corrected_artist,
                                        track_name).get_correction()

    return pylast.Track(corrected_artist, corrected_title, network)
195
##------------------------------------------------------------------------------
## get last fm network object, created with the api key and secret from the
## config module
##
## @return pylast.LastFMNetwork instance
#
def getLastFmNetworkAuth():
    return pylast.LastFMNetwork(config.LASTFM_API_KEY,
                                config.LASTFM_API_SECRET)
204
##------------------------------------------------------------------------------
## parse music items from website, put them into lists, sanitize the lists,
## correct artist names and song titles with last.fm API, look up the spotify
## uri of every track and collect the tracks that were found as Song objects
##
## @param song_list, list a Song is appended to for every track that has a
##        spotify uri; tracks without a search hit are skipped
#
def parseTracksIntoSongClassList(song_list):
    # lists containing the Interpreter and title
    interpreter_list = []
    title_list = []

    # fill lists with results
    parseHtml(interpreter_list, title_list)

    print(datetime.datetime.now(), "Done parsing html")

    # remove bad characters from lists
    sanitize(interpreter_list)
    sanitize(title_list)

    # get Token and create spotify object
    sp = spotipy.Spotify(getToken())

    # parseHtml guarantees both lists have the same length
    for scraped_artist, scraped_title in zip(interpreter_list, title_list):
        track_info = getTrackInfo(scraped_artist, scraped_title)

        # check for None BEFORE converting to str: str(None) is the
        # non-None string 'None' and would replace the scraped value
        corrected_title = track_info.get_name()
        corrected_artist = track_info.get_artist()

        artist = scraped_artist
        title = scraped_title

        # NOTE(review): a correction whose title equals the artist is
        # ignored as a whole - presumably a known bad last.fm answer; confirm
        if str(corrected_title) != str(corrected_artist):
            if corrected_title is not None:
                title = str(corrected_title)
            if corrected_artist is not None:
                artist = str(corrected_artist)

        # get spotify uri for song
        spotify_uri = getUri(sp, artist, title)

        if spotify_uri:
            song_list.append(Song(artist, title, str(spotify_uri)))

    print(datetime.datetime.now(), "Done parsing songs")
255
##------------------------------------------------------------------------------
## insert new songs to database; duplicates are ignored by the table's
## UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE constraint
##
## The songs table is created if it does not exist yet. Every row is stored
## with today's date (yyyy-mm-dd) as UploadDate and Uploaded = 0.
##
## @param song_list, list containing songs which need to be inserted
##        into database; each entry needs the attributes artist, song and
##        spotify_uri
## @param database_name, path of the sqlite file, defaults to
##        'SongDatabase.db'
#
def updateDatabase(song_list, database_name='SongDatabase.db'):
    # date to insert into table, bound as an explicit string so the value
    # does not depend on sqlite3's implicit date adapter
    upload_date = datetime.date.today().strftime('%Y-%m-%d')

    rows = [(song.artist, song.song, song.spotify_uri, upload_date, 0)
            for song in song_list]

    conn = sqlite3.connect(database_name)
    # close the connection on every exit path, also when a statement fails
    try:
        c = conn.cursor()

        c.execute('''CREATE TABLE IF NOT EXISTS songs
            (SongID INTEGER PRIMARY KEY, artist_name TEXT, song_name TEXT,
            spotify_uri TEXT, UploadDate TIMESTAMP, Uploaded INTEGER,
            UNIQUE(artist_name, song_name, spotify_uri) ON CONFLICT IGNORE)''')

        c.executemany('''INSERT INTO songs
            (artist_name, song_name, spotify_uri, UploadDate, Uploaded)
            VALUES (?,?,?,?,?)''', rows)

        conn.commit()
        c.close()
    finally:
        conn.close()

    print(datetime.datetime.now(), "Done updating Database")
284
##------------------------------------------------------------------------------
## copy Uris from song_list into new list and print that list
##
## @param song_list, list containing songs whose spotify_uri gets copied
## @return uri_list, list containing all song uris in the order of song_list
#
def getUrisList(song_list):
    uri_list = [entry.spotify_uri for entry in song_list]
    print(uri_list)
    return uri_list
298
##------------------------------------------------------------------------------
## Main part of the program
## scrape the playlist of the latest sunday show, resolve the tracks and
## store them in the song database
#
if __name__ == '__main__':
    # filled in place by parseTracksIntoSongClassList with the corrected songs
    song_list = []
    parseTracksIntoSongClassList(song_list)

    # persist the collected songs
    updateDatabase(song_list)
312
313# -*- coding: utf-8 -*-
314# import config file
315import config
316
317import sqlite3
318import pandas as pd
319
320# spotipy library
321import spotipy
322import spotipy.util as util
323
##------------------------------------------------------------------------------
## create Token with the credentials from the config module
##
## @return authentication token, passed through unchecked from
##         util.prompt_for_user_token
#
def getToken():
    return util.prompt_for_user_token(
        config.USERNAME,
        config.SCOPE,
        config.CLIENT_ID,
        config.CLIENT_SECRET,
        config.REDIRECT_URI,
    )
334
##------------------------------------------------------------------------------
## upload all songs that are not marked as uploaded yet to the spotify
## playlist config.PLAYLIST_ID and mark them as uploaded in the database
##
## Songs are sent in batches; after every successful batch exactly those rows
## are set to Uploaded = 1 and committed, so a failure in a later batch does
## not cause the earlier songs to be uploaded again on the next run.
##
## @raises Exception if no spotify token could be obtained
## @raises Exception if the database holds no songs with Uploaded = 0
#
def uploadSongsToSpotify():
    # declare db name
    database_name = 'SongDatabase.db'
    # the spotify endpoint accepts at most 100 tracks per request
    batch_size = 100

    # check the token itself: a spotipy.Spotify object is always truthy, so
    # testing the client can never detect a missing token
    token = getToken()
    if not token:
        raise Exception("Could not get token from spotify API")
    sp = spotipy.Spotify(token)

    # spotify username
    username = config.USERNAME
    # spotify id of playlist
    playlist_id = config.PLAYLIST_ID

    conn = sqlite3.connect(database_name)
    # close the connection on every exit path, including the raise below
    try:
        c = conn.cursor()

        c.execute("""SELECT SongID, spotify_uri FROM songs WHERE (Uploaded = 0)""")
        pending = c.fetchall()

        # save uris in list, for spotipy
        uri_list = [str(uri) for _, uri in pending]
        print(uri_list)

        # check for empty list
        if not uri_list:
            raise Exception("There aren't any new songs in database, songs were already uploaded")

        for start in range(0, len(pending), batch_size):
            batch = pending[start:start + batch_size]

            # upload this batch to spotify
            sp.user_playlist_add_tracks(username, playlist_id,
                                        [str(uri) for _, uri in batch])

            # flag only the rows that were just uploaded, so songs inserted
            # after the SELECT are not marked by accident
            c.executemany("""UPDATE songs SET Uploaded = 1 WHERE SongID = ?""",
                          [(song_id,) for song_id, _ in batch])
            conn.commit()

        c.close()
    finally:
        conn.close()
383
384
# script entry point: push all pending songs from the database to spotify
if __name__ == '__main__':
    uploadSongsToSpotify()