· 4 years ago · Apr 11, 2021, 02:32 PM
1# std imports
2import os
3import re
4import csv
5import json
6import logging
7
8# API imports
9import wptools
10import wikipedia
11
12import pylast
13
14import spotipy
15from spotipy.oauth2 import SpotifyClientCredentials
16
# Only WARNING and above are emitted, so the logging.info calls in this
# module stay silent unless this level is lowered.
logging.basicConfig(level=logging.WARNING)

# SECURITY NOTE(review): the credentials below are committed in plain text.
# They are kept only as backward-compatible fallbacks; values already present
# in the environment now take precedence, so the real secrets can be supplied
# from outside the source tree. The hard-coded ones should be rotated.
last_fm_api_key = os.environ.get("LAST_FM_API_KEY", "194ebdf5b49fa996adb5ffb9bfcab1db")
passwd_path = "hidden/passwdData.csv"
albums_info_path = "album_info/albums.csv"
dest_path = "album_info/album_info.json"

# SpotifyClientCredentials() is created without arguments in get_albums_info,
# so the Spotify credentials are handed over through these environment
# variables. setdefault() keeps any value the user has already exported.
os.environ.setdefault('SPOTIPY_CLIENT_ID', 'a7dfe025796347eeb0e630dc21b2abb4')
os.environ.setdefault('SPOTIPY_CLIENT_SECRET', '3a3144d353cf42ff95b3f04a129c10a5')

# Keys of a per-track audio-features record that get averaged per album.
features_list = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness',
                 'liveness', 'valence', 'tempo']
28
29
# ----- Not needed at the moment (kept for reference) -----
# Expected layout of the password CSV file
# (presumably the file at passwd_path; confirm before use):
#
#   NAME            FIRST_KEY    SECOND_KEY
#   last_fm_api     API Key      Shared Secret
#   spotify_api     Client ID    Client Secret
#   last_fm_login   Login        Hashed Password
# ----------------------------------------------------------
40
41
def read_album_info_from_csv(path):
    """Read the first two columns of a ';'-delimited CSV file.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list of ``(first_column, second_column)`` tuples, one per usable
        row, in file order. Any further columns are ignored.

    Blank lines and rows with fewer than two columns are skipped (the latter
    with a warning) instead of aborting the whole run with an IndexError.
    """
    result = []

    with open(path, "r", newline='') as csv_file:
        rows = csv.reader(csv_file, delimiter=";")
        for row_number, row in enumerate(rows, start=1):
            if not row:
                # csv.reader yields an empty list for a blank line
                continue
            if len(row) < 2:
                logging.warning(f"Skipping row {row_number} of {path}: expected 2 columns, got {len(row)}")
                continue
            result.append((row[0], row[1]))

    return result
51
52
def get_features(album, artist, spotify):
    """Return the duration-weighted average audio features of an album.

    The album is looked up through a Spotify search and the first hit is
    used. Every feature named in ``features_list`` is averaged over the
    album's tracks, each track weighted by its ``duration_ms``.

    Args:
        album: Album title placed in the search query.
        artist: Artist name placed in the search query.
        spotify: Client object providing ``search``, ``album_tracks`` and
            ``audio_features`` (a ``spotipy.Spotify`` instance in this file).

    Returns:
        A dict mapping each name in ``features_list`` to its weighted
        average. An empty dict is returned when the album is not found or
        when none of its tracks yields usable audio features.
    """
    query = "album:{} artist:{}".format(album, artist)
    album_info = spotify.search(q=query, type="album")['albums']['items']
    if not album_info:
        logging.warning(f"Album {artist} - {album} hasn't been found on spotify")
        return {}

    # NOTE: only the tracks contained in this single response are used;
    # no further result pages are requested.
    tracks = spotify.album_tracks(album_info[0]['uri'])
    track_ids = [track['id'] for track in tracks['items']]

    # One batched request instead of one request per track.
    all_track_features = (spotify.audio_features(track_ids) or []) if track_ids else []

    album_features = {feature: 0 for feature in features_list}
    album_duration = 0

    for track_features in all_track_features:
        if not track_features:
            # The response may hold None for a track without feature data;
            # indexing it would raise TypeError.
            continue
        track_duration = track_features['duration_ms']
        for feature in features_list:
            album_features[feature] += track_features[feature] * track_duration
        album_duration += track_duration

    if album_duration == 0:
        # Avoid ZeroDivisionError when there is nothing to average.
        logging.warning(f"Album {artist} - {album} has no usable audio features on spotify")
        return {}

    return {feature: total / album_duration for feature, total in album_features.items()}
74
75
def get_tags(artist, title, network):
    """Fetch the top last.fm tags of an album.

    Args:
        artist: Artist name.
        title: Album title.
        network: Object providing ``get_album(title=..., artist=...)``
            (a ``pylast.LastFMNetwork`` in this file).

    Returns:
        A dict mapping tag name to its integer weight (at most ten tags are
        requested). An empty dict is returned when the lookup raises
        ``pylast.WSError`` or when the album has no tags.
    """
    logging.info(f'Artist: {artist:30},Title: {title:30}')

    # last.fm spells names with "&" more often than with "and";
    # str.replace is a no-op when " and " does not occur.
    title = title.replace(" and ", " & ")
    artist = artist.replace(" and ", " & ")

    try:
        top_tags = network.get_album(title=title, artist=artist).get_top_tags(limit=10)
    except pylast.WSError:
        logging.warning(f"Album {artist} - {title} hasn't been found on last.fm")
        return {}
    else:
        if not top_tags:
            logging.warning(f'Album {artist} - {title} has 0 tags on last.fm')

        weights_by_name = {}
        for top_tag in top_tags:
            weights_by_name[top_tag.item.name] = int(top_tag.weight)
        return weights_by_name
97
98
# Matches one wiki link such as "[[Art rock|art rock]]" (non-greedy, up to
# the first closing "]]"). Written as a raw string: the former plain literal
# relied on the invalid escape "\[", which newer Python versions warn about.
_WIKI_LINK_RE = re.compile(r'\[\[.*?]]')


def format_genres(genres_str):
    """Extract lower-cased genre names from the wiki links in a string.

    Args:
        genres_str: Text containing ``[[...]]`` wiki links.

    Returns:
        A list with one entry per link, in order of appearance. For a piped
        link (``[[target|label]]``) the shortest of the ``|``-separated
        parts is kept. Text outside of links is ignored.
    """
    genres = []
    for link in _WIKI_LINK_RE.findall(genres_str):
        inner = link[2:-2].lower()  # drop the surrounding "[[" and "]]"
        genres.append(min(inner.split("|"), key=len))
    return genres
104
105
def remove_duplicates(arr):
    """Return a list with the unique items of *arr*, first occurrence first.

    Going through ``set`` made the result order vary between runs for
    strings, so the generated JSON was not reproducible; ``dict.fromkeys``
    keeps insertion order. Items must be hashable, as before.

    Args:
        arr: Iterable of hashable items.

    Returns:
        A new list without repeated items.
    """
    return list(dict.fromkeys(arr))
108
109
def get_genre(album, artist):
    """Look up the genres of an album in Wikipedia infoboxes.

    Wikipedia is searched for ``"<album> (<artist>)"`` and up to the first
    five hits are parsed. The first page whose infobox has a non-empty
    ``genre`` field supplies the result.

    Args:
        album: Album title.
        artist: Artist name.

    Returns:
        A de-duplicated list of genre names, or an empty list (after logging
        a warning) when none of the inspected pages provides one.
    """
    search_hits = wikipedia.search(album + " (" + artist + ")")

    for page_title in search_hits[:5]:
        page = wptools.page(page_title, silent=True).get_parse()
        if not page:
            continue

        infobox = page.data.get('infobox')
        if not infobox:
            continue

        genres_str = infobox.get('genre')
        if genres_str:
            return remove_duplicates(format_genres(genres_str))

    logging.warning(f"Album {artist} - {album} hasn't been found on wikipedia")
    return []
122
123
def get_albums_info(api_key, albums_names):
    """Collect tags, audio features and genres for every listed album.

    Args:
        api_key: last.fm API key used to create the ``pylast`` network.
        albums_names: Iterable of ``(artist, title)`` pairs.

    Returns:
        A list with one dict per album holding the keys ``title``,
        ``artist``, ``tags``, ``features`` and ``genre``.
    """
    # API clients are created once and shared by all lookups
    lastfm_network = pylast.LastFMNetwork(api_key=api_key)
    spotify_client = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

    collected = []
    for artist, title in albums_names:
        tags = get_tags(artist, title, lastfm_network)
        features = get_features(title, artist, spotify_client)
        genre = get_genre(title, artist)
        collected.append({
            "title": title,
            "artist": artist,
            "tags": tags,
            "features": features,
            "genre": genre,
        })
    return collected
136
137
def write_to_json(data, path):
    """Serialize *data* as JSON into the file at *path*.

    The output is indented by four spaces with object keys sorted; an
    existing file is overwritten.

    Args:
        data: JSON-serializable object.
        path: Destination file path.
    """
    with open(path, 'w') as out_file:
        json.dump(data, out_file, indent=4, sort_keys=True)
141
142
def main():
    """Run the pipeline: read album names, gather their info, write JSON.

    Input and output locations come from the module-level
    ``albums_info_path`` and ``dest_path``; the last.fm key comes from
    ``last_fm_api_key``.
    """
    # Reading credentials from passwd_path is currently disabled:
    # passwords = read_passwords_from_csv(passwd_path)
    names = read_album_info_from_csv(albums_info_path)
    write_to_json(
        get_albums_info(api_key=last_fm_api_key, albums_names=names),
        dest_path,
    )


# Run the pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()