· 6 years ago · Jan 02, 2020, 06:52 AM
"""
This module provides search(), which takes a search text as a parameter
and returns the URL of the first video result from YouTube.
"""
5
import datetime
import os
import random
import time

import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
13
# Service name and version handed to googleapiclient's build()
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'
# Developer key from the google cloud console. It is read from the
# YOUTUBE_API_KEY environment variable so the real key can stay out of the
# source; the placeholder is only the fallback when the variable is unset.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '****')

# Delay value that search() passes to youtube_get_list()
DELAY_SECONDS = 3
20
21
def youtube_get_list(search_text, exponential_delay, ix):
    """Run a YouTube search for ``search_text`` and return the raw response.

    Args:
        search_text: Query string passed as ``q`` to the search endpoint.
        exponential_delay: Not used by this function; kept so existing
            callers that pass it keep working.
        ix: Not used by this function; kept for the same reason.

    Returns:
        The result of ``search().list(...).execute()``, requesting only the
        ``id`` part and at most one result, or ``None`` when the client
        raised an ``HttpError``.
    """
    try:
        # build() creates a google client object where service = youtube;
        # read the google api client documentation (link provided in README)
        # for more info.
        youtube = build(YOUTUBE_API_SERVICE_NAME,
                        YOUTUBE_API_VERSION,
                        developerKey=API_KEY)
        request = youtube.search().list(q=search_text,
                                        part='id',
                                        maxResults=1)
        return request.execute()
    except HttpError as err:
        # Still best-effort (the caller gets None), but report the failure
        # instead of dropping it silently so it can be diagnosed.
        print('YouTube search failed for {!r}: {}'.format(search_text, err))
        return None
41
42
def search(search_text):
    """Return the watch URL of the first video found for ``search_text``.

    Args:
        search_text: Query string forwarded to ``youtube_get_list``.

    Returns:
        ``"https://www.youtube.com/watch?v=<videoId>"`` for the first result
        whose kind is ``youtube#video``, or the string ``"VIDEO NOT FOUND"``
        when the request produced no response or the response contains no
        video result.
    """
    search_response = youtube_get_list(search_text, DELAY_SECONDS, 0)
    if not search_response:
        return "VIDEO NOT FOUND"

    for search_result in search_response.get('items', []):
        result_id = search_result.get('id', {})
        # only video results carry a videoId (others are not videos)
        if result_id.get('kind') == 'youtube#video':
            # Return at the first video so exactly one id follows the prefix.
            return "https://www.youtube.com/watch?v=" + result_id['videoId']

    # No video among the results: the bare URL prefix is not a usable link,
    # so report the miss explicitly.
    return "VIDEO NOT FOUND"
60
61
def _build_search_text(artist, album, track):
    """Prefix the track with the artist, or else the album, when usable."""
    # Non-string values (e.g. NaN from blank cells) are skipped; a prefix
    # equal to the track title is not repeated.
    for prefix in (artist, album):
        if isinstance(prefix, str):
            return track if prefix == track else prefix + ' ' + track
    return track


def read_files(start_row=0, end_row=None):
    """Look up a YouTube link for each track in ``list2.txt`` and save them.

    Reads the tab-separated file ``list2.txt`` (the ``Artist``, ``Album`` and
    ``Track`` columns are used), calls ``search`` for every row in the
    selected range and writes those rows, with a ``YoutubeLink`` column, to
    ``videos_list.csv``. Processing stops at the first row for which
    ``search`` returns ``"VIDEO NOT FOUND"``; the rows handled so far are
    still saved. Elapsed times and a running row count are printed for
    logging purposes.

    Args:
        start_row: Position of the first row to process (default: first row).
        end_row: Position one past the last row to process; ``None`` means
            through the end of the list.
    """
    now = datetime.datetime.now()  # reference point for the elapsed-time logs
    print(datetime.datetime.now() - now)

    # the list from pastebin was saved in a txt file
    df = pd.read_csv('list2.txt', sep='\t').assign(YoutubeLink="")
    print(len(df))
    # The list is divided so it can be run periodically. Copy the slice so
    # the links are written into an independent frame rather than a view.
    subdf = df[start_row:end_row].copy()

    for count, (index, row) in enumerate(subdf.iterrows()):
        search_text = _build_search_text(row['Artist'],
                                         row['Album'],
                                         row['Track'])

        print(datetime.datetime.now() - now)
        content_link = search(search_text)
        if content_link == 'VIDEO NOT FOUND':
            break
        # Single .loc[row, column] assignment; chained indexing would write
        # to a temporary object and lose the link.
        subdf.loc[index, 'YoutubeLink'] = content_link
        print(datetime.datetime.now() - now)
        print(count)

    # result is saved in a csv file which is then manually exported to
    # google sheets
    subdf.to_csv('videos_list.csv')
95
96
# Script entry point: process the track list when run directly.
if __name__ == "__main__":
    read_files()