8VfwF6Yh

· 6 years ago · Jan 02, 2020, 06:52 AM
1"""
    This file provides ``search``, which takes a search text as a parameter
    and returns the URL of the first video result from YouTube, and
    ``read_files``, which looks up a link for each track in a saved list.
4"""
5
6from googleapiclient.discovery import build
7from googleapiclient.errors import HttpError
8
9import pandas as pd
10import datetime
11import time
12import random
13
# Service name and version handed to the Google API client builder.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

# YouTube API key from the Google Cloud console (developer key).
API_KEY = "****"

# Passed by search() as the delay argument of youtube_get_list().
DELAY_SECONDS = 3
20
21
def youtube_get_list(search_text, exponential_delay, ix):
    """Query the YouTube search endpoint for *search_text*.

    A client for the YouTube service is built with the module-level API key
    and asked for the ``id`` part of at most one search result (see the
    google api client documentation linked in the README for details).

    Args:
        search_text: Query string sent as the ``q`` parameter.
        exponential_delay: Accepted for the caller's benefit; not used here.
        ix: Accepted for the caller's benefit; not used here.

    Returns:
        The response produced by executing the search request, or ``None``
        when the client raises ``HttpError``.
    """
    try:
        client = build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=API_KEY,
        )
        request = client.search().list(
            q=search_text,
            part='id',
            maxResults=1,
        )
        response = request.execute()
    except HttpError:
        # HTTP errors are not handled further; the caller just sees None.
        return None
    else:
        return response
41
42
def search(search_text):
    """Return the YouTube watch URL of the first video found for *search_text*.

    Args:
        search_text: Free-text query forwarded to ``youtube_get_list``.

    Returns:
        ``"https://www.youtube.com/watch?v=<videoId>"`` for the first result
        whose kind is ``youtube#video``, or the string ``"VIDEO NOT FOUND"``
        when the lookup returned nothing or contained no video result.
    """
    # the url without the videoid
    base_url = "https://www.youtube.com/watch?v="

    search_response = youtube_get_list(search_text, DELAY_SECONDS, 0)
    if not search_response:
        return "VIDEO NOT FOUND"

    for search_result in search_response.get('items', []):
        result_id = search_result.get('id', {})
        # Only video results carry a videoId; other result kinds are skipped.
        if result_id.get('kind') == 'youtube#video' and result_id.get('videoId'):
            # Return on the first hit so ids are never concatenated.
            return base_url + result_id['videoId']

    # No video among the results: report it instead of handing back the
    # bare base URL, which would be a link to nothing.
    return "VIDEO NOT FOUND"
60
61
def read_files(start_row=None, end_row=None):
    """Look up a YouTube link for each track in ``list2.txt`` and save them.

    Reads the tab-separated file ``list2.txt`` (columns ``Artist``, ``Album``
    and ``Track`` are used), searches YouTube for every row in the selected
    slice and writes the slice, with a ``YoutubeLink`` column, to
    ``videos_list.csv``. Elapsed time and a running count are printed for
    logging purposes.

    Args:
        start_row: First row of the slice to process (``None`` = from the
            beginning). Lets the list be processed in several runs.
        end_row: Row at which to stop, exclusive (``None`` = to the end).

    Processing stops at the first row whose lookup yields
    ``'VIDEO NOT FOUND'``; rows handled before that are still saved.
    """
    started = datetime.datetime.now()  # reference point for the timing prints
    count = 0
    print(datetime.datetime.now() - started)

    # the list from pastebin, saved as a tab-separated txt file
    df = pd.read_csv('list2.txt', sep='\t').assign(YoutubeLink="")
    print(len(df))
    # Work on an explicit copy so the writes below land in subdf itself
    # rather than in a temporary view of df.
    subdf = df[start_row:end_row].copy()

    for idx, row in subdf.iterrows():
        track = row['Track']
        # Blank cells are read as NaN (a float), so only str values count.
        # Prefer the artist as prefix, fall back to the album, else use the
        # track alone; a prefix equal to the track name is not repeated.
        if isinstance(row['Artist'], str):
            prefix = row['Artist']
        elif isinstance(row['Album'], str):
            prefix = row['Album']
        else:
            prefix = None

        if prefix is not None and prefix != track:
            search_text = prefix + ' ' + track
        else:
            search_text = track

        print(datetime.datetime.now() - started)
        content_link = search(search_text)
        if content_link == 'VIDEO NOT FOUND':
            break
        # Single .loc call with row and column: chained indexing
        # (.loc[idx]['col'] = ...) assigns to a temporary and can be lost.
        subdf.loc[idx, 'YoutubeLink'] = content_link
        print(datetime.datetime.now() - started)
        print(count)
        count += 1

    # result is saved as csv, to be exported to google sheets manually
    subdf.to_csv('videos_list.csv')
95
96
# Script entry point: process the saved track list when run directly.
# For a quick manual check of a single query, try print(search("tenet")).
if __name__ == "__main__":
    read_files()