# Paste-site header: 6 years ago · Dec 08, 2019, 02:12 PM
# IMPORT MODULES
import datetime
import json
import os
import re

import requests
import spacy
# from gensim.models import LdaModel
# from gensim.corpora.dictionary import Dictionary
6
class NewsAnalyzer:
    """Download news articles from NewsAPI for later text analysis.

    The instance holds a loaded spaCy English pipeline (``self.nlp``) and the
    NewsAPI key (``self.api_key``). Only the download path is implemented;
    saving, cleaning and topic analysis are still stubs.
    """

    # NewsAPI search endpoint queried by get_data_from_api.
    ENDPOINT = 'https://newsapi.org/v2/everything'
    # Seconds to wait for NewsAPI before giving up; without a timeout a
    # stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key: str):
        """Load the spaCy ``en_core_web_sm`` model and remember the API key."""
        self.nlp = spacy.load('en_core_web_sm')
        self.api_key = api_key

    def _build_query_params(self, query_term: str, day: str) -> dict:
        """Return the query-string parameters for a single-day article search."""
        return {
            'q': query_term,
            'apiKey': self.api_key,
            # Same value for both bounds restricts the search to one day.
            'from': day,
            'to': day,
            'pageSize': 100,
            'language': 'en',
        }

    def get_data_from_api(self, query_term: str, start_date='2019-11-08') -> list:
        """Fetch articles matching ``query_term`` for seven consecutive days.

        One request is sent per day, starting at ``start_date``. No ``page``
        parameter is sent, so at most ``pageSize`` (100) articles are
        requested for each day.

        Args:
            query_term: Search phrase passed to NewsAPI as ``q``.
            start_date: First day to query, as ``YYYY-MM-DD``.

        Returns:
            The article dicts of all seven responses, in request order.

        Raises:
            RuntimeError: If NewsAPI answers with a status other than ``ok``.
        """
        article_list = []
        for day in self.get_next_seven_days(start_date):
            response = requests.get(
                self.ENDPOINT,
                params=self._build_query_params(query_term, day),
                timeout=self.REQUEST_TIMEOUT,
            )
            payload = response.json()
            # Error responses carry no 'articles' key; report the API's own
            # explanation instead of failing later with a bare KeyError.
            if payload.get('status') != 'ok':
                raise RuntimeError(
                    'NewsAPI request for {} failed: {} ({})'.format(
                        day, payload.get('message'), payload.get('code')
                    )
                )
            article_list.extend(payload.get('articles', []))
        return article_list

    def save_news_data_to_file(self, output_file_name: str, query_term: str, start_date='2019-11-01'):
        """Not implemented yet: currently does nothing and returns ``None``."""
        # TODO: implement.

    def extract_clean_news_content(self, news_list: list) -> list:
        """Not implemented yet: currently always returns an empty list."""
        cleaned_list = []
        # TODO: implement.
        return cleaned_list

    def analyze_topics(self, news_data_file, num_topics) -> list:
        """Not implemented yet: currently always returns an empty list."""
        keyword_list = []
        # TODO: implement.
        return keyword_list

    def get_next_seven_days(self, start_date_in: str) -> list:
        """Return seven consecutive dates starting at ``start_date_in``.

        Args:
            start_date_in: Start date as ``YYYY-MM-DD``; month and day may be
                given without zero padding.

        Returns:
            Seven zero-padded ISO 8601 date strings (``YYYY-MM-DD``), the
            first being the start date itself.

        Raises:
            ValueError: If the string is not three ``-``-separated integers
                forming a valid calendar date.
        """
        year, month, day = (int(part) for part in start_date_in.split('-'))
        first_day = datetime.date(year, month, day)
        # isoformat() zero-pads month and day, as NewsAPI expects.
        return [
            (first_day + datetime.timedelta(days=offset)).isoformat()
            for offset in range(7)
        ]
74
# Demo run: fetch a week of 'bitcoin' articles and print them. Guarded so that
# importing this module does not trigger network requests.
if __name__ == "__main__":
    # SECURITY: the fallback key below is committed in plain text. Prefer
    # setting NEWSAPI_KEY in the environment, then rotate and remove this key.
    api_key = os.environ.get('NEWSAPI_KEY', 'e9fd4dafeb8c4abe9d4483a67d078763')
    inst = NewsAnalyzer(api_key)
    res = inst.get_data_from_api('bitcoin')
    print(res)