# Archived paste — Aug 04, 2020, 07:10 AM
# Functions related to API calls
import itertools
import json
import logging as log
import sqlite3
import time
import urllib.parse
import urllib.request
import webbrowser
from datetime import datetime

import requests
from googleapiclient.discovery import build
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from twitter import OAuth, Twitter  # Check the methods you need

import creds  # Our creds for different services
from lib import DB, Parse  # , Cluster
20
21
22
def checkNaverConnect():
    """Probe the Naver cafe-article search API and return the HTTP status code.

    Sends a fixed test query using the client id/secret from the creds module.
    Propagates urllib.error.HTTPError/URLError if the request fails.
    """
    # Fixed: the display parameter was written 'display1' (missing '='),
    # so the API ignored it; 'display=1' requests a single result.
    url = "https://openapi.naver.com/v1/search/cafearticle?query=taco&display=1"
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", creds.naver_id)
    request.add_header("X-Naver-Client-Secret", creds.naver_secret)
    # 'with' closes the response instead of leaking the connection.
    with urllib.request.urlopen(request) as response:
        code = response.getcode()
    log.info("Naver connection status: %s", code)
    return code
31
def checkDDGConnect():
    """Probe the DuckDuckGo instant-answer API and return the HTTP status code."""
    endpoint = "https://api.duckduckgo.com/?q=jacheongbi&format=json&pretty=1"
    reply = urllib.request.urlopen(urllib.request.Request(endpoint))
    status = reply.getcode()
    log.info("DuckDuckGo connection status: " + str(status))
    return status
38
def checkTwitterConnect():
    """Fetch one home-timeline entry to verify the Twitter credentials work.

    Returns the string "Twitter" when the API call yields a truthy result;
    otherwise falls through and implicitly returns None.
    """
    auth = OAuth(creds.twitter_access_token,
                 creds.twitter_access_secret,
                 creds.twitter_consumer_id,
                 creds.twitter_consumer_secret)
    client = Twitter(auth=auth)
    tstatus = client.statuses.home_timeline(count=1)
    log.info("Twitter connection status: " + str(tstatus))
    if tstatus:
        return "Twitter"
48
# Check that all of the creds we have are valid
# Throw an error if not valid, mark as invalid, and continue
def checkCreds():
    """Validate every credential set and report which services responded.

    Each service is probed independently; a failure is reported and the check
    moves on to the next service. Returns the list of working service names.
    """
    log.info("Checking creds")
    APIs = []
    # NAVER API (cafe)
    try:
        if creds.naver_id and creds.naver_secret:
            log.info("Have Naver creds")
            status = checkNaverConnect()
            if status == 200:
                print("Connected to Naver API... \tOK!")
                APIs.append("Naver")
    except Exception:  # narrowed from bare except: keep KeyboardInterrupt alive
        print("Naver's authentication failed..")
    # Twitter API -- now guarded like Naver so a bad token cannot abort
    # the whole credential scan.
    try:
        if creds.twitter_access_token and creds.twitter_access_secret:
            log.info("Have Twitter creds")
            status = checkTwitterConnect()
            if status:
                print("Connected to Twitter API... \tOK!")
                APIs.append("Twitter")
    except Exception:
        print("Twitter's authentication failed..")
    # Some APIs do not need creds. Just check connect.
    # DuckDuckGo API (sidebox)
    try:
        status = checkDDGConnect()
        if status == 200:
            print("Connected to DDG API... \tOK!")
            APIs.append("DDG")
    except Exception:
        print("DDG connection failed..")
    # Return all valid API connections
    log.info("Valid creds: " + str(APIs))
    return APIs
79
def callQuery():
    """Build the list of quoted search terms from query.json.

    Reads query.json (UTF-8, BOM tolerated), wraps each field in double
    quotes (exclusion terms prefixed with '-', user ids decorated with
    '/'/'@'), and drops every entry whose underlying field was empty.
    Returns the remaining query strings, each with a trailing space.
    """
    with open('query.json', 'rt', encoding='UTF-8-sig') as query:
        data = json.load(query)

    def quoted(key, prefix='', suffix=''):
        # '"<prefix><value><suffix>" ' -- trailing space separates terms later.
        return '"' + prefix + str(data[key]) + suffix + '" '

    queries = [
        quoted('NameKo'),
        quoted('userid', prefix='/'),
        quoted('userid', prefix='@'),
        quoted('userid', suffix='@'),
        quoted('NameEn'),
        quoted('PhoneNum'),
        quoted('key1'),
        '-' + quoted('rem1'),
        quoted('weburl'),
        quoted('datebr'),
        quoted('ssnum'),
        quoted('ceo'),
        quoted('celnum'),
        quoted('faxnum'),
        quoted('crn'),
        quoted('key2'),
        quoted('key3'),
        '-' + quoted('rem2'),
        '-' + quoted('rem3'),
    ]

    # Fixed: the old filters removed '"" | ', '-"" | ', '"@" | ', '"/" | ',
    # but no built string ever contains ' | ', so empty fields were never
    # filtered out. Remove the forms that empty fields actually produce.
    empty_forms = {'"" ', '-"" ', '"@" ', '"/" '}
    return [q for q in queries if q not in empty_forms]
120
def searchNaver(query):
    """Search the Naver web-document API with combinations of *query* terms.

    For every combination of 1..5 terms from *query*, queries the openapi
    'webkr' search endpoint and records each result link into results.db
    under the 'naver' source tag via DB.RecordUrl.
    """
    DB.CreateDB('results.db', 'url')

    queryorder = 1
    print("Searching Naver...")
    while queryorder <= 5:
        for querylist in itertools.combinations(query, queryorder):
            # Flatten the tuple of quoted terms into one search string.
            # NOTE(review): this slicing assumes every term ends with '" '
            # (as built by callQuery) -- confirm before reusing elsewhere.
            querylist = str(querylist)[2:-2]
            querylist = str(querylist).replace("', '", "")
            querylist = str(querylist)[:-5]

            print(querylist)

            encText = urllib.parse.quote(querylist)  # Search Keyword

            search_filter = ["webkr"]  # , "blog", "cafearticle", "news", "kin", "doc"
            search_order = 0

            # Fixed: 'display1000' lacked the '=', so the API fell back to its
            # default page size. The documented maximum for display is 100.
            url = ("https://openapi.naver.com/v1/search/"
                   + search_filter[search_order]
                   + "?query=" + encText + "&display=100")

            request = urllib.request.Request(url)
            request.add_header("X-Naver-Client-Id", creds.naver_id)
            request.add_header("X-Naver-Client-Secret", creds.naver_secret)
            response = urllib.request.urlopen(request)
            rescode = response.getcode()

            if rescode == 200:
                response_body = response.read()
                results_json = json.loads(response_body.decode('utf-8'))

                if len(results_json['items']) == 0:
                    print('.')
                else:
                    for item in results_json['items']:
                        try:
                            # Best-effort save; narrowed from a bare except so
                            # KeyboardInterrupt is not swallowed.
                            DB.RecordUrl('results.db', 'naver', item["link"])
                        except Exception:
                            print('url save error..')
            else:
                # Fixed: rescode is an int; 'str + int' raised TypeError
                # instead of reporting the error code.
                print("Error Code:" + str(rescode))

        queryorder += 1
    print('Keyword search from Naver is completed.')
217
def searchGoogle(query):
    """Search Google Custom Search with combinations of *query* terms.

    For every combination of 1..5 terms from *query*, runs a CSE query and
    records each result link into results.db under the 'google' source tag.
    """
    DB.CreateDB('results.db', 'url')

    queryorder = 1
    print("Searching Google...")

    # Hoisted out of the loop: the service handle does not depend on the query.
    service = build("customsearch", "v1", developerKey=creds.google_devkey)

    while queryorder <= 5:
        for querylist in itertools.combinations(query, queryorder):
            # Flatten the tuple of quoted terms into one search string
            # (same convention as searchNaver).
            querylist = str(querylist)[2:-2]
            querylist = str(querylist).replace("', '", "")
            querylist = str(querylist)[:-5]

            print(querylist)

            # Fixed: the request previously sent q=query (the raw term list)
            # instead of the combined string built above, so every iteration
            # searched the same thing.
            res = service.cse().list(
                q=querylist,
                cx=creds.google_cseid,
            ).execute()

            if res['searchInformation']['totalResults'] == '0':
                print('.')
            else:
                for item in res['items']:
                    try:
                        # Best-effort save; narrowed from a bare except.
                        DB.RecordUrl('results.db', 'google', item["link"])
                    except Exception:
                        print('url save error..')
        queryorder += 1
    print('Keyword search from Google is completed.')
275
def searchDaum(query):
    """Search the Kakao (Daum) web-search API with combinations of *query* terms.

    For every combination of 1..5 terms from *query*, queries the Kakao v2
    web search and records each document URL into results.db under the
    'daum' source tag.
    """
    DB.CreateDB('results.db', 'url')

    queryorder = 1
    print("Searching Daum...")

    while queryorder <= 5:
        for querylist in itertools.combinations(query, queryorder):
            # Flatten the tuple of quoted terms into one search string
            # (same convention as searchNaver).
            querylist = str(querylist)[2:-2]
            querylist = str(querylist).replace("', '", "")
            querylist = str(querylist)[:-5]
            print(querylist)

            url = 'https://dapi.kakao.com/v2/search/web'
            queryString = {'query': querylist}
            header = {'authorization': creds.kakao_key}
            r = requests.get(url, headers=header, params=queryString)
            results_json = json.loads(r.text)

            try:
                # Fixed: iteration previously started at index 1, silently
                # dropping the first document of every page.
                for doc in results_json['documents']:
                    try:
                        # Best-effort save; narrowed from a bare except.
                        DB.RecordUrl('results.db', 'daum', doc["url"])
                    except Exception:
                        print('url save error..')
            except Exception:
                # 'documents' absent: error payload or empty result.
                print('.')
        queryorder += 1
    print('Keyword search from Daum is completed.')
334
def searchTwitter(query):
    """Scrape tweet URLs for *query* from the Twitter search page into results.db.

    Drives a Chrome instance via selenium, scrolls until the page height stops
    growing, and records every '/status/' link under the 'twitter' source tag.
    """
    DB.CreateDB('results.db', 'url')

    print("Searching Twitter...")

    driver = webdriver.Chrome('chromedriver')
    driver.get('https://twitter.com/search?q=' + query)

    # Collected anchor hrefs (the original reused the name 'path' for both the
    # chromedriver path and this list; renamed for clarity).
    hrefs = []
    while True:
        # Wait until at least one anchor is rendered before harvesting.
        WebDriverWait(driver, 15).until(
            lambda x: x.find_element(by='css selector', value="a[role='link']"))
        last_height = driver.execute_script("return document.body.scrollHeight")
        anchors = driver.find_elements_by_css_selector("a[role='link']")
        hrefs.extend(a.get_attribute('href') for a in anchors)
        driver.execute_script("window.scrollTo(0, {})".format(last_height + 500))
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if last_height == new_height:
            # Page stopped growing: no more results to load.
            break

    tweets = [href for href in hrefs if 'status' in href]

    try:
        # NOTE(review): starts at index 1, skipping tweets[0] -- looks like an
        # off-by-one, but kept as-is; confirm against the live page structure.
        for url_save in tweets[1:]:
            try:
                # Best-effort save; narrowed from a bare except.
                DB.RecordUrl('results.db', 'twitter', url_save)
            except Exception:
                print('url save error..')
    except Exception:
        print('.')
    print('Keyword search from Twitter is completed.')
398
def searchInsta(query):
    """Scrape Instagram post URLs for hashtag *query* into results.db.

    Logs in with the Instagram account from the creds module, opens the
    hashtag explore page, scrolls until at least 25 anchors were collected,
    and records every '/p/' (post) link under the 'insta' source tag.
    """
    DB.CreateDB('results.db', 'url')

    print("Searching Instagram...")

    driver = webdriver.Chrome('chromedriver')
    driver.get("https://www.instagram.com/accounts/login/?source=auth_switcher")
    time.sleep(3)

    driver.find_element_by_name('username').send_keys(creds.insta_id)
    driver.find_element_by_name('password').send_keys(creds.insta_pw)
    driver.find_element_by_name('password').submit()
    time.sleep(3)

    driver.get("https://www.instagram.com/explore/tags/" + query)

    post_links = []
    # NOTE(review): loops forever if the page never yields 25 anchors --
    # consider adding a retry cap. Kept as-is to preserve behavior.
    while len(post_links) < 25:
        post_links.extend(
            a.get_attribute('href') for a in driver.find_elements_by_tag_name('a'))
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Keep only post permalinks, deduplicated.
    post_links = [link for link in post_links if '/p/' in link]
    insta = list(set(post_links))

    try:
        # NOTE(review): starts at index 1, skipping one entry of an unordered
        # set -- likely an off-by-one, kept as-is; confirm intent.
        for url_save in insta[1:]:
            try:
                # Best-effort save; narrowed from a bare except.
                DB.RecordUrl('results.db', 'insta', url_save)
            except Exception:
                print('url save error..')
    except Exception:
        print('.')
    print('Keyword search from Instagram is completed.')
469
def searchFacebook(query):
    """Scrape Facebook post URLs matching *query* into results.db.

    Logs in with the Facebook account from the creds module, runs the on-site
    search, pages down ten times, and records every '/posts/' link under the
    'facebook' source tag.
    """
    DB.CreateDB('results.db', 'url')

    print("Searching Facebook...")

    chrome_options = Options()
    chrome_options.add_argument("--disable-notifications")
    driver = webdriver.Chrome(chrome_options=chrome_options)

    driver.get("https://www.facebook.com")
    time.sleep(1)

    driver.find_element_by_name('email').send_keys(creds.fbk_id)
    driver.find_element_by_name('pass').send_keys(creds.fbk_pw)
    driver.find_element_by_name('pass').submit()
    time.sleep(3)

    driver.find_element_by_name('q').send_keys(query)
    driver.find_element_by_name('q').submit()
    time.sleep(3)

    # Scroll the results into view so more posts are rendered.
    body = driver.find_element_by_css_selector('body')
    for _ in range(10):
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(1)

    post_links = [elem.get_attribute("href")
                  for elem in driver.find_elements_by_xpath("//a[@href]")]
    post_links = [link for link in post_links if '/posts/' in link]

    # NOTE(review): the first '/posts/' link is dropped deliberately here --
    # confirm why. Guarded so an empty result no longer raises IndexError.
    if post_links:
        post_links.pop(0)
    fbk = list(set(post_links))

    try:
        # NOTE(review): starts at index 1, skipping one entry of an unordered
        # set -- likely an off-by-one, kept as-is; confirm intent.
        for url_save in fbk[1:]:
            try:
                # Best-effort save; narrowed from a bare except.
                DB.RecordUrl('results.db', 'facebook', url_save)
            except Exception:
                print('url save error..')
    except Exception:
        print('.')
    print('Keyword search from Facebook is completed.')
547
548
549log.basicConfig(filename='jacheongbi.log',level=log.DEBUG)