· 4 years ago · Jul 08, 2021, 11:10 AM
1"""
Script author: tmm88
Year: 2021/7/6

Language: python

Tools used:
* os sys json re python3 python2 pip3 pip2
* wikipedia

Description (abstract):
* performs a search on Wikipedia through the wikipedia API
* retrieves a dictionary with the search results
* opens the tags in different websites
* provides a small script for downloading videos from YouTube
* runs a wget on the relevant search query and retrieves the result to the program

Currently, site saving works, but with limitations: there are some bugs, and some other tasks get interrupted.
19"""
20
21### ****************************************************** ###
22
23# IMPORT CORE LIBRARIES
24import os
25import sys
26import json
27import re
28
29### ****************************************************** ###
30
31# INSTALL SOME RUBBISH
32
33
# Shell commands that install everything the script relies on: system
# packages through pacman first, then the Python packages through the
# pip / pip2 / pip3 front-ends. They run on every start of the script, in
# the order listed; the exit status of each command is ignored.
_SETUP_COMMANDS = (
    # system packages
    "sudo pacman -S python python3 python2",
    "sudo pacman -S python2-pip python3-pip python-pip",
    "sudo pacman -S youtube-dl ffmpeg",
    # Python packages
    "sudo pip3 install wikipedia",
    "sudo pip2 install wikipedia",
    "sudo pip2 install pytube",
    "sudo pip2 install youtube-search-python",
    "sudo pip install pywebcopy",
    "sudo pip2 install pywebcopy",
    "sudo pip3 install pywebcopy",
    "pip install googlesearch-python",
)

for _setup_command in _SETUP_COMMANDS:
    os.system(_setup_command)
48
49
50### ********************************************************* ###
51
52### //////////////////////// ###
53
54# import extra libraries
55import wikipedia
56
57### //////////////////////// ###
58
59from pytube import YouTube
60from youtubesearchpython import VideosSearch
61
62### //////////////////////// ###
63
64from pywebcopy import save_webpage
65from googlesearch import search
66
67### //////////////////////// ###
68
69### ********************************************************* ###
70
# Directory that receives both the saved web pages and the downloaded videos.
DOWNLOAD_DIR = '/home/tmm88/'  # Change this.


def video_query_from_title(title):
    """Return the video-search query derived from a Wikipedia result title.

    Spaces are replaced by "+", the text is lower-cased and dots are removed.
    """
    return title.replace(" ", "+").lower().replace(".", "")


def save_page_command(url, dest=DOWNLOAD_DIR):
    """Return the argument list that saves *url* into *dest* with pywebcopy.

    The command is built as a list (never a shell string) so that characters
    such as ``&`` or ``;`` inside a URL coming from a search result cannot be
    interpreted by a shell.
    """
    # sys.executable is the interpreter running this script, which is the one
    # that already imported pywebcopy successfully at the top of the file.
    return [
        sys.executable, "-m", "pywebcopy", "save_webpage",
        url, dest, "--bypass_robots=True",
    ]


def save_search_hits(term, dest=DOWNLOAD_DIR):
    """Run a web search for *term* and save every returned URL into *dest*."""
    import subprocess

    # Materialise the results so the printout shows the URLs themselves.
    hits = list(search(str(term)))
    print(hits)

    for hit in hits:
        print(hit)
        # Best effort: a page that fails to save does not stop the others.
        subprocess.call(save_page_command(hit, dest))


def download_videos(query, dest=DOWNLOAD_DIR):
    """Search for videos matching *query* and download each one into *dest*."""
    search_result = VideosSearch(query, limit=10000).result()

    # The search returns a plain dict, not a JSON string.
    print('search result type:', type(search_result))

    if 'result' not in search_result:  # nothing was found
        return

    for result_entry in search_result['result']:
        url = result_entry['link']

        stream = YouTube(url).streams.first()
        if stream is None:
            # No stream available for this video; skip it instead of crashing.
            print('no downloadable stream for', url)
            continue

        print('download from youtube url', url, 'size in bytes', stream.filesize)
        stream.download(dest)
        print('download ready')

        print(result_entry['title'])
        print(result_entry['link'])


def main(argv=None):
    """Entry point: ``script SEARCH_TERM``.

    Looks the term up on Wikipedia, saves the web-search hits for the term
    itself, then, for every Wikipedia result title, saves the web-search hits
    for that title and downloads the matching videos.

    Args:
        argv: argument vector in ``sys.argv`` layout; defaults to ``sys.argv``.

    Returns:
        0 on completion, 2 when the search term is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else 'script'
        sys.stderr.write('usage: %s SEARCH_TERM\n' % program)
        return 2

    tag = argv[1]

    # Kept under its own name: calling this "search" would shadow the
    # googlesearch function imported at the top of the file.
    wikipedia_titles = wikipedia.search(tag)
    print(wikipedia_titles)

    save_search_hits(tag)

    for title in wikipedia_titles:
        query = video_query_from_title(title)
        print(query)

        save_search_hits(title)
        download_videos(query)

    return 0


if __name__ == "__main__":
    sys.exit(main())
125
126### ********************************************************* ###
127
128