# Pasted Apr 24, 2019, 05:26 PM
import datetime
import html
import re
import sqlite3
import time

import requests
from bs4 import BeautifulSoup

def fetch_all_to_list(fetch_all_list):
    """Return the first column of every row in a ``fetchall()`` result.

    Args:
        fetch_all_list: An iterable of row sequences, e.g. the list of
            tuples returned by ``cursor.fetchall()``.

    Returns:
        A new list holding element ``0`` of each row, in the original order.
        An empty input yields an empty list.
    """
    return [row[0] for row in fetch_all_list]


# Base URL of the forum index; the board query string is appended to it.
_url = 'https://bitcointalk.org/index.php'
# Template for a single topic page; ``{id}`` is the numeric topic id.
detail_url = 'https://bitcointalk.org/index.php?topic={id}.0'

# Holds the ``board`` query value of the listing page currently being fetched.
data = {
    'board': ''
}
# HTTP headers sent with every request.
headers = {
    # "Cookie": '__cfduid=db2b46b3d1566ccd14f8131529bc3df551556119338; PHPSESSID=a3rf4pev8l8dth2ofs8puap0uloa2t7c',
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
}


# Scrape three listing pages of board 159 and store every topic that is not
# already present in the local SQLite database ``db.db``.

# Extracts the numeric topic id from links such as
# "https://bitcointalk.org/index.php?topic=12345.0".  A regex is used because
# str.strip()/rstrip() remove *character sets*, not prefixes/suffixes, and
# would truncate ids that end in "0".
_topic_id_pattern = re.compile(r'topic=(\d+)')

conn = sqlite3.connect('db.db')
try:
    c = conn.cursor()
    c.execute(
        """CREATE TABLE IF NOT EXISTS log_list (
        id INTEGER primary key ,
        title varchar,
        create_time varchar(20));""")
    c.execute('SELECT id FROM log_list;')
    # Ids already stored; used to skip topics without a failing INSERT.
    known_ids = set(fetch_all_to_list(c.fetchall()))

    for page in range(3):
        # time.sleep(1)  # uncomment to throttle requests
        # The listing offset advances in steps of 40 per page.
        data['board'] = '159.' + str(page * 40)
        page_url = _url + '?board=' + data['board']
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(page_url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')

        for span in soup.find_all('span', id=re.compile('^msg_')):
            link = span.find('a', href=True)
            if link is None:
                # No topic link inside this span; nothing to record.
                continue
            match = _topic_id_pattern.search(link['href'])
            if match is None:
                continue
            topic_id = int(match.group(1))
            if topic_id in known_ids:
                continue

            title = span.get_text()
            created = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            try:
                # Placeholders let SQLite quote the title, so titles that
                # contain apostrophes are stored instead of being dropped.
                c.execute(
                    'INSERT INTO log_list(id, title, create_time) VALUES (?, ?, ?);',
                    (topic_id, title, created))
            except sqlite3.Error as exc:
                # Best effort: report the failure and carry on with the rest.
                print('skipping topic {0}: {1}'.format(topic_id, exc))
                continue
            known_ids.add(topic_id)

        # One commit per listing page.
        conn.commit()
finally:
    conn.close()

# Dump every stored topic, newest id first, into ``output.html`` as one
# <p> element per topic: "<id> <title>......<create_time>".
conn = sqlite3.connect('db.db')
try:
    c = conn.cursor()
    c.execute('SELECT id, title, create_time FROM log_list ORDER BY id DESC;')
    full_title = c.fetchall()
finally:
    # Rows are already in memory, so the connection can be released now.
    conn.close()

with open('output.html', 'w', encoding='utf-8') as f1:
    for topic_id, title, create_time in full_title:
        # Titles come from scraped pages; escape them so characters such as
        # "<" or "&" cannot break (or inject markup into) the report.
        parts = [str(topic_id), ' ', html.escape(title), '..................................................................', create_time]
        f1.write('<p>' + ''.join(parts) + '</p>')