· 7 years ago · Jan 22, 2019, 02:06 PM
1#!/usr/bin/env python3
2import requests
3import sqlite3
4from bs4 import BeautifulSoup
5from io import BytesIO
6from zipfile import ZipFile
7from datetime import datetime
8
zip_file = 'http://www1.caixa.gov.br/loterias/_arquivos/loterias/D_megase.zip'
html_file = 'D_MEGA.HTM'

# Seconds to wait for the download before giving up; without a timeout
# requests can block forever on a stalled connection.
DOWNLOAD_TIMEOUT = 30
# SQLite file that receives the rebuilt ``polls`` table.
DB_PATH = 'megasena.db'
# Columns of ``polls``: id, date, n1..n6.
ROW_WIDTH = 8


def parse_draw_row(cells):
    """Convert the text cells of one HTML table row into a ``polls`` record.

    Args:
        cells: Stripped text of the row's selected ``<td>`` cells, in order
            (id, date as ``dd/mm/YYYY``, then six numbers).

    Returns:
        A tuple ``(id, date, n1, ..., n6)`` where ``date`` is an ISO
        ``YYYY-MM-DD`` string and every other item is an ``int``, or
        ``None`` when the row is not a data row (no cells, or the first
        cell is not numeric — e.g. header rows).

    Raises:
        ValueError: If a data row does not have exactly ``ROW_WIDTH`` cells,
            or its date/number cells cannot be parsed.
    """
    if not cells or not cells[0].isdigit():
        return None
    if len(cells) != ROW_WIDTH:
        raise ValueError(
            f'expected {ROW_WIDTH} cells for draw {cells[0]}, got {len(cells)}'
        )
    # Store an explicit ISO date string instead of handing sqlite3 a datetime:
    # the implicit datetime adapter is deprecated since Python 3.12 and writes
    # "YYYY-MM-DD 00:00:00", which the default DATE converter cannot read back.
    day = datetime.strptime(cells[1], '%d/%m/%Y').date().isoformat()
    return (int(cells[0]), day, *(int(number) for number in cells[2:]))


def fetch_draws():
    """Download the results archive and return every parsed draw record.

    Returns:
        A list of tuples as produced by ``parse_draw_row``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a data row in the HTML is malformed.
    """
    with requests.get(zip_file, timeout=DOWNLOAD_TIMEOUT) as response:
        # Fail loudly rather than silently doing nothing on a bad response.
        response.raise_for_status()
        payload = response.content

    # Close both the archive and the member once the markup has been read.
    with ZipFile(BytesIO(payload)) as archive, archive.open(html_file) as html:
        soup = BeautifulSoup(html, 'html.parser')

    draws = []
    for tr in soup('tr'):
        # Only <td> cells carrying a rowspan attribute are considered, and at
        # most ROW_WIDTH of them, exactly as the table layout is consumed.
        cells = [
            tag.get_text(strip=True)
            for tag in tr('td', limit=ROW_WIDTH, rowspan=True)
        ]
        draw = parse_draw_row(cells)
        if draw is not None:
            draws.append(draw)
    return draws


def store_draws(draws, path=DB_PATH):
    """Recreate the ``polls`` table in *path* and fill it with *draws*.

    Args:
        draws: Iterable of 8-item records ``(id, date, n1, ..., n6)``.
        path: SQLite database file to write.
    """
    db = sqlite3.connect(path)
    try:
        # ``with db`` commits on success and rolls back the pending inserts
        # if one of them fails.
        with db:
            db.execute('DROP TABLE IF EXISTS polls;')
            db.execute('''
                CREATE TABLE polls (
                    id INTEGER NOT NULL PRIMARY KEY,
                    date DATE NOT NULL,
                    n1 INTEGER NOT NULL,
                    n2 INTEGER NOT NULL,
                    n3 INTEGER NOT NULL,
                    n4 INTEGER NOT NULL,
                    n5 INTEGER NOT NULL,
                    n6 INTEGER NOT NULL
                );''')
            db.executemany(
                'INSERT INTO polls VALUES (?, ?, ?, ?, ?, ?, ?, ?);', draws
            )
    finally:
        # Always release the connection, even when an insert raised.
        db.close()


def main():
    """Download the latest results and rebuild the local database.

    Everything is downloaded and parsed before the database is touched, so a
    network or parsing failure leaves the existing ``polls`` table intact.
    """
    store_draws(fetch_draws())


if __name__ == '__main__':
    main()