# Paste metadata: 7 years ago · Sep 29, 2018, 10:32 PM
import datetime
import nntplib
import sys

import mysql.connector

# Download a range of Usenet articles over NNTP and store selected headers
# plus every body line in the MySQL table HEADER_BODY, one row per line.

# server
SRVID = 1
SRV = "news_server"
PRT = 119
USR = "user_name"
PWD = "password"

# group
GRPID = 1
GRP = 'comp.os.linux.advocacy'

# Article numbers to download.  Python's range() stops one short of its end
# value, so this fetches FIRST_ARTICLE .. END_ARTICLE - 1 (704495 .. 704499).
FIRST_ARTICLE = 704495
END_ARTICLE = 704500

# Commit after every article whose number is a multiple of this value.
COMMIT_EVERY = 10

# this will capture only the headers you want
myheaders = ['From','Date','Subject','Message-ID','User-Agent','X-Newsreader','References']

# leave this in for now
CREATE_SQL = "CREATE TABLE IF NOT EXISTS HEADER_BODY (SERVERID INTEGER, GROUPID INTEGER, ARTICLEID INTEGER, LINENBR INTEGER, CATEGORY TEXT, DATA TEXT, PRIMARY KEY (SERVERID,GROUPID,ARTICLEID,LINENBR));"

SELECT_SQL = "SELECT SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA FROM HEADER_BODY;"

# INSERT IGNORE means MySQL skips rows whose primary key already exists, so
# re-running over the same articles does not fail and quit.
# (The SQLite spelling "INSERT OR IGNORE" and "?" placeholders are not
# accepted here: mysql.connector expects %s parameter markers.)
INSERT_SQL = "INSERT IGNORE INTO HEADER_BODY (SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA) VALUES (%s,%s,%s,%s,%s,%s)"


def unfold_headers(raw_headers):
    """Return the header lines with folded continuation lines rejoined.

    A line starting with a space or tab continues the previous header; it is
    appended to that header separated by a single space.  Blank lines are
    dropped and every returned line is stripped of surrounding whitespace.
    """
    unfolded = []
    for raw in raw_headers:
        text = raw.strip()
        if not text:
            continue
        if raw[:1] in (' ', '\t') and unfolded:
            unfolded[-1] += ' ' + text
        else:
            unfolded.append(text)
    return unfolded


def split_header(header):
    """Split a 'Name: value' header line into a (name, value) tuple.

    The value is stripped, so it does not matter whether the colon is
    followed by zero, one or several spaces.  A line without any colon is
    returned as ('na', header).
    """
    if ':' not in header:
        return 'na', header
    name, _, value = header.partition(':')
    return name, value.strip()


def store_article(db, articleID, headers, body):
    """Print and insert one article's wanted headers and all body lines.

    db        -- open database cursor used for the inserts
    articleID -- article number, stored in the ARTICLEID column
    headers   -- list of raw header lines as returned by the NNTP HEAD call
    body      -- list of body lines as returned by the NNTP BODY call

    LINENBR starts at 1 for each article and increases by one per stored
    row, headers first and body lines after them.
    """
    linenbr = 1
    for header in unfold_headers(headers):
        hdrname, hdrval = split_header(header)
        if hdrname in myheaders:
            print(hdrname + ': ' + hdrval)
            db.execute(INSERT_SQL, (SRVID, GRPID, articleID, linenbr, hdrname, hdrval))
            linenbr += 1

    print('')
    for bod in body:
        print(bod)
        db.execute(INSERT_SQL, (SRVID, GRPID, articleID, linenbr, 'Body', bod))
        linenbr += 1


def main():
    """Print the rows already stored, then download the configured articles."""
    # connect to database
    conn = mysql.connector.connect(user = 'user', password = 'password', host = 'host', database = 'database')
    db = conn.cursor()

    db.execute(CREATE_SQL)

    # once there's data in the table this will print it
    db.execute(SELECT_SQL)
    for row in db.fetchall():
        print("%s %s %s %s %s %s" % tuple(row))

    print("connecting to %s ..." % SRV)
    news = nntplib.NNTP(SRV, PRT, USR, PWD)
    resp, IDs, beginID, endID, grpNm = news.group(GRP)
    print('server response: %s' % resp)

    # Errors that cause a single article to be skipped rather than aborting.
    nntp_errors = (nntplib.NNTPTemporaryError, nntplib.NNTPProtocolError,
                   nntplib.NNTPDataError, nntplib.NNTPReplyError)

    for articleID in range(FIRST_ARTICLE, END_ARTICLE):

        print('=====================')
        print('article %s' % articleID)
        print('=====================')

        # NOTE(review): the four-value unpacking below matches the Python 2
        # nntplib return shape -- confirm before running on another version.
        try:
            response, artID, msgID, headers = news.head(str(articleID))
        except nntp_errors as headerError:
            print("(article ID %s Header NNTP Error %s )" % (articleID, headerError))
            continue

        try:
            response, artID, msgID, body = news.body(str(articleID))
        except nntp_errors as bodyError:
            print("(article ID %s Body NNTP Error %s )" % (articleID, bodyError))
            continue

        # parse and save header/body
        store_article(db, articleID, headers, body)

        # commit data every Nth article
        if articleID % COMMIT_EVERY == 0:
            conn.commit()

    # exit
    conn.commit()
    db.close()
    conn.close()
    news.quit()


if __name__ == "__main__":
    main()