· 6 years ago · Jan 14, 2019, 07:52 AM
import csv
import json
import os

import twitter
2
# Twitter API credentials.
# Each value can be overridden with the TWITTER_* environment variable named
# below; the literals are kept only as a backward-compatible fallback.
# NOTE(review): credentials committed to source should be treated as leaked --
# revoke them and drop the fallbacks once the environment is configured.
CONSUMER_KEY = os.environ.get(
    'TWITTER_CONSUMER_KEY', 'os0oodSXE46hUbcHYbTtT77JA')
CONSUMER_SECRET = os.environ.get(
    'TWITTER_CONSUMER_SECRET',
    'giqnOQtjgPcaTrhCiynSAIjQXAks5Rg8o9gl7iAFyxECnF7nNA')
OAUTH_TOKEN = os.environ.get(
    'TWITTER_OAUTH_TOKEN',
    '222215663-E3GL8xxiScmN4g2oWNAVaMmMChcUwf9ouPHzTt9F')
OAUTH_TOKEN_SECRET = os.environ.get(
    'TWITTER_OAUTH_TOKEN_SECRET',
    'nmEbDSo4MIUOfzCIgo4VsyXbCt8ihlyY3jgYX6NWkd9gl')


auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Set up the pipe-delimited output file.
# The Python 2 csv module expects the file object in binary mode; text mode
# can produce doubled line endings on platforms that translate newlines.
csvfile = open('Trumpwall.csv', 'wb')
csvwriter = csv.writer(csvfile, delimiter='|')
19
def getVal(val):
    """Return ``val`` cleaned for the pipe-delimited export, UTF-8 encoded.

    Pipe characters, newlines and carriage returns are each replaced by a
    single space so the value cannot break the row layout when the file is
    imported into Excel. A falsy ``val`` (``None``, empty string) yields an
    empty string.
    """
    if not val:
        return ""
    for unsafe in ('|', '\n', '\r'):
        val = val.replace(unsafe, ' ')
    return val.encode('utf-8')
31
# Marker string for retweets, held as a unicode value.
retweet_indicator = u"RT"

# Term(s) used to select tweets from the public stream.
# A comma-separated list of terms can go here.
q = "#trump"
print('Filtering the public timeline for track="%s"' % (q,))

# Open the streaming endpoint with the credentials of the REST client and
# start filtering on the tracked term(s).
twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)
stream = twitter_stream.statuses.filter(track=q)
42
def _full_text(tweet):
    """Return the most complete text available for ``tweet``.

    * Retweets are rendered as ``RT @<screen_name>: <text>`` using the
      retweeted status, preferring its ``extended_tweet`` full text when
      present and falling back to its plain ``text`` otherwise.
    * Truncated tweets use ``extended_tweet['full_text']`` when available.
    * Anything else uses the plain ``text`` field.
    """
    retweeted = tweet.get('retweeted_status')
    if retweeted:
        extended = retweeted.get('extended_tweet')
        original_text = extended['full_text'] if extended else retweeted['text']
        # Separate the screen name from the text; plain concatenation glued
        # the two together and made the row unreadable.
        return u"RT @%s: %s" % (retweeted['user']['screen_name'],
                                original_text)

    if tweet.get('truncated'):
        extended = tweet.get('extended_tweet')
        if extended:
            return extended['full_text']

    return tweet['text']


try:
    for tweet in stream:
        # NOTE(review): the stream presumably also yields messages that are
        # not statuses (e.g. delete/limit notices, keep-alives); those lack
        # the fields written below, so skip them instead of raising KeyError.
        if 'text' not in tweet or 'user' not in tweet:
            continue

        tweet_text = _full_text(tweet)
        user = tweet['user']

        # One row per tweet: tweet-level fields interleaved with the
        # author's profile fields.
        csvwriter.writerow([
            tweet['created_at'],
            getVal(user['screen_name']),
            getVal(tweet_text),
            getVal(user['location']),
            getVal(tweet['source']),
            user['statuses_count'],
            user['followers_count'],
            user['friends_count'],
            user['created_at'],
            tweet['lang'],
            tweet['geo'],
        ])

        # Echo the author and the (possibly truncated) text to the terminal
        # so progress is visible while the filter runs.
        print('%s %s' % (user['screen_name'].encode('utf-8'),
                         tweet['text'].encode('utf-8')))
finally:
    # Make sure buffered rows reach disk even when the stream is interrupted
    # (e.g. with Ctrl-C) or fails.
    csvfile.close()