# Paste ID: hTGxtFjt
# Posted: Jan 14, 2019, 07:52 AM
import csv
import json
import os

import twitter

# Twitter API keys.
#
# NOTE(review): these credentials are stored in plain text in the source, so
# they should be treated as compromised and revoked. Each value can be
# overridden with the environment variable named below; the literals are kept
# only as a fallback so existing invocations keep behaving as before.
CONSUMER_KEY = os.environ.get(
    'TWITTER_CONSUMER_KEY', 'os0oodSXE46hUbcHYbTtT77JA')
CONSUMER_SECRET = os.environ.get(
    'TWITTER_CONSUMER_SECRET',
    'giqnOQtjgPcaTrhCiynSAIjQXAks5Rg8o9gl7iAFyxECnF7nNA')
OAUTH_TOKEN = os.environ.get(
    'TWITTER_OAUTH_TOKEN',
    '222215663-E3GL8xxiScmN4g2oWNAVaMmMChcUwf9ouPHzTt9F')
OAUTH_TOKEN_SECRET = os.environ.get(
    'TWITTER_OAUTH_TOKEN_SECRET',
    'nmEbDSo4MIUOfzCIgo4VsyXbCt8ihlyY3jgYX6NWkd9gl')

auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Set up the output file. Fields are separated with '|', which is why that
# character is stripped from every text value before it is written.
csvfile = open('Trumpwall.csv', 'w')
csvwriter = csv.writer(csvfile, delimiter='|')

# Characters that would break the '|'-delimited import into Excel are
# replaced with spaces before a value is written.
def getVal(val):
    """Return *val* cleaned for the CSV output and encoded as UTF-8.

    Every '|', newline and carriage return in *val* is replaced by a single
    space, then the result is encoded with UTF-8.

    Falsy input (``None``, empty string) yields the empty string ``""``.
    """
    if not val:
        return ""
    for unsafe in ('|', '\n', '\r'):
        val = val.replace(unsafe, ' ')
    return val.encode('utf-8')

# Prefix used when rebuilding the text of a retweet.
retweet_indicator = u"RT"


def full_tweet_text(tweet):
    """Return the most complete text available for a streamed tweet.

    * Retweet: ``RT @<screen_name>: <text>`` built from ``retweeted_status``,
      using its ``extended_tweet.full_text`` when present and its plain
      ``text`` otherwise, so a text is always produced.
    * Truncated tweet carrying ``extended_tweet``: that ``full_text``.
    * Anything else: the plain ``text`` field.
    """
    retweeted = tweet.get('retweeted_status')
    if retweeted:
        extended = retweeted.get('extended_tweet')
        original_text = extended['full_text'] if extended else retweeted['text']
        return u"%s @%s: %s" % (
            retweet_indicator, retweeted['user']['screen_name'], original_text)

    if tweet.get('truncated') and 'extended_tweet' in tweet:
        return tweet['extended_tweet']['full_text']

    return tweet['text']


def write_tweets(stream, writer):
    """Write one CSV row per tweet read from *stream* and echo it to stdout.

    *stream* is any iterable of tweet dictionaries; *writer* is a
    ``csv.writer``-like object. Items lacking a ``text`` or ``user`` key are
    skipped, since the row below cannot be built from them.
    """
    for tweet in stream:
        # Per the streaming docs, the iterator can also yield non-tweet
        # messages (deletion notices, timeouts, ...); those have no
        # 'text'/'user'.
        if 'text' not in tweet or 'user' not in tweet:
            continue

        user = tweet['user']

        # Tweet fields first, then the fields describing its author.
        writer.writerow([
            tweet['created_at'],
            getVal(user['screen_name']),
            getVal(full_tweet_text(tweet)),
            getVal(user['location']),
            getVal(tweet['source']),
            user['statuses_count'],
            user['followers_count'],
            user['friends_count'],
            user['created_at'],
            tweet['lang'],
            tweet['geo'],
        ])

        # Progress output on the terminal: author and the (short) tweet text.
        print("%s %s" % (user['screen_name'].encode('utf-8'),
                         tweet['text'].encode('utf-8')))


if __name__ == "__main__":
    # Which hashtag gets filtered out of the Twitter stream.
    q = "#trump"  # Comma-separated list of terms can go here
    print('Filtering the public timeline for track="%s"' % (q,))

    twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)
    stream = twitter_stream.statuses.filter(track=q)

    try:
        write_tweets(stream, csvwriter)
    finally:
        # The stream normally ends only on an error or Ctrl+C; close the file
        # either way so buffered rows reach the disk.
        csvfile.close()