# Paste metadata: posted Sep 02, 2016, 05:18 AM (about 9 years ago)
import json
import os
from collections import Counter
from urllib.parse import parse_qsl

import twitter
from prettytable import PrettyTable
5
6
def oauth_login():
    """Build and return an authenticated ``twitter.Twitter`` API client.

    Credentials are read from the environment variables
    ``TWITTER_CONSUMER_KEY``, ``TWITTER_CONSUMER_SECRET``,
    ``TWITTER_OAUTH_TOKEN`` and ``TWITTER_OAUTH_TOKEN_SECRET``. When a
    variable is not set, the value embedded below is used instead.

    Returns:
        A ``twitter.Twitter`` instance configured with OAuth credentials.
    """
    # SECURITY NOTE(review): the fallback values below are credentials that
    # were committed in source. They should be revoked and removed; they are
    # kept only so existing invocations keep working without configuration.
    consumer_key = os.environ.get(
        'TWITTER_CONSUMER_KEY', '0pJAid2aqhCqcL6dKvPAerp8b')
    consumer_secret = os.environ.get(
        'TWITTER_CONSUMER_SECRET',
        'rfrb0fbGgCvpf1uttRx7OsrBCT7p8DPWuB8WpeLJ9LfelJW8sp')
    oauth_token = os.environ.get(
        'TWITTER_OAUTH_TOKEN',
        '15648766-lxT6QBxMgp69bNHJdb6FI4KqporqqvOyd4U5t4qD7')
    oauth_token_secret = os.environ.get(
        'TWITTER_OAUTH_TOKEN_SECRET',
        'KYdm5roVu2xMlo5rMG61LGHwYBRL0Gxi5IkXMRZLsuJR2')

    # Argument order matches the original call: token, token secret,
    # consumer key, consumer secret.
    auth = twitter.oauth.OAuth(
        oauth_token, oauth_token_secret, consumer_key, consumer_secret)

    return twitter.Twitter(auth=auth)
17
# Language = English
# Result type = recent or popular
# count = how many tweets to return
# geocode = "latitude,longitude,radius", for example, "37.781157,-122.398720,10mi"
# will limit the results for tweets within 10 miles of San Francisco
# Other parameters not used but available: until, since_id, max_id
24
def twitter_search(twitter_api, q, max_results=200, **kw):
    """Search tweets matching ``q`` and return at most ``max_results`` statuses.

    Args:
        twitter_api: Client object exposing ``search.tweets(**params)`` that
            returns a dict with ``'statuses'`` and, when more pages exist,
            ``'search_metadata'['next_results']``.
        q: Search query string.
        max_results: Maximum number of statuses to return. Capped at 1000
            and floored at 0.
        **kw: Extra search parameters (for example ``until``, ``since_id``,
            ``max_id``). They override the defaults set below.

    Returns:
        A list of status dicts, no longer than ``max_results``.
    """
    # Defaults: English, most recent first, 100 per page, within 10 miles
    # of the given San Francisco coordinates. Callers may override via **kw.
    params = {
        'count': 100,
        'lang': 'en',
        'result_type': 'recent',
        'geocode': "37.781157,-122.398720,10mi",
    }
    params.update(kw)

    search_results = twitter_api.search.tweets(q=q, **params)
    statuses = list(search_results['statuses'])

    # Enforce a reasonable upper bound on how much is fetched.
    max_results = max(0, min(1000, max_results))

    for _ in range(10):
        # Stop before issuing another request once enough was collected.
        if len(statuses) >= max_results:
            break

        try:
            next_results = search_results['search_metadata']['next_results']
        except KeyError:
            # No 'next_results' entry: there are no further pages.
            break

        # 'next_results' is a URL query string ("?max_id=...&q=...").
        # Decode it so percent-encoded values are not encoded a second
        # time when they are sent with the next request.
        kwargs = dict(parse_qsl(next_results.lstrip('?')))

        search_results = twitter_api.search.tweets(**kwargs)
        statuses += search_results['statuses']

    return statuses[:max_results]
47
def find_popular_tweets(twitter_api, statuses, retweet_threshold=30):
    """Return the statuses retweeted more than ``retweet_threshold`` times.

    Args:
        twitter_api: Not used by this function; accepted so existing
            callers that pass it keep working.
        statuses: Iterable of status dicts, each with a ``'retweet_count'``.
        retweet_threshold: A status is kept only when its retweet count is
            strictly greater than this value.

    Returns:
        A list of the matching status dicts, in their original order.
    """
    return [
        status
        for status in statuses
        if status['retweet_count'] > retweet_threshold
    ]
53
def extract_tweet_entities(statuses):
    """Collect mentioned screen names, hashtags and URLs from ``statuses``.

    Args:
        statuses: List of status dicts, each with an ``'entities'`` dict
            holding ``'user_mentions'``, ``'hashtags'`` and ``'urls'`` lists.

    Returns:
        A 3-tuple ``(screen_names, hashtags, urls)`` of lists. The same
        shape is returned for empty input so callers can always unpack
        three values.
    """
    if not statuses:
        return [], [], []

    screen_names = [
        user_mention['screen_name']
        for status in statuses
        for user_mention in status['entities']['user_mentions']
    ]

    hashtags = [
        hashtag['text']
        for status in statuses
        for hashtag in status['entities']['hashtags']
    ]

    urls = [
        url['expanded_url']
        for status in statuses
        for url in status['entities']['urls']
    ]

    return screen_names, hashtags, urls
71
def get_common_tweet_entities(statuses, entity_threshold=3):
    """Return the entities that occur at least ``entity_threshold`` times.

    Screen names, hashtags and URLs are counted together in one tally.

    Args:
        statuses: Iterable of status dicts accepted by
            ``extract_tweet_entities``.
        entity_threshold: Minimum number of occurrences for an entity to
            be included in the result.

    Returns:
        A list of ``(entity, count)`` tuples, most frequent first.
    """
    # Entities are gathered one status at a time so their first-seen order,
    # which decides how ties are ordered by Counter, follows the statuses.
    tweet_entities = []
    for status in statuses:
        for entity_group in extract_tweet_entities([status]):
            tweet_entities.extend(entity_group)

    return [
        (entity, count)
        for entity, count in Counter(tweet_entities).most_common()
        if count >= entity_threshold
    ]
84
def analyze_tweet_content(statuses):
    """Print the average number of whitespace-separated words per tweet.

    Args:
        statuses: List of status dicts, each with a ``'text'`` string.

    Returns:
        None. The result is written to stdout; a notice is printed instead
        when ``statuses`` is empty.
    """
    if not statuses:
        print("No statuses to analyze")
        return

    # Only the tweet text is needed here; entities are not inspected.
    total_words = sum(len(status['text'].split()) for status in statuses)
    print("Averge words per tweet:", float(total_words) / len(statuses))
102
103
def analyze_favorites(twitter_api, screen_name, entity_threshold=2):
    """Print a report on the favorited tweets of ``screen_name``.

    Fetches up to 200 favorites, prints how many were returned, a table of
    entities occurring at least ``entity_threshold`` times, and the content
    statistics produced by ``analyze_tweet_content``.

    Args:
        twitter_api: Client object exposing ``favorites.list(...)``.
        screen_name: Handle of the user whose favorites are fetched.
        entity_threshold: Minimum occurrence count for an entity to be
            listed in the table.

    Returns:
        None. All output goes to stdout.
    """
    favs = twitter_api.favorites.list(screen_name=screen_name, count=200)
    print("Number of favorites:", len(favs))

    common_entities = get_common_tweet_entities(
        favs, entity_threshold=entity_threshold)

    pt = PrettyTable(field_names=['Entity', 'Count'])
    for row in common_entities:
        pt.add_row(row)
    pt.align['Entity'], pt.align['Count'] = 'l', 'r'

    print()
    print("Common entities in favorites...")
    print(pt)
    print()
    print("Some statistics about the content of the favorities...")
    # analyze_tweet_content prints its own output and returns None, so it
    # is called directly rather than wrapped in print().
    analyze_tweet_content(favs)
120
121
122
# *************************************************************
# Script body: search for a term, then print popular tweets, tweet
# entities, the most common entities, and an analysis of one user's
# favorites.

twitter_api = oauth_login()
q = "surf"

search_results = twitter_search(twitter_api, q, max_results=20)
popular_tweets = find_popular_tweets(twitter_api, search_results)

print("************POPULAR TWEETS*********************")
print("************(TWEET, RE_TWEET COUNT*************")
print("")
for tweet in popular_tweets:
    # The tweet text is encoded to UTF-8 bytes before printing.
    print(tweet['text'].encode('utf8'), tweet['retweet_count'])


statuses = twitter_search(twitter_api, q)
# Take the first three groups so the unpack works whether the helper
# returns three or four lists.
screen_names, hashtags, urls = extract_tweet_entities(statuses)[:3]


print("************Tweet Entities*********************")
print("***********************************************")
print("")
print("**************TOP 50 HANDLES*******************")
# NOTE(review): these are the first 50 entries in encounter order, not a
# frequency ranking, despite the "TOP 50" headings.
print(json.dumps(screen_names[0:50], indent=1))
print("")
print("**************TOP 50 HASHTAGS*****************")
print(json.dumps(hashtags[0:50], indent=1))
print("")
print("**************TOP 50 URLs*********************")
print(json.dumps(urls[0:50], indent=1))
print("")


common_entities = get_common_tweet_entities(search_results)

print("*****************************************************")
print("************Most Common Entities*********************")
print(common_entities)
print("*****************************************************")
print("*****************************************************")

pt = PrettyTable(field_names=['Entity', 'Count'])
for row in common_entities:
    pt.add_row(row)
pt.align['Entity'], pt.align['Count'] = 'l', 'r'

print(pt)

# Content statistics for the search results.
analyze_tweet_content(search_results)
print("*****************************************************")
print("*****************************************************")
# Analyze a user's favorite tweets. Insert user handle
analyze_favorites(twitter_api, "theebecky")