# Paste header: 6 years ago · Mar 01, 2019, 08:24 PM
import json
import os
import sys
import time
from collections import deque
from functools import partial
from http.client import BadStatusLine
from itertools import islice
from sys import maxsize as maxint
from urllib.error import URLError

import matplotlib.pyplot as plt
import networkx as nx
import twitter
12
13
def oauth_login():
    """Build an authenticated ``twitter.Twitter`` client.

    The four OAuth credentials are read from the environment variables
    ``TWITTER_CONSUMER_KEY``, ``TWITTER_CONSUMER_SECRET``,
    ``TWITTER_OAUTH_TOKEN`` and ``TWITTER_OAUTH_TOKEN_SECRET`` instead of
    being embedded in the source file.

    Returns:
        A ``twitter.Twitter`` instance constructed with the OAuth credentials.

    Raises:
        RuntimeError: if any of the four environment variables is unset or
            empty; the message lists every missing variable.
    """
    # NOTE: credentials that were ever committed or published alongside this
    # script must be treated as compromised and revoked.
    variable_names = (
        'TWITTER_CONSUMER_KEY',
        'TWITTER_CONSUMER_SECRET',
        'TWITTER_OAUTH_TOKEN',
        'TWITTER_OAUTH_TOKEN_SECRET',
    )

    # Validate everything up front so the user sees all missing names at once.
    missing = [name for name in variable_names if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            'Missing Twitter credentials; set environment variable(s): '
            + ', '.join(missing))

    consumer_key, consumer_secret, oauth_token, oauth_token_secret = (
        os.environ[name] for name in variable_names)

    auth = twitter.oauth.OAuth(oauth_token, oauth_token_secret,
                               consumer_key, consumer_secret)

    return twitter.Twitter(auth=auth)
26
27
# Module-wide undirected graph; crawl_followers() adds its edges to it.
G = nx.Graph()

# Client object returned by oauth_login(), created once at module load.
twitter_api = oauth_login()
31
32
def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call ``twitter_api_func(*args, **kw)``, retrying on transient errors.

    Returns whatever the wrapped callable returns. Returns None when the
    call fails with a 401 or 404 ``TwitterHTTPError``. ``URLError`` and
    ``BadStatusLine`` are retried with a growing delay and re-raised once
    more than ``max_errors`` of them have accumulated. Any other HTTP
    status is re-raised.

    Note that ``max_errors`` precedes ``*args`` in the signature, so the
    first extra positional argument binds to ``max_errors``.
    """

    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        # Decide what to do with a TwitterHTTPError. Returns the next
        # wait period to use, or None for 401/404 (caller gives up).
        if wait_period > 3600:  # Seconds
            print('Too many retries. Quitting.', file=sys.stderr)
            raise e

        # See https://developer.twitter.com/en/docs/basics/response-codes
        # for common codes
        status = e.e.code

        give_up_messages = {
            401: 'Encountered 401 Error (Not Authorized)',
            404: 'Encountered 404 Error (Not Found)',
        }
        if status in give_up_messages:
            print(give_up_messages[status], file=sys.stderr)
            return None

        if status == 429:
            print('Encountered 429 Error (Rate Limit Exceeded)', file=sys.stderr)
            if not sleep_when_rate_limited:
                raise e  # Caller must handle the rate limiting issue
            print("Retrying in 15 minutes...ZzZ...", file=sys.stderr)
            sys.stderr.flush()
            time.sleep(60*15 + 5)
            print('...ZzZ...Awake now and trying again.', file=sys.stderr)
            return 2

        if status in (500, 502, 503, 504):
            print('Encountered {0} Error. Retrying in {1} seconds' .format(
                status, wait_period), file=sys.stderr)
            time.sleep(wait_period)
            return wait_period * 1.5

        raise e

    delay = 2
    failures = 0

    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError as http_error:
            # An HTTP-level reply resets the network-failure counter.
            failures = 0
            delay = handle_twitter_http_error(http_error, delay)
            if delay is None:
                return None
        except (URLError, BadStatusLine) as network_error:
            failures += 1
            time.sleep(delay)
            delay *= 1.5
            if isinstance(network_error, URLError):
                print("URLError encountered. Continuing.", file=sys.stderr)
            else:
                print("BadStatusLine encountered. Continuing.", file=sys.stderr)
            if failures > max_errors:
                print("Too many consecutive errors...bailing out.", file=sys.stderr)
                raise
102
103
def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=maxint, followers_limit=maxint):
    """Collect friend ids and follower ids for one account.

    Exactly one of ``screen_name`` / ``user_id`` must be supplied. Each list
    is fetched page by page (5000 ids requested per call) through
    ``make_twitter_request`` until the cursor reaches 0, the limit is met,
    or a request yields None. A limit of 0 skips that list entirely.

    Returns a ``(friends_ids, followers_ids)`` tuple, each truncated to its
    limit.
    """
    # Must have either screen_name or user_id (logical xor)
    has_screen_name = screen_name is not None
    has_user_id = user_id is not None
    assert has_screen_name != has_user_id, \
        "Must have screen_name or user_id, but not both"

    # See http://bit.ly/2GcjKJP and http://bit.ly/2rFz90N for details
    # on API parameters
    fetch_friends = partial(make_twitter_request, twitter_api.friends.ids,
                            count=5000)
    fetch_followers = partial(make_twitter_request, twitter_api.followers.ids,
                              count=5000)

    # The account selector is the same for every page, so build it once.
    if screen_name:
        who = {'screen_name': screen_name}
    else:  # user_id
        who = {'user_id': user_id}

    friends_ids = []
    followers_ids = []

    plan = (
        (fetch_friends, friends_limit, friends_ids, "friends"),
        (fetch_followers, followers_limit, followers_ids, "followers"),
    )

    for fetch_page, limit, collected, label in plan:
        if limit == 0:
            continue

        cursor = -1
        while cursor != 0:
            response = fetch_page(**who, cursor=cursor)

            if response is not None:
                collected.extend(response['ids'])
                cursor = response['next_cursor']

            print('Fetched {0} total {1} ids for {2}'.format(
                len(collected), label, (user_id or screen_name)), file=sys.stderr)

            # XXX: You may want to store data during each iteration to
            # provide an additional layer of protection from exceptional
            # circumstances

            if response is None or len(collected) >= limit:
                break

    # Do something useful with the IDs, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]
154
155
def get_user_profile(twitter_api, screen_names=None, user_ids=None):
    """Look up user profiles in batches of 100 via ``users.lookup``.

    Exactly one of ``screen_names`` / ``user_ids`` must be supplied (an
    empty list counts as supplied and yields an empty result).

    Args:
        twitter_api: client object exposing ``users.lookup``.
        screen_names: list of screen names to resolve, or None.
        user_ids: list of user ids to resolve, or None.

    Returns:
        dict mapping each returned profile's ``screen_name`` (when looking
        up by screen name) or ``id`` (when looking up by id) to the profile
        object. Batches whose request returned None are skipped, so the
        result may contain fewer entries than were requested.

    Raises:
        AssertionError: if both or neither of the two lists are given.
    """
    # Must have either screen_names or user_ids (logical xor)
    assert (screen_names is not None) != (user_ids is not None), \
        "Must have screen_names or user_ids, but not both"

    # Decide the lookup mode from which argument was supplied, not from its
    # truthiness, so an empty screen_names list is handled as "no work".
    by_screen_name = screen_names is not None
    items = screen_names if by_screen_name else user_ids

    items_to_info = {}

    # Process 100 items at a time per the API specifications for
    # /users/lookup. See http://bit.ly/2Gcjfzr for details.
    for start in range(0, len(items), 100):
        items_str = ','.join(str(item) for item in items[start:start + 100])

        if by_screen_name:
            response = make_twitter_request(twitter_api.users.lookup,
                                            screen_name=items_str)
        else:  # user_ids
            response = make_twitter_request(twitter_api.users.lookup,
                                            user_id=items_str)

        # make_twitter_request returns None on 401/404; skip that batch
        # rather than crashing while iterating over None.
        if response is None:
            continue

        for user_info in response:
            key = user_info['screen_name'] if by_screen_name else user_info['id']
            items_to_info[key] = user_info

    return items_to_info
188
189
def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list."""
    head = islice(iterable, n)
    return [item for item in head]
192
193
def crawl_followers(twitter_api, name="euchrid", followers_count=100,
                    max_iterations=20):
    """Breadth-first crawl of reciprocal friends, recording edges in ``G``.

    Starting from ``name``, each dequeued user has up to 10 friend ids and
    10 follower ids fetched; the ids present in both lists are looked up,
    ranked by ``followers_count`` (descending), and the top five are linked
    to the current user in the module-level graph ``G`` and enqueued.

    Args:
        twitter_api: client object passed through to the lookup helpers.
        name: screen name of the seed account.
        followers_count: currently unused by this function; kept so existing
            callers that pass it continue to work.
        max_iterations: maximum number of users to expand (default 20, the
            value that used to be hard-coded).

    The crawl stops when the iteration budget is exhausted or the queue is
    empty, whichever comes first. The remaining budget is printed on each
    pass.
    """
    queue = deque([get_user_profile(twitter_api, screen_names=[name])])
    remaining = max_iterations

    while True:
        print(remaining)
        # Stop on an exhausted budget, or when there is nobody left to
        # expand (previously this raised IndexError on the empty queue).
        if remaining == 0 or not queue:
            break

        user_dictionary = queue.popleft()
        if not user_dictionary:
            # Profile lookup returned nothing, so there is no node to expand.
            continue

        # Each queued dict maps a screen name to its profile; use the last
        # key, matching what the original key loop ended up with.
        *_, parent_node = user_dictionary

        # get_friends_followers_ids returns (friends, followers) in that order.
        friends, followers = get_friends_followers_ids(
            twitter_api, screen_name=parent_node,
            friends_limit=10, followers_limit=10)

        reciprocal_ids = list(set(friends) & set(followers))

        reciprocal_profiles = get_user_profile(
            twitter_api, user_ids=reciprocal_ids)

        ranked = sorted(reciprocal_profiles.items(),
                        key=lambda entry: entry[1]['followers_count'],
                        reverse=True)

        for _, profile in take(5, ranked):
            child_node = profile.get("screen_name")
            G.add_edge(parent_node, child_node)
            queue.append({child_node: profile})

        remaining -= 1
231
232
if __name__ == "__main__":
    # Only crawl and open the plot window when run as a script, so that
    # importing this module does not trigger the crawl or the blocking
    # plt.show() call.
    crawl_followers(twitter_api, "edmundyu1001")
    nx.draw(G, with_labels=True, font_weight='bold')
    plt.show()