· 5 years ago · Nov 20, 2020, 11:00 AM
1# stdlib libraries
2from collections import Counter
3import json
4
5# third-party libraries (make sure you install them)
6import matplotlib.pyplot as plt
7import numpy as np
8import praw
9from loguru import logger
10from tqdm import tqdm
11
# Load the Reddit API credentials from "api_credentials.json"
# (the path is relative to the current working directory).
# The credentials JSON file should look like this:
# {
#     "user_agent": "Trout_DB",
#     "client_id": "your_clientID",
#     "client_secret": "your_secret_key"
# }
# For information on how to get an API key (client ID and client secret), see:
# https://github.com/reddit-archive/reddit/wiki/OAuth2-Quick-Start-Example
#
# The encoding is given explicitly so reading the file does not depend on
# the platform's default text encoding.
with open("api_credentials.json", encoding="utf-8") as cred_reader:
    credentials = json.load(cred_reader)

# every key of the JSON object is handed to praw.Reddit as a keyword argument
reddit = praw.Reddit(**credentials)

# find the subreddit
nnn = reddit.subreddit("nonutnovember")
27
def get_user_flairs(elem, seen_ids=None):
    """Recursively collect user flairs as a ``{author name: flair text}`` dict.

    Can take a comment, a MoreComments or a CommentForest as argument.

    Args:
        elem: The ``Comment``, ``MoreComments`` or ``CommentForest`` to walk.
        seen_ids: Set of comment / MoreComments IDs that were already
            handled. It is updated in place and passed down to the
            recursive calls so that no element is processed twice.
            A fresh set is created when the argument is omitted.

    Returns:
        A dictionary mapping author name to flair text for the comments
        reachable from ``elem`` that have both an author and a flair.
        Elements whose ID is already in ``seen_ids`` contribute nothing.
        For any other type of ``elem`` a message is printed and an empty
        dictionary is returned.
    """
    # Create the set per call: a mutable default argument would be one
    # object shared by every call that does not pass ``seen_ids``.
    if seen_ids is None:
        seen_ids = set()

    user_flairs = {}
    if isinstance(elem, praw.models.reddit.comment.Comment):
        if elem.id in seen_ids:
            return user_flairs
        seen_ids.add(elem.id)
        flair = elem.author_flair_text
        # The author can be None (e.g. the account was deleted), in which
        # case there is no name to store the flair under.
        author = elem.author
        if flair is not None and author is not None:
            user_flairs[author.name] = flair
        # add the users in the replies of this comment
        user_flairs.update(get_user_flairs(elem.replies, seen_ids))
    elif isinstance(elem, praw.models.reddit.more.MoreComments):
        if elem.id in seen_ids:
            return user_flairs
        seen_ids.add(elem.id)
        # expand the placeholder and walk the comments it stands for
        for com in elem.comments():
            user_flairs.update(get_user_flairs(com, seen_ids))
    elif isinstance(elem, praw.models.comment_forest.CommentForest):
        # an empty forest simply yields no iterations
        for com in elem.list():
            user_flairs.update(get_user_flairs(com, seen_ids))
    else:
        print(f"Unknown type: {type(elem)}")
    return user_flairs
62
# read all flairs for all commenters on the top posts for this month
# and update the user_flairs dictionary for each post
# (a user seen on several posts keeps the flair read last)
POST_LIMIT = 100  # number of top posts to read; also the progress bar total
user_flairs = {}
for post in tqdm(nnn.top(time_filter="month", limit=POST_LIMIT),
                 desc="Reading comments on posts",
                 total=POST_LIMIT,
                 ascii=".#"):
    user_flairs.update(get_user_flairs(post.comments))
71
# now count the occurrences of flairs
ufc = Counter(user_flairs.values())

# get the number of counts per day as {int: int} mapping
# so it's easy to plot
daycounts = {}
for flair, count in ufc.items():
    if not flair.startswith("OUT November"):
        continue
    # A flair looks like this: "OUT November 20th | Silver NoNutter"
    # Here, flair is first split by whitespace,
    # then the third word (index 2) is extracted (e.g. 20th)
    # then the suffix (e.g. th) is stripped,
    # and finally it's converted to an int
    words = flair.split()
    if len(words) < 3:
        # nothing after "OUT November", so there is no day to read
        continue
    try:
        day = int(words[2].strip("stndrh"))
    except ValueError:
        # the third word is not a day number; ignore this flair
        continue
    # The counter is keyed on the whole flair text, so the same day can
    # show up under several flairs (different text after the day).
    # Add the counts up instead of overwriting the previous one.
    daycounts[day] = daycounts.get(day, 0) + count
87
# x values: the days 1 through 30; y values: the count stored for each day
# (0 for days that have no entry in daycounts)
days = np.arange(1, 31)
counts_per_day = [daycounts.get(day, 0) for day in days]
counts = np.array(counts_per_day)

# one bar per day, with a tick under every bar
fig, ax = plt.subplots()
ax.bar(days, counts)
ax.set_xticks(days)

# write each bar's value as a text label, shifted a little to the left
# of the bar's x position and a little above its top
x_spacing = 0.2
y_spacing = 1
for day, count in zip(days, counts):
    label_x = day - x_spacing
    label_y = count + y_spacing
    ax.text(label_x, label_y, count)
plt.show()