# Paste-site header (not part of the script): 6 years ago · Nov 29, 2019, 02:42 PM
# Install (when missing) and attach every package this script relies on.
# `require()` merely returns FALSE for an absent package, so the original
# one-liners installed the package but never attached it on a first run.
# The attach order is unchanged: httr before plotly, jsonlite last.
requiredPackages <- c(
  "httpuv", "httr", "ggplot2", "plotly", "devtools", "jsonlite"
)
for (pkg in requiredPackages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() raises an error if the package still cannot be loaded.
  library(pkg, character.only = TRUE)
}

# OAuth endpoints of the service being used. Can be github, linkedin etc.
# depending on the application. Looked up once and reused below.
githubEndpoints <- oauth_endpoints("github")

# Application registered with GitHub for this script.
# NOTE(review): the client id and secret are committed in plain text. They are
# kept only as fallbacks so the script behaves as before; prefer exporting
# GITHUB_CLIENT_ID / GITHUB_CLIENT_SECRET and rotating these values.
myapp <- oauth_app(
  appname = "SOFTWARE_ENGINEERING_GITHUB_API",
  key = Sys.getenv("GITHUB_CLIENT_ID", unset = "f9e7cd4eb70fcd9b1829"),
  secret = Sys.getenv(
    "GITHUB_CLIENT_SECRET",
    unset = "31a12d411be12b805828f1912e39ab405bb2601e"
  )
)

# Get OAuth credentials
github_token <- oauth2.0_token(githubEndpoints, myapp)

# Request configuration carrying the token; passed to every GET() below.
# plotly is attached after httr and also exports config(), so the call must be
# namespaced or it resolves to plotly::config() and fails.
gtoken <- httr::config(token = github_token)
# Ask the API for the repositories of the example account (jtleek), using the
# authenticated request configuration.
req <- GET(url = "https://api.github.com/users/jtleek/repos", config = gtoken)

# Raise an R error if the server answered with an HTTP error status.
stop_for_status(req)

# Parsed body of the response.
json1 <- content(req)

# Round-trip the parsed body through JSON text to obtain a data.frame.
json1AsJson <- jsonlite::toJSON(json1)
gitDF <- jsonlite::fromJSON(json1AsJson)

# Show the creation timestamp of the "jtleek/datasharing" repository.
isDatasharing <- gitDF$full_name == "jtleek/datasharing"
gitDF[isDatasharing, "created_at"]

# Query the GitHub API for my own account (cianrellis) and print a summary.
# Each bare expression below prints its value when run at top level.

# Profile record of the account
myData <- fromJSON("https://api.github.com/users/cianrellis")

# How many followers I have
myData[["followers"]]

# Who those followers are (their user names)
followers <- fromJSON("https://api.github.com/users/cianrellis/followers")
followers[["login"]]

# How many people I am following
myData[["following"]]

# Who I am following (their user names)
following <- fromJSON("https://api.github.com/users/cianrellis/following")
following[["login"]]

# How many public repositories I have
myData[["public_repos"]]

# Details of my public repositories
repos <- fromJSON("https://api.github.com/users/cianrellis/repos")
repos[["name"]]        # repository names
repos[["created_at"]]  # when each repository was created
repos[["full_name"]]   # names in "owner/repository" form

# The plots further down are built from the followers of one popular,
# influential developer rather than from my own account, because a bigger
# sample size produces better results.
# NOTE(review): the original notes name Sebastien Eustace (username sdispater)
# as that developer, but the request has always targeted "fabpot". The
# requested account is left unchanged here; confirm which one is intended.
targetUser <- "fabpot"

# Fetch up to 100 of the target's followers. The original URL ended in a stray
# ";", so the value sent for per_page was "100;" rather than 100.
followersUrl <- paste0(
  "https://api.github.com/users/", targetUser, "/followers?per_page=100"
)
myData <- GET(followersUrl, gtoken)
stop_for_status(myData)
extract <- content(myData)

# Converts into dataframe
githubDB <- jsonlite::fromJSON(jsonlite::toJSON(extract))
githubDB$login

# Retrieve a list of usernames
id <- githubDB$login
user_ids <- c(id)

# Build two objects from the follower logins in `user_ids`:
#   users   - unique logins of the accounts those followers are following
#   usersDB - one row per such login: username, following, followers, repos
#             (all counts, stored as integers) and dateCreated (the year the
#             account was created, kept as text like the original stored it)
#
# The original appended each row as c(login, counts..., year), which turned
# every column into character, so the counts could not be plotted as numbers.

# Collection stops as soon as this many distinct users have been recorded.
# The cap is checked before every addition, so it is never overshot.
maxUsers <- 150

users <- character(0)
userRows <- list()

# user_ids may be a list after the JSON round-trip, so flatten it first.
# Iterating over the values (not 1:length()) is also safe when it is empty.
followerLogins <- as.character(unlist(user_ids))

for (followerLogin in followerLogins) {
  if (length(users) >= maxUsers) {
    break
  }

  followingURL <- paste0(
    "https://api.github.com/users/", followerLogin, "/following"
  )
  followingRequest <- GET(followingURL, gtoken)

  # Skip this follower, with a warning, when the request failed, instead of
  # treating the error payload as a list of accounts.
  if (http_error(followingRequest)) {
    warn_for_status(followingRequest)
    next
  }
  followingContent <- content(followingRequest)

  # Nothing to add when this follower is not following anyone.
  if (length(followingContent) == 0) {
    next
  }

  followingLogin <- vapply(
    followingContent,
    function(account) account[["login"]],
    character(1)
  )

  # Loop through the accounts this follower is following
  for (candidate in followingLogin) {
    if (length(users) >= maxUsers) {
      break
    }

    # Each account is recorded only once.
    if (candidate %in% users) {
      next
    }

    # Obtain the profile of the account
    followingUrl2 <- paste0("https://api.github.com/users/", candidate)
    following2 <- GET(followingUrl2, gtoken)
    if (http_error(following2)) {
      warn_for_status(following2)
      next
    }
    followingContent2 <- content(following2)

    # Record the login only after its profile was fetched, so that `users`
    # and `usersDB` always describe the same accounts.
    users <- c(users, candidate)
    userRows[[length(userRows) + 1]] <- data.frame(
      username = candidate,
      following = as.integer(followingContent2[["following"]]),
      followers = as.integer(followingContent2[["followers"]]),
      repos = as.integer(followingContent2[["public_repos"]]),
      # created_at starts with the four-digit year
      dateCreated = substr(followingContent2[["created_at"]], 1, 4),
      stringsAsFactors = FALSE
    )
  }
}

# Bind all rows in one step; fall back to an empty, correctly typed frame.
if (length(userRows) > 0) {
  usersDB <- do.call(rbind, userRows)
} else {
  usersDB <- data.frame(
    username = character(),
    following = integer(),
    followers = integer(),
    repos = integer(),
    dateCreated = character(),
    stringsAsFactors = FALSE
  )
}

# Credentials for plotly, which hosts the online interactive graphs.
# Values already present in the environment are respected; the built-in pair
# is applied only when either one is missing.
# NOTE(review): the API key is committed in plain text; it is kept as a
# fallback so the script behaves as before, but it should be rotated and
# supplied through the environment instead.
if (!nzchar(Sys.getenv("plotly_username")) ||
    !nzchar(Sys.getenv("plotly_api_key"))) {
  Sys.setenv("plotly_username" = "cianrellis")
  Sys.setenv("plotly_api_key" = "IIXbCsYumDNJzMl0IBAi")
}

# Plot on a copy of usersDB whose count columns are guaranteed to be numeric,
# so both axes are continuous even if the counts were stored as text.
plotDF <- usersDB
countColumns <- c("following", "followers", "repos")
plotDF[countColumns] <- lapply(plotDF[countColumns], as.numeric)

# Plot one: repositories vs followers, coloured by account creation year.
# One point per row of usersDB (one sampled user).
# X-axis: number of repositories of the user.
# Y-axis: number of followers of the user.
# The trace type and mode are stated explicitly rather than left to plotly.
plot1 <- plot_ly(
  data = plotDF,
  x = ~repos,
  y = ~followers,
  type = "scatter",
  mode = "markers",
  text = ~paste("Followers: ", followers, "<br>Repositories: ", repos, "<br>Date Created:", dateCreated),
  color = ~dateCreated
)
plot1
# Sends graph to plotly
api_create(plot1, filename = "Repositories vs Followers")
# Published plot: https://plot.ly/~berryd1/1/#/
# NOTE(review): this link is under the plotly account "berryd1", not the
# "cianrellis" account configured above; confirm it is the right link.

# Plot two: following vs followers, coloured by account creation year.
# One point per row of usersDB (one sampled user).
# X-axis: number of accounts the user is following.
# Y-axis: number of followers of the user.
plot2 <- plot_ly(
  data = plotDF,
  x = ~following,
  y = ~followers,
  type = "scatter",
  mode = "markers",
  text = ~paste("Followers: ", followers, "<br>Following: ", following),
  color = ~dateCreated
)
plot2
# Sends graph to plotly
api_create(plot2, filename = "Following vs Followers")
# Published plot: https://plot.ly/~berryd1/3/
# NOTE(review): also under the "berryd1" plotly account; confirm.

# Data for plot 3: the language of every repository returned for each of the
# sampled users in `users` (the same users as in the two previous plots).
# Only the repositories on the first page of each listing are considered,
# exactly as before.
languages <- character(0)

# Iterating over the values (not 1:length()) is safe when `users` is empty;
# unlist() copes with `users` having been built up as a list.
for (owner in as.character(unlist(users))) {
  RepositoriesUrl <- paste0("https://api.github.com/users/", owner, "/repos")
  Repositories <- GET(RepositoriesUrl, gtoken)

  # Skip this user, with a warning, when the request failed.
  if (http_error(Repositories)) {
    warn_for_status(Repositories)
    next
  }
  RepositoriesContent <- content(Repositories)

  # Every entry of the listing already carries the repository's `language`,
  # so the original second request per repository is unnecessary. It also
  # issued bogus requests for users without any repositories.
  # Repositories without a language have a NULL field, which unlist() drops.
  repoLanguages <- unlist(
    lapply(RepositoriesContent, function(repo) repo[["language"]])
  )
  # Guard against missing values with is.na(); comparing to "<NA>" is not an
  # NA test.
  repoLanguages <- repoLanguages[!is.na(repoLanguages)]

  languages <- c(languages, repoLanguages)
}

# Count how often each language occurs, ordered from least to most frequent.
# sort() has no `increasing` argument; the original `increasing=TRUE` was
# silently ignored and the ascending order came from the default.
allLanguages <- sort(table(languages), decreasing = FALSE)

# Keep the (up to) 10 most frequent languages. Counting back from the end with
# seq.int() stays valid when fewer than 10 languages were found, where the
# original `(length - 9):length` produced zero or negative positions.
topCount <- min(10, length(allLanguages))
top10Languages <- allLanguages[
  seq.int(to = length(allLanguages), length.out = topCount)
]

# Converts to dataframe (columns `languages` and `Freq`)
languageDF <- as.data.frame(top10Languages)

# Plot three: the (up to) 10 most common repository languages among the
# sampled users, as a bar chart.
# X-axis: language.
# Y-axis: number of repositories written in that language (the counts come
# from one entry per repository, not one per user).
# Author's observation from their run: Ruby was the most popular, followed by
# JavaScript; C and Rust were the least popular of the ten. The result depends
# on live API data.
plot3 <- plot_ly(data = languageDF, x = ~languages, y = ~Freq, type = "bar")
# Namespaced because graphics::layout() exists as well.
plot3 <- plotly::layout(
  plot3,
  xaxis = list(title = "Language"),
  yaxis = list(title = "Number of repositories")
)
plot3

# Credentials for plotly. Values already present in the environment are
# respected; the built-in pair is applied only when either one is missing.
# NOTE(review): the API key is committed in plain text; rotate it and supply
# it through the environment instead.
if (!nzchar(Sys.getenv("plotly_username")) ||
    !nzchar(Sys.getenv("plotly_api_key"))) {
  Sys.setenv("plotly_username" = "cianrellis")
  Sys.setenv("plotly_api_key" = "IIXbCsYumDNJzMl0IBAi")
}
# Sends graph to plotly
api_create(plot3, filename = "10 Most Popular Languages")
# Published plot: https://plot.ly/~cianrellis/5/#/