· 6 years ago · Nov 25, 2019, 09:14 AM
---
title: "R for Ryan"
output: html_notebook
---
5
6```{r}
# Smoke test: confirm that chunks are evaluated and output is shown.
print("Hello world!")
8```
9
10```{r}
# Three ways to show a value: paste() builds a string, print() displays the
# object itself, and sprintf() formats it C-style.
x <- 1
paste("x is", x)
print(x)
# %d accepts a double here because the value is a whole number.
sprintf("x is %d", x)
15```
16
17```{r}
# Wrapping an expression in parentheses prints its value.
# Relies on `x` from the previous chunk.
y <- 2
(x + y)
20```
21
22Data types
23
24```{r}
# A bare numeric literal is stored as a double; class() reports "numeric".
num_a <- 10
class(num_a)
27```
28
29Data Structures
30
31```{r}
# c() combines values into a vector; the outer parentheses print the result.
(myVector <- c(1, 2, 3, 4, 5, 6))
33```
34
35```{r}
# An empty c() is NULL, whereas the typed constructors give zero-length
# vectors of that type.
class(c())
class(numeric())
class(character())
39```
40
41```{r}
# matrix() fills column by column unless byrow = TRUE is given.
# Relies on `myVector` from an earlier chunk.
(myMatrix <- matrix(myVector, nrow = 2))
(myMatrix_2 <- matrix(myVector, ncol = 2))
(myMatrix_3 <- matrix(myVector, ncol = 2, byrow = TRUE))
45```
46
47Dataframes
48
49```{r}
# Build a data frame from three equal-length vectors; each vector becomes a
# column named after the variable.
name <- c("Dustin", "Christy", "David")
age <- c(24, 20, 30)
student <- c(FALSE, TRUE, FALSE)

df_students <- data.frame(name, age, student)
class(df_students)

# View() opens a data viewer and needs an interactive session; skip it when
# the notebook is knitted or run with Rscript.
if (interactive()) {
  View(df_students)
}
58```
59
60Slicing and Dicing
61
62```{r}
# Matrix indexing is [row, column]; leaving one side empty selects all of it.
# Relies on `myMatrix_2` from an earlier chunk.
(myMatrix_2[1, 1]) # single element
(myMatrix_2[, 1])  # whole first column, returned as a plain vector
(myMatrix_2[1, ])  # whole first row, returned as a plain vector
66```
67
68```{r}
# Single brackets with one index keep the data frame: a one-column data frame.
df_students[1]
70```
71
72```{r}
# `$` extracts the column itself (a vector), not a data frame.
df_students$name
74```
75
76```{r}
# A vector of indices selects several columns at once (first and third here).
df_students[c(1, 3)]
78```
79
80```{r}
# A list can hold elements of different types; only the first one is named.
x <- 1:6
y <- matrix(x)
z <- "Hi"
(my_list <- list(x = x, y, z))

my_list[1]   # sub-list containing the first element
my_list[[1]] # the first element itself
my_list$x    # the same element, accessed by name
88```
89
90```{r}
# Extract the first list element, then take its second value.
my_list[[1]][2]
92```
93
94
95
CONTROL FLOW
97
98Conditionals
99
100```{r}
# if / else if / else on a single value. The parenthesised comparison on the
# second line only prints the logical result.
check_var <- 5
(check_var == 6)

if (check_var == 5) {
  print("variable is 5")
} else if (check_var == 6) {
  print("variable is 6")
} else {
  # Catch-all for every other value.
  print("this is any other thing")
}
111```
112
113
114
115LOOPS
116
117```{r}
# Skeleton of a for loop over a range of integers; the body is left empty.
for (i in 2015:2019) {
  # Loop body goes here.
}
121```
122
123```{r}
# Iterate directly over the elements of `name` (defined in an earlier chunk).
for (n in name) {
  print(n)
}

# Index positions of `name`. seq_along() returns an empty vector for empty
# input, whereas 1:length(x) would yield c(1, 0).
seq_along(name)
129```
130
131While
132
133```{r}
# A while loop repeats until its condition becomes FALSE; prints 0 to 4.
counter <- 0

while (counter < 5) {
  print(counter)
  counter <- counter + 1
}
140```
141
142Next, Break
143
144```{r}
# `break` leaves the loop immediately: the loop stops once n reaches 5 even
# though the condition would allow it to run up to 10.
n <- 0
while (n < 10) {
  # cat() joins its arguments with spaces and prints them. It adds no newline
  # of its own, so one is passed explicitly as the final argument.
  cat("Loop", n, "\n")
  n <- n + 1

  if (n == 5) {
    cat("Breaking at loop", n, "\n")
    break
  }
}
155```
156
157```{r}
# `next` jumps straight to the following iteration, skipping the rest of the
# body. The check therefore has to come before the work it is meant to skip,
# and the counter must be advanced first or the loop would never end.
n <- 0
while (n < 10) {
  if (n == 5) {
    cat("Skipping loop", n, "\n")
    n <- n + 1
    next
  }

  # cat() adds no newline of its own, so one is passed explicitly.
  cat("Loop", n, "\n")
  n <- n + 1
}
168```
169
Functions are defined with the general form `name_of_function <- function(parameter) { ... }`.
173
174```{r}
#' Raise a number to a power
#'
#' @param base Numeric value (or vector) to be raised.
#' @param exponent Numeric power to raise `base` to.
#' @return `base` raised to `exponent`, computed element-wise.
powerFunc <- function(base, exponent) {
  # `^` is R's power operator; `**` is only a parser alias for it. The last
  # expression is the return value, so no explicit return() is needed.
  base^exponent
}

powerFunc(2, 3)
180```
181
182Strings
183
184```{r}
# Text is stored as a character vector; strsplit() returns a list holding one
# vector of pieces per input string.
myString <- "I am in NUS!"
class(myString)

strsplit(myString, " ")
189```
190
191```{r}
# paste() joins strings with the given separator.
# Note: this reassigns `name`, replacing the vector of student names created
# in the Dataframes chunk.
name <- "joe"
domain <- "gmail.com"
paste(name, domain, sep = "@")
195```
196
197
198
199FACTORS
200
201Numerical vs Categorical Data
202
203```{r}
# A factor stores categorical data; `levels` fixes both the allowed values
# and their order (S, then M, then L).
sizes <- factor(
  c("S", "L", "S", "M", "L", "L", "S"),
  levels = c("S", "M", "L")
)
205```
206
207
208PACKAGES
209
210```{r}
211library(ggplot2)
212
213```
214
215
216WORKSPACE
217
218```{r}
# Show the current working directory and the files it contains.
getwd()
dir()

# Print each entry of the working directory on its own line.
for (file in dir()) {
  print(file)
}
225```
226
227
228READING FILES
229
230```{r}
# List the working directory to check that the data files are present.
dir()
232```
233
234```{r}
# Read a CSV from the working directory and peek at its first and last rows.
resale_df <- read.csv("resale-sample.csv")
head(resale_df)
tail(resale_df)
238```
239
240```{r}
# install.packages("readxl")
library(readxl)

# Read the first worksheet of the workbook; naming `sheet` makes the meaning
# of the 1 explicit.
data_excel <- read_xlsx("Sales 2016.xlsx", sheet = 1)
head(data_excel)
245```
246
247```{r}
248#install.packages("rvest")
249library("rvest")
250```
251
252```{r}
# Download the IMDb page and pull the rating out of it. Uses rvest, which is
# loaded in the previous chunk.
lego_movie_url <- "https://www.imdb.com/title/tt1490017/"
lego_movie_page <- read_html(lego_movie_url)
# lego_movie_page

# NOTE(review): the "strong span" selector is assumed to match the rating
# element; confirm against the live page, since site markup changes over time.
rating_nodes <- html_nodes(lego_movie_page, "strong span")
rating_text <- html_text(rating_nodes)
lego_movie_rating <- as.numeric(rating_text)

lego_movie_rating
262```
263
264```{r}
# install.packages("magrittr")
library("magrittr")

x <- c(1, 2, 3)

# Step by step, with named intermediates.
(log_x <- log(x))
(round_log_x <- round(log_x, 2))

# The same computation as a pipeline: each result is passed on as the first
# argument of the next call. RStudio shortcut for %>% is Ctrl + Shift + M.
x %>%
  log() %>%
  round(2)
275```
276
277```{r}
# Extract the cast names from the page downloaded earlier.
# NOTE(review): ".primary_photo+td a" is assumed to match the cast-table
# links; confirm against the live page.
lego_movie_cast <- lego_movie_page %>%
  html_nodes(".primary_photo+td a") %>%
  html_text()
lego_movie_cast
280```
281
282```{r}
# Scrape one HTML table from a Wikipedia article using an XPath expression.
wiki_url <- "https://en.wikipedia.org/wiki/Demographics_of_Singapore"
wiki_page <- read_html(wiki_url)

# NOTE(review): the table is selected by position (the 5th table), which
# breaks if the article is edited; verify that it still points at the
# intended table.
xpath <- '//*[@id="mw-content-text"]/div/table[5]'
wiki_table <- html_nodes(wiki_page, xpath = xpath)

# fill = TRUE pads rows that have fewer cells than the others.
wiki_table_clean <- html_table(wiki_table, fill = TRUE)
wiki_table_clean
290
291```
292
293
294
295
296Exercise
297
298```{r}
# Exercise: scrape book titles and prices into a data frame and save as CSV.
books_url <- "http://books.toscrape.com/"
books_page <- read_html(books_url)

# The full title is held in the `title` attribute of each link.
title_nodes <- html_nodes(books_page, ".product_pod h3 a")
books_title <- html_attr(title_nodes, "title")

price_nodes <- html_nodes(
  books_page,
  ".product_pod .product_price .price_color"
)
# Keep only digits and the decimal point, so the conversion does not depend
# on the currency prefix being exactly one character long.
books_price <- as.numeric(gsub("[^0-9.]", "", html_text(price_nodes)))

books_df <- data.frame(Title = books_title, Price = books_price)
summary(books_df)

# row.names = FALSE avoids writing an index column, which would come back as
# a spurious "X" column when the file is read again.
write.csv(books_df, file = "./books.csv", row.names = FALSE)
310```
311
312```{r}
# Fetch the Google News front page and extract the headline text.
# NOTE(review): "article h3" is assumed to match the headline elements;
# confirm against the live page.
gnews <- read_html("https://news.google.com/?hl=en-SG&gl=SG&ceid=SG:en")
headline_nodes <- html_nodes(gnews, "article h3")
gnews2 <- html_text(headline_nodes)
gnews2
316```
317
318
319
320Financial Data
321
322```{r}
# install.packages("quantmod")
library("quantmod")

# Download Apple price data from Yahoo Finance starting 2019-01-01.
# getSymbols() is the user-facing entry point and dispatches on `src`;
# auto.assign = FALSE returns the data instead of creating a variable.
AAPL <- getSymbols(
  "AAPL",
  src = "yahoo",
  auto.assign = FALSE,
  from = "2019-01-01"
)
head(AAPL)

# getSymbols("USD/SGD", src = "oanda", auto.assign = FALSE, from = "2019-01-01")
330```
331
332```{r}
333
# install.packages("jsonlite")
library(httr)
library("jsonlite")

# Request 50 records from the data.gov.sg datastore API. The query parameters
# are passed as a list so httr builds and escapes the URL.
get_json <- GET(
  "https://data.gov.sg/api/action/datastore_search",
  query = list(
    resource_id = "42ff9cfe-abe5-4b54-beda-c88f9bb438ee",
    limit = 50
  )
)
# Fail here with a clear message on an HTTP error, rather than later while
# parsing an error page as JSON.
stop_for_status(get_json)

# Stating the encoding avoids httr's "No encoding supplied" guess.
get_json_text <- content(get_json, as = "text", encoding = "UTF-8")

# NOTE(review): this reassigns `resale_df` (previously the CSV data) to the
# whole parsed response. The records table is presumably nested inside it,
# e.g. under $result$records; confirm against the API response.
resale_df <- fromJSON(get_json_text, flatten = TRUE)
resale_df
344
345```
346
347Cleaning Data
348
349```{r}
# Load the deliberately messy iris data set and take a first look.
# stringsAsFactors = TRUE is set explicitly: since R 4.0 read.csv() keeps
# text columns as character, in which case levels() below returns NULL.
iris_df <- read.csv("http://bit.ly/dirty_iris", stringsAsFactors = TRUE)
summary(iris_df)

# Negate so that TRUE marks the columns to keep (everything except "X").
keep_column <- !names(iris_df) %in% c("X")
keep_column

# dataframe[rows, columns]; drop = FALSE guarantees a data frame even if
# only one column were left.
iris_df_dirty <- iris_df[, keep_column, drop = FALSE]

# Number of missing values per column.
colSums(is.na(iris_df_dirty))

# Distinct species labels, including the inconsistent spellings.
levels(iris_df_dirty$Species)
362```
363
364```{r}
# Data cleaning libraries. plyr is attached before dplyr so that dplyr's
# versions of the functions they share are the ones found first.
library(plyr)
library(dplyr)

# Map every inconsistent spelling onto its canonical lower-case label;
# labels not listed here are left unchanged.
species_fixes <- c(
  "Setosa" = "setosa",
  "SETOSA" = "setosa",
  "Versicolor" = "versicolor",
  "VERSICOLOR" = "versicolor",
  "VIRGINICA" = "virginica"
)
iris_df_dirty$Species.Clean <- revalue(iris_df_dirty$Species, species_fixes)

levels(iris_df_dirty$Species.Clean)

iris_df_dirty
374```
375
376```{r}
# Fill missing sepal lengths with the mean sepal length of the same species.

# Per-species mean over the rows that have a value: dataframe[rows, columns].
# summarise is namespace-qualified because plyr exports a function of the
# same name that ignores grouping.
has_sepal_length <- !is.na(iris_df_dirty$Sepal.Length)
iris_avg_sepal_length <- iris_df_dirty[has_sepal_length, ] %>%
  dplyr::group_by(Species.Clean) %>%
  dplyr::summarise(Species.Mean.Sepal.Length = mean(Sepal.Length))

# For every row, the position of its species in the table of means.
sepal_length_ident_index <- match(
  iris_df_dirty$Species.Clean,
  iris_avg_sepal_length$Species.Clean
)

# Use the species mean where the value is missing, the original otherwise.
iris_df_dirty$Sepal.Length.Clean <- ifelse(
  is.na(iris_df_dirty$Sepal.Length),
  iris_avg_sepal_length$Species.Mean.Sepal.Length[sepal_length_ident_index],
  iris_df_dirty$Sepal.Length
)

iris_df_dirty
386iris_df_dirty
387
388```