Common R Functions

Last Updated 2019-06-24 by Adam Lu

Swirl

# Load Swirl
library("swirl")

# Start Swirl
swirl()

# Enter play() mode
play()

# Exit play() mode
nxt()

# Exit Swirl
bye()

Dealing with Packages

Basic commands

# Install package
install.packages("packageName")

# Load package
library(packageName)

Common Packages

# tidyverse includes the packages readr, dplyr, ggplot2 ... etc.
library(tidyverse)

# ggbeeswarm for beeswarm plots (geom_beeswarm(), etc.)
library(ggbeeswarm)

# broom for representing model data (tidy(), etc.)
library(broom)

# ggpubr for paired box plots (ggpaired(), ggpar(), etc.)
library(ggpubr)

# matlab for MATLAB-like functions (fullfile(), fileparts(), etc.)
library(matlab)

General

# Set working directory
setwd("pathToDir")

# List all variable names in the workspace
ls()

# Find the class of a variable
class(var)

# Remove a variable var
rm(var)

# Remove all variables in the workspace
rm(list = ls())

# Run a script
source("script.R")

Dealing with Functions

Assume myFun is a function:

# Look up the documentation for a function
?myFun

# Print the function definition
myFun

# Examine the arguments for a function
args(myFun)

Dealing with Vectors

Assume myVec is a vector:

# Create a vector by concatenation
c(var1, var2, var3)

# Create a sequence vector
seq(from = 0, to = 1, by = 0.25)

# Number of elements in a vector
length(myVec)

# Maximum, minimum and range of all elements of a vector
#	na.rm: set to TRUE to ignore missing (NA) entries (FALSE by default)
max(myVec)
min(myVec)
range(myVec)

# Sum of all elements of a vector
sum(myVec)

# Mean of all elements of a vector
mean(x = myVec)

# Standard deviation of all elements of a vector
sd(myVec)

# Extract all unique elements
unique(myVec)

# Extract a random sample of n elements without replacement
sample(x = myVec, size = n)

# Extract a random sample of n elements with replacement
sample(x = myVec, size = n, replace = TRUE)

Logical vectors

# Find the indices of the TRUE elements of a logical vector
which(logVec)

Dealing with Matrices

Assume myMat is a matrix:

# Create a matrix from a vector
matrix(data = myVec, nrow = nRows, ncol = nCols)

# Number of elements in a matrix
length(myMat)

# Sum of all elements of a matrix
sum(myMat)

# Mean of all elements of a matrix
mean(x = myMat)

# Standard deviation of all elements of a matrix
sd(myMat)

# Extract all unique elements
unique(myMat)

Dealing with Data Frames

Assume myDf is a data frame:

# Create a data frame from vectors
data.frame(myVar1 = myVec1, myVar2 = myVec2, myVar3 = myVec3)

# Write data frame to file
#	row.names: whether to write row names (TRUE by default)
#	col.names: whether to write column names (TRUE by default)
write.table(myDf, file = "path/to/file")

# Read data from file
#	header: whether to read header from file (FALSE by default)
read.table(file = "path/to/file")

# Import and read data from a csv file (readr)
read_csv("myDF.csv")

# Count the number of rows in a data frame
nrow(myDf)

# Count the number of columns in a data frame
ncol(myDf)

# Extract all unique elements
unique(myDf)

# Find the range of all values in the data frame
range(myDf)

# Take a glimpse of all the variables in a data frame
glimpse(myDF)

# Apply a function to one variable separately for each group defined by another variable
tapply(myDF$var1, myDF$var2, myFunction)
       
# Group data frame by one or more variables
group_by(var1)

Dealing with Expressions

# Create an expression
myExpr <- expression(var1 ~ var2)

# Extract all variables and function names in an expression
all.names(myExpr)

# Extract all the unique variables in an expression
all.vars(myExpr)

Dealing with Strings

Assume myStr1, myStr2, myStr3 are all strings:

# Paste strings together with a space in between
paste(myStr1, myStr2, myStr3)

# Paste strings together with no spaces in between (like strcat() in Matlab)
paste(myStr1, myStr2, myStr3, sep = "")

# Paste strings together with a specific separator
paste(myStr1, myStr2, myStr3, sep = "_")

# Split string(s) into list(s) based on a regular expression pattern for the separator
strsplit(x, regexp)
str_split(x, regexp)

# TODO:
substr(x, start=n1, stop=n2)
grep(pattern,x, value=FALSE, ignore.case=FALSE, fixed=FALSE)
gsub(pattern, replacement, x, ignore.case=FALSE, fixed=FALSE)
gregexpr(pattern, text, ignore.case=FALSE, perl=FALSE,
fixed=FALSE)
paste(... , sep="", collapse=NULL)
sprintf(fmt, ...)
toupper(x)
tolower(x)
nchar(x)

File paths

# Split a file path into parts and get the directory, base name and extension
fileParts <- fileparts(fullPath)
fileDir <- fileParts$pathstr
fileBase <- fileParts$name
fileExt <- fileParts$ext

# The path parts separator (either '\' for Windows or '/' for Unix)
filesep

# Join strings with filesep
fullPath <- fullfile(fileDir, fileName)

Tidyverse strings

# Unquote a string
!!myStr

# Quote a string
quo(myStr)

Dealing with Lists

# Create a list from variables (they do not have to be the same length)
list(name1 = value1, name2 = value2, name3 = value3)

# Query all the names in a list
names(myList)

Randomization

# Generate 10 random numbers from the Normal(0, 1) distribution
rnorm(10)

# Generate 10 random numbers from the Normal(1.2, 3.4) distribution
rnorm(10, mean = 1.2, sd = 3.4)

# Generate 100 random numbers from the Uniform(0, 1) distribution
runif(100)

Date-Times

# Convert character strings to POSIXlt
strptime(c("20170225230000", "20170226010000"), format = "%Y%m%d%H%M%S")

Wrapper functions

# Apply the same function to a list and return a list
lapply(myList, myFunction)

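# Apply the same function to a vector and return a vector of a prespecified type
#	FUN.VALUE: template for the value returned by each call (required)
vapply(myVector, myFunction, FUN.VALUE = numeric(1))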

# Apply the same function to a list and return a list or a vector
sapply(myVector, myFunction)

# Apply the same function to multiple lists
mapply(myFunction, myList1, myList2)

# Apply the same function to a variable filtered by factors
tapply(myDf$var, myDf$factor, myFunction)

Tibbles (tidyverse)

Basics

# Pipe a tibble to a dplyr function
myDf %>% func()

# Select rows by row number
myDf %>% filter(row_number() == 3L)

# Select rows by condition
myDf %>% filter(var1 == val)

# Select columns by variable names
myDf %>% select(var1, var2)

# Select columns by omission
myDf %>% select(-colNumToOmit)

# Omit columns by variable name patterns
myDf %>% select(-starts_with("Var"))

# Select rows by position
myDF %>% slice(rowNumber)

# Add columns
myDf %>% mutate(newVar = func(oldVar))

# Make variable a categorical variable
myDf %>% mutate(newVar = factor(oldVar))

# Replace the first match of a pattern in a string variable
myDf %>% mutate(newVar = str_replace(oldVar, oldStr, newStr))

# Convert all variables to strings (returns a list, not a data frame)
lapply(myDf, as.character)

# Join two tibbles together by variables, only including rows with data from the first tibble
myDf %>% left_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, only including rows with data from the second tibble
myDf %>% right_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, only including rows with data from both tibbles
myDf %>% inner_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, retaining all data
myDf %>% full_join(myDf2, by = c("Var1", "Var2"))

Tidying data

# Gather multiple columns into 2 columns:
#	The first "key" column has the original column name as value
#	The second "value" column retains the original value
myDf %>% gather(key = "keyVar", value = "valueVar", columns)

# Spread rows into columns TODO
myDf %>% spread

# Unite several columns into one column
myDf %>% unite("newVar", c("oldVar1", "oldVar2", "oldVar3"), sep = "_")

# Separate one column into several columns
myDf %>% separate("oldVar", c("newVar1", "newVar2", "newVar3"), sep = "_")

Plotting

Basic plotting

# Generate a color from RGB values
rgb(red, green, blue, alpha)

# Query all graphical parameters:
#	col: color
#	type: line style
#	lwd: line width
#   pch: plotting character (symbol)
#   cex: magnification
#   ylim: y-axis limits
?par

# Plot a vector vec
plot(vec)

# Plot connected line segments
lines(vec)

# Plot points
points(vec)

# Plot a histogram
hist(vec)

ggplot2

# Build a canvas with the data
myPlot <- ggplot(data = myDf, aes(x = varX, y = varY))

# Add a violin plot
myPlot + geom_violin()

# Add a jitter plot
myPlot + geom_jitter()

# Add a combined violin and jitter plot
myPlot + geom_violin() + geom_jitter()

# Add a beeswarm plot
myPlot + geom_beeswarm()

# Save the plot
ggsave("plotname.png")

ggpubr

# Add a paired box plot by id
ggpaired(myDF, x = varX, y = varY, id = pairedVar,..., facet.by = varZ)

Statistics

Linear Model & ANOVA

# Generate a linear model
model <- lm(formula = var1 ~ var2, data = myDf)

# Get the summary statistics of the linear model
summary(model)

Statistical Tests

# Perform a Kruskal-Wallis Test
kruskal.test(formula = var1 ~ var2, data = myDf)

# Perform a two-sample Kolmogorov-Smirnov Test
ks.test(x = myDf$var1, y = myDf$var2)

# Perform the Shapiro-Wilk test for normality on a numeric variable
shapiro.test(myDf$var1)

# Perform the Welch Two Sample t-test
t.test(x ~ y, data = myDF)

# Perform the Wilcoxon rank-sum test (Mann-Whitney U Test)
wilcox.test(x ~ y, data = myDF)

# Perform a two-way ANOVA (main effects only; use varY * varZ to also test the interaction effect)
aov(varX ~ varY + varZ, data = myDF)

# Perform an F-test to compare the variances of two groups
var.test(varX ~ varY, myDF, alternative = "two.sided")

Settings_Anaconda

Settings and notes for packages in Anaconda

Common R Functions

Swirl

Dealing with Packages

Basic commands

Common Packages

General

Dealing with Functions

Dealing with Vectors

Logical vectors

Dealing with Matrices

Dealing with Data Frames

Dealing with Expressions

Dealing with Strings

File paths

Tidyverse strings

Dealing with Lists

Randomization

Date-Times

Wrapper functions

Tibbles (tidyverse)

Basics

Tidying data

Plotting

Basic plotting

ggplot2

ggpubr

Statistics

Linear Model & ANOVA

Statistical Tests