Common R Functions
Last Updated 2019-06-24 by Adam Lu
Swirl
# Load Swirl
library("swirl")
# Start Swirl
swirl()
# Enter play() mode
play()
# Exit play() mode
nxt()
# Exit Swirl
bye()
Dealing with Packages
Basic commands
# Install package
install.packages("packageName")
# Load package
library(packageName)
Common Packages
# tidyverse includes the packages readr, dplyr, ggplot2 ... etc.
library(tidyverse)
# ggbeeswarm for beeswarm plots (geom_beeswarm(), etc.)
library(ggbeeswarm)
# broom for representing model data (tidy(), etc.)
library(broom)
# ggpubr for paired box plots (ggpaired(), ggpar(), etc.)
library(ggpubr)
# matlab functions (fullfile, fileparts, etc.)
library(matlab)
General
# Set working directory
setwd("pathToDir")
# List all variable names in the workspace
ls()
# Find the class of a variable
class(var)
# Remove a variable var
rm(var)
# Remove all variables in the workspace
rm(list = ls())
# Run a script
source("script.R")
Dealing with Functions
Assume myFun is a function:
# Look up the documentation for a function
?myFun
# Print the function definition
myFun
# Examine the arguments for a function
args(myFun)
Dealing with Vectors
Assume myVec is a vector:
# Create a vector by concatenation
c(var1, var2, var3)
# Create a sequence vector
seq(from = 0, to = 1, by = 0.25)
# Number of elements in a vector
length(myVec)
# Maximum, minimum and range of all elements of a vector
# na.rm: pass na.rm = TRUE to ignore missing (NA) entries (FALSE by default)
max(myVec)
min(myVec)
range(myVec)
# Sum of all elements of a vector
sum(myVec)
# Mean of all elements of a vector
mean(x = myVec)
# Standard deviation of all elements of a vector
sd(myVec)
# Extract all unique elements
unique(myVec)
# Extract a random sample of n elements without replacement
sample(x = myVec, size = n)
# Extract a random sample of n elements with replacement
sample(x = myVec, size = n, replace = TRUE)
Logical vectors
# Find the indices of the elements that are TRUE
which(logVec)
Dealing with Matrices
Assume myMat is a matrix:
# Create a matrix from a vector
matrix(data = myVec, nrow = nRows, ncol = nCols)
# Number of elements in a matrix
length(myMat)
# Sum of all elements of a matrix
sum(myMat)
# Mean of all elements of a matrix
mean(x = myMat)
# Standard deviation of all elements of a matrix
sd(myMat)
# Extract all unique elements
unique(myMat)
Dealing with Data Frames
Assume myDf is a data frame:
# Create a data frame from vectors
data.frame(myVar1 = myVec1, myVar2 = myVec2, myVar3 = myVec3)
# Write data frame to file
# row.names: whether to write row names (TRUE by default)
# col.names: whether to write column names (TRUE by default)
write.table(myDf, file = "path/to/file")
# Read data from file
# header: whether to read header from file (FALSE by default)
read.table(file = "path/to/file")
# Import and read data from a csv file (read_csv() is from readr)
read_csv("myDF.csv")
# Count the number of rows in a data frame
nrow(myDf)
# Count the number of columns in a data frame
ncol(myDf)
# Extract all unique elements
unique(myDf)
# Find the range of all values in the data frame
range(myDf)
# Take a glimpse of all the variables in a data frame
glimpse(myDf)
# Apply a function to one variable, separately for each group defined by another variable
tapply(myDf$var1, myDf$var2, myFunction)
# Group data frame by one or more variables
group_by(myDf, var1)
Dealing with Expressions
# Create an expression
myExpr <- expression(var1 ~ var2)
# Extract all variables and function names in an expression
all.names(myExpr)
# Extract all the unique variables in an expression
all.vars(myExpr)
Dealing with Strings
Assume myStr1, myStr2, myStr3 are all strings:
# Paste strings together with a space in between
paste(myStr1, myStr2, myStr3)
# Paste strings together with no spaces in between (like strcat() in Matlab)
paste(myStr1, myStr2, myStr3, sep = "")
# Paste strings together with a specific separator
paste(myStr1, myStr2, myStr3, sep = "_")
# Split string(s) into list(s) based on a regular expression pattern for the separator
strsplit(x, regexp)
str_split(x, regexp)
# TODO:
substr(x, start=n1, stop=n2)
grep(pattern,x, value=FALSE, ignore.case=FALSE, fixed=FALSE)
gsub(pattern, replacement, x, ignore.case=FALSE, fixed=FALSE)
gregexpr(pattern, text, ignore.case=FALSE, perl=FALSE,
fixed=FALSE)
paste(..., sep=" ", collapse=NULL)
sprintf(fmt, ...)
toupper(x)
tolower(x)
nchar(x)
File paths
# Split a file path into parts and get the directory, base name and extension
fileParts <- fileparts(fullPath)
fileDir <- fileParts$pathstr
fileBase <- fileParts$name
fileExt <- fileParts$ext
# The path parts separator (either '\' for Windows or '/' for Unix)
filesep
# Join strings with filesep
fullPath <- fullfile(fileDir, fileName)
Tidyverse strings
# Unquote a string
!!myStr
# Quote a string
quo(myStr)
Dealing with Lists
# Create list from variables (do not have to be the same length)
list(name1 = value1, name2 = value2, name3 = value3)
# Query all the names in a list
names(myList)
Randomization
# Generate 10 random numbers from the Normal(0, 1) distribution
rnorm(10)
# Generate 10 random numbers from the Normal(1.2, 3.4) distribution
rnorm(10, mean = 1.2, sd = 3.4)
# Generate 100 random numbers from the Uniform(0, 1) distribution
runif(100)
Date-Times
# Convert character strings to POSIXlt
strptime(c("20170225230000", "20170226010000"), format = "%Y%m%d%H%M%S")
Wrapper functions
# Apply the same function to a list and return a list
lapply(myList, myFunction)
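# Apply the same function to a vector and return a vector of a pre-specified type
# FUN.VALUE: a template for the value returned by each call (required)
vapply(myVector, myFunction, FUN.VALUE = numeric(1))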
# Apply the same function to a list and return a list or a vector
sapply(myVector, myFunction)
# Apply the same function to multiple lists
mapply(myFunction, myList1, myList2)
# Apply the same function to a variable filtered by factors
tapply(myDf$var, myDf$factor, myFunction)
Tibbles (tidyverse)
Basics
# Pipe a tibble to a dplyr function
myDf %>% func()
# Select rows by row number
myDf %>% filter(row_number() == 3L)
# Select rows by condition
myDf %>% filter(var1 == val)
# Select columns by variable names
myDf %>% select(var1, var2)
# Select columns by omission
myDf %>% select(-colNumToOmit)
# Omit columns by variable name patterns
myDf %>% select(-starts_with("Var"))
# Select rows by position
myDf %>% slice(rowNumber)
# Add columns
myDf %>% mutate(newVar = func(oldVar))
# Make variable a categorical variable
myDf %>% mutate(newVar = factor(oldVar))
# Replace the first match of a pattern in a string variable
myDf %>% mutate(newVar = str_replace(oldVar, oldStr, newStr))
# Make variables strings (returns a list of character vectors, not a data frame)
lapply(myDf, as.character)
# Join two tibbles together by variables, only including rows with data from the first tibble
myDf %>% left_join(myDf2, by = c("Var1", "Var2"))
# Join two tibbles together by variables, only including rows with data from the second tibble
myDf %>% right_join(myDf2, by = c("Var1", "Var2"))
# Join two tibbles together by variables, only including rows with data from both tibbles
myDf %>% inner_join(myDf2, by = c("Var1", "Var2"))
# Join two tibbles together by variables, retaining all data
myDf %>% full_join(myDf2, by = c("Var1", "Var2"))
Tidying data
# Gather multiple columns into 2 columns:
# The first "key" column has the original column name as value
# The second "value" column retains the original value
myDf %>% gather(key = "keyVar", value = "valueVar", columns)
# Spread a key-value pair of columns into multiple columns (the inverse of gather)
myDf %>% spread(key = keyVar, value = valueVar)
# Unite several columns into one column
myDf %>% unite("newVar", c("oldVar1", "oldVar2", "oldVar3"), sep = "_")
# Separate one column into several columns
myDf %>% separate("oldVar", c("newVar1", "newVar2", "newVar3"), sep = "_")
Plotting
Basic plotting
# Generate a color from RGB values
rgb(red, green, blue, alpha)
# Query all graphical parameters:
# col: color
# type: plot type ("p" = points, "l" = lines, "b" = both); use lty for the line style
# lwd: line width
# pch: plotting character (symbol)
# cex: magnification
# ylim: y-axis limits
?par
# Plot a vector vec
plot(vec)
# Plot connected line segments
lines(vec)
# Plot points
points(vec)
# Plot a histogram
hist(vec)
ggplot2
# Build a canvas with the data
myPlot <- ggplot(data = myDf, aes(x = varX, y = varY))
# Add a violin plot
myPlot + geom_violin()
# Add a jitter plot
myPlot + geom_jitter()
# Add a combined violin and jitter plot
myPlot + geom_violin() + geom_jitter()
# Add a beeswarm plot
myPlot + geom_beeswarm()
# Save the plot
ggsave("plotname.png")
ggpubr
# Add a paired box plot by id
ggpaired(myDf, x = varX, y = varY, id = pairedVar, ..., facet.by = varZ)
Statistics
Linear Model & ANOVA
# Generate a linear model
model <- lm(formula = var1 ~ var2, data = myDf)
# Get the summary statistics of the linear model
summary(model)
Statistical Tests
# Perform a Kruskal-Wallis Test
kruskal.test(formula = var1 ~ var2, data = myDf)
# Perform a two-sample Kolmogorov-Smirnov Test
ks.test(x = myDf$var1, y = myDf$var2)
# Perform the Shapiro-Wilk test for normality (takes a numeric vector, not a data frame)
shapiro.test(myDf$var1)
# Perform the Welch Two Sample t-test
t.test(x ~ y, data = myDf)
# Perform the Wilcoxon rank-sum test (Mann-Whitney U Test)
wilcox.test(x ~ y, data = myDf)
# Perform a two-way ANOVA with an interaction effect
# (varY * varZ expands to varY + varZ + varY:varZ; use varY + varZ for main effects only)
aov(varX ~ varY * varZ, data = myDf)
# Perform F-test for variance
var.test(varX ~ varY, data = myDf, alternative = "two.sided")