View on GitHub

Settings_Anaconda

Settings and notes for packages in Anaconda

Common R Functions

Last Updated 2019-06-24 by Adam Lu


Swirl

# Load Swirl
library("swirl")

# Start Swirl
swirl()

# Enter play() mode
play()

# Exit play() mode
nxt()

# Exit Swirl
bye()

Dealing with Packages

Basic commands
# Install package
install.packages("packageName")

# Load package
library(packageName)
Common Packages
# tidyverse includes the packages readr, dplyr, ggplot2 ... etc.
library(tidyverse)

# ggbeeswarm for beeswarm plots (geom_beeswarm(), etc.)
library(ggbeeswarm)

# broom for representing model data (tidy(), etc.)
library(broom)

# ggpubr for paired box plots (ggpaired(), ggpar(), etc.)
library(ggpubr)

# matlab functions (fullfile, fileparts, etc.)
library(matlab)

General

# Set working directory
setwd("pathToDir")

# List all variable names in the workspace
ls()

# Find the class of a variable
class(var)

# Remove a variable var
rm(var)

# Remove all variables in the workspace
rm(list = ls())

# Run a script
source("script.R")

Dealing with Functions

Assume myFun is a function:

# Look up the documentation for a function
?myFun

# Print the function definition
myFun

# Examine the arguments for a function
args(myFun)

Dealing with Vectors

Assume myVec is a vector:

# Create a vector by concatenation
c(var1, var2, var3)

# Create a sequence vector
seq(from = 0, to = 1, by = 0.25)

# Number of elements in a vector
length(myVec)

# Maximum, minimum and range of all elements of a vector
#	na.rm = TRUE: ignore missing (NA) entries (FALSE by default)
max(myVec)
min(myVec)
range(myVec)

# Sum of all elements of a vector
sum(myVec)

# Mean of all elements of a vector
mean(x = myVec)

# Standard deviation of all elements of a vector
sd(myVec)

# Extract all unique elements
unique(myVec)

# Extract a random sample of n elements without replacement
sample(x = myVec, size = n)

# Extract a random sample of n elements with replacement
sample(x = myVec, size = n, replace = TRUE)
Logical vectors
# Find the indices of the elements that are TRUE
which(logVec)

Dealing with Matrices

Assume myMat is a matrix:

# Create a matrix from a vector
matrix(data = myVec, nrow = nRows, ncol = nCols)

# Number of elements in a matrix
length(myMat)

# Sum of all elements of a matrix
sum(myMat)

# Mean of all elements of a matrix
mean(x = myMat)

# Standard deviation of all elements of a matrix
sd(myMat)

# Extract all unique elements
unique(myMat)

Dealing with Data Frames

Assume myDf is a data frame:

# Create a data frame from vectors
data.frame(myVar1 = myVec1, myVar2 = myVec2, myVar3 = myVec3)

# Write data frame to file
#	row.names: whether to write row names (TRUE by default)
#	col.names: whether to write column names (TRUE by default)
write.table(myDf, file = "path/to/file")

# Read data from file
#	header: whether to read header from file (FALSE by default)
read.table(file = "path/to/file")

# Import and read data from a csv file (read_csv() is from readr)
read_csv("myDF.csv")

# Count the number of rows in a data frame
nrow(myDf)

# Count the number of columns in a data frame
ncol(myDf)

# Extract all unique elements
unique(myDf)

# Find the range of all values in the data frame
range(myDf)

# Take a glimpse of all the variables in a data frame
glimpse(myDf)

# Apply a function to one variable separately for each group defined by another variable
tapply(myDf$var1, myDf$var2, myFunction)
       
# Group data frame by one or more variables
myDf %>% group_by(var1)

Dealing with Expressions

# Create an expression
myExpr <- expression(var1 ~ var2)

# Extract all variables and function names in an expression
all.names(myExpr)

# Extract all the unique variables in an expression
all.vars(myExpr)

Dealing with Strings

Assume myStr1, myStr2, myStr3 are all strings:

# Paste strings together with a space in between
paste(myStr1, myStr2, myStr3)

# Paste strings together with no spaces in between (like strcat() in Matlab)
paste(myStr1, myStr2, myStr3, sep = "")

# Paste strings together with a specific separator
paste(myStr1, myStr2, myStr3, sep = "_")

# Split string(s) into list(s) based on a regular expression pattern for the separator
strsplit(x, regexp)
str_split(x, regexp)

# TODO:
substr(x, start=n1, stop=n2)
grep(pattern,x, value=FALSE, ignore.case=FALSE, fixed=FALSE)
gsub(pattern, replacement, x, ignore.case=FALSE, fixed=FALSE)
gregexpr(pattern, text, ignore.case=FALSE, perl=FALSE,
fixed=FALSE)
paste(... , sep="", collapse=NULL)
sprintf(fmt, ...)
toupper(x)
tolower(x)
nchar(x)
File paths
# Split a file path into parts and get the directory, base name and extension
fileParts <- fileparts(fullPath)
fileDir <- fileParts$pathstr
fileBase <- fileParts$name
fileExt <- fileParts$ext

# The path parts separator (either '\' for Windows or '/' for Unix)
filesep

# Join strings with filesep
fullPath <- fullfile(fileDir, fileName)
Tidyverse strings
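# Unquote (inject) a quoted expression inside a tidy evaluation function
!!myStr

# Quote an expression (capture it as a quosure without evaluating it)
quo(myStr)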

Dealing with Lists

# Create list from variables (do not have to be the same length)
list(name1 = value1, name2 = value2, name3 = value3)

# Query all the names in a list
names(myList)

Randomization

# Generate 10 random numbers from the Normal(0, 1) distribution
rnorm(10)

# Generate 10 random numbers from the Normal distribution with mean 1.2 and standard deviation 3.4
rnorm(10, mean = 1.2, sd = 3.4)

# Generate 100 random numbers from the Uniform(0, 1) distribution
runif(100)

Date-Times

# Convert character strings to POSIXlt
strptime(c("20170225230000", "20170226010000"), format = "%Y%m%d%H%M%S")

Wrapper functions

# Apply the same function to a list and return a list
lapply(myList, myFunction)

# Apply the same function to a vector and return a vector
#	FUN.VALUE: template for the type and length of each result (required)
vapply(myVector, myFunction, FUN.VALUE = numeric(1))

# Apply the same function to a vector or list and simplify the result
#	to a vector or matrix when possible (otherwise a list is returned)
sapply(myVector, myFunction)

# Apply the same function to multiple lists
mapply(myFunction, myList1, myList2)

# Apply the same function to a variable filtered by factors
tapply(myDf$var, myDf$factor, myFunction)

Tibbles (tidyverse)

Basics
# Pipe a tibble to a dplyr function
myDf %>% func()

# Select rows by row number
myDf %>% filter(row_number() == 3L)

# Select rows by condition
myDf %>% filter(var1 == val)

# Select columns by variable names
myDf %>% select(var1, var2)

# Select columns by omission
myDf %>% select(-colNumToOmit)

# Omit columns by variable name patterns
myDf %>% select(-starts_with("Var"))

# Select rows by position
myDf %>% slice(rowNumber)

# Add columns
myDf %>% mutate(newVar = func(oldVar))

# Make variable a categorical variable
myDf %>% mutate(newVar = factor(oldVar))

# Replace a substring pattern in a string variable
myDf %>% mutate(newVar = str_replace(oldVar, oldStr, newStr))

# Convert all variables to strings (returns a list of character vectors)
lapply(myDf, as.character)

# Join two tibbles together by variables, only including rows with data from the first tibble
myDf %>% left_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, only including rows with data from the second tibble
myDf %>% right_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, only including rows with data from both tibbles
myDf %>% inner_join(myDf2, by = c("Var1", "Var2"))

# Join two tibbles together by variables, retaining all data
myDf %>% full_join(myDf2, by = c("Var1", "Var2"))
Tidying data
# Gather multiple columns into 2 columns:
#	The first "key" column has the original column name as value
#	The second "value" column retains the original value
myDf %>% gather(key = "keyVar", value = "valueVar", columns)

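# Spread a key-value pair of columns into multiple columns (inverse of gather):
#	Each unique value in the key column becomes a new column name
myDf %>% spread(key = keyVar, value = valueVar)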

# Unite several columns into one column
myDf %>% unite("newVar", c("oldVar1", "oldVar2", "oldVar3"), sep = "_")

# Separate one column into several columns
myDf %>% separate("oldVar", c("newVar1", "newVar2", "newVar3"), sep = "_")

Plotting

Basic plotting
# Generate a color from RGB values
rgb(red, green, blue, alpha)

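# Query graphical parameters (see also ?plot.default for type and ylim):
#	col: color
#	type: plot type ("p" for points, "l" for lines, "b" for both, etc.)
#	lty: line style
#	lwd: line width
#	pch: plotting character (symbol)
#	cex: magnification
#	ylim: y-axis limits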
?par

# Plot a vector vec
plot(vec)

# Plot connected line segments
lines(vec)

# Plot points
points(vec)

# Plot a histogram
hist(vec)
ggplot2
# Build a canvas with the data
myPlot <- ggplot(data = myDf, aes(x = varX, y = varY))

# Add a violin plot
myPlot + geom_violin()

# Add a jitter plot
myPlot + geom_jitter()

# Add a combined violin and jitter plot
myPlot + geom_violin() + geom_jitter()

# Add a beeswarm plot
myPlot + geom_beeswarm()

# Save the plot
ggsave("plotname.png")
ggpubr
# Add a paired box plot by id (variables are given as column-name strings)
ggpaired(myDf, x = "varX", y = "varY", id = "pairedVar", ..., facet.by = "varZ")

Statistics

Linear Model & ANOVA
# Generate a linear model
model <- lm(formula = var1 ~ var2, data = myDf)

# Get the summary statistics of the linear model
summary(model)
Statistical Tests
# Perform a Kruskal-Wallis Test
kruskal.test(formula = var1 ~ var2, data = myDf)

# Perform a two-sample Kolmogorov-Smirnov Test
ks.test(x = myDf$var1, y = myDf$var2)

# Perform the Shapiro-Wilk test for normality (takes a numeric vector)
shapiro.test(myDf$var1)

# Perform the Welch Two Sample t-test (formula is values ~ group)
t.test(x ~ y, data = myDf)

# Perform the Wilcoxon rank-sum test (Mann-Whitney U Test)
wilcox.test(x ~ y, data = myDf)

# Perform a two-way ANOVA with interaction effect
#	varY * varZ expands to varY + varZ + varY:varZ (use + alone for no interaction)
aov(varX ~ varY * varZ, data = myDf)

# Perform the F-test to compare two variances
var.test(varX ~ varY, data = myDf, alternative = "two.sided")