### META INFO
# Contacts:
# davide.eynard@gmail.com
# http://davide.eynard.it
### Installation
# Download binaries from http://cran.r-project.org/ or https://cran.rstudio.com
# or, on linux, just get the base packages (MASS can also be installed that way)
# MAC OS : if you have problems with locales, check this out:
# https://cran.r-project.org/bin/macosx/RMacOSX-FAQ.html#Internationalization-of-the-R_002eapp
# from the terminal: defaults write org.R-project.R force.LANG en_US.UTF-8
# from R itself: system("defaults write org.R-project.R force.LANG en_US.UTF-8")
# TODO: also get RStudio: https://www.rstudio.com/, we are going to use it
# during our labs!
# NOTE: the system is case sensitive!
# NOTE: some objects only exist under a compound name, e.g. month.name exists
# but typing month alone will give "object 'month' not found"
# => use tab completion as an aid to entering function names!!!
### Install new packages:
# try with ISLR and MASS
# Installing and loading packages. 'packagename' / 'package name' below are
# placeholders: replace them with a real package name (e.g. 'ISLR' or 'MASS').
install.packages() # no argument: opens package installation window in R - does not work in RStudio
install.packages('packagename')
# ... check if the package has been installed correctly (also used to load the package)
library() # no argument: show list of libraries
library('package name') # load a library module (placeholder name, see above)
search() # shows the libraries loaded in memory
ls("package:packagename") # show variables in package (the package must be loaded first)
# also see http://r-pkgs.had.co.nz/namespace.html
# look for the paths where libraries are stored
.libPaths()
# Look for other datasets/code related to the book:
# http://www-bcf.usc.edu/~gareth/ISL/data.html
### Looking for help
# Different ways of reaching the built-in documentation.
help # NOTE(review): a bare function name prints the function itself; help() or help.start() may have been intended - confirm
?plot # help page for a single function
help.search("regression") # equivalent to ??regression
help(package="packagename") # gets info about package ('packagename' is a placeholder)
example(seq) # runs the examples from the help page of seq
### Set up the working directory
# Relative file names used later in this script (e.g. 'Auto.data') are looked up
# in this directory.
setwd('~/Desktop')
### Options
# Print pi with the current number of significant digits.
pi
# Take a snapshot of every option so that it can be restored afterwards.
defaults <- options()
# Ask for 15 significant digits and print pi again to see the difference.
options(digits = 15)
pi
# Put all options back the way they were; pi prints as it did at first.
options(defaults)
pi
### VARIABLE ASSIGNMENTS:
# Both `<-` and `=` assign a value to a name at the top level; the two lines
# below are equivalent.
a <- 'whatever'
a = 'whatever'
# A name can be rebound to a value of a different type at any time.
a <- 5
a <- c(1, 2, 3)
a <- c(1:10)
# ==> the seq() function
# actually, 1:10 is a shorthand for seq(1,10)
# we can just write a = 1:10
# Argument names are written in full (length.out, along.with) rather than
# relying on partial matching of abbreviations.
x <- seq(-pi, pi, length.out = 100) # 100 equally spaced points
x <- seq(-pi, pi, by = .1)          # fixed step of 0.1
# along.with takes the number of points from an existing vector; `a` is used
# because it is already defined at this point of the script.
x <- seq(-pi, pi, along.with = a)   # as many points as elements in a (10)
# rep
x <- rep(1:3, each = 3)  # 1 1 1 2 2 2 3 3 3
x <- rep(1:3, times = 2) # 1 2 3 1 2 3
# unique
unique(x)                # 1 2 3
### managing variables
ls() # list the objects defined in the workspace
ls(all.names=TRUE) # also list the objects whose name begins with a dot
rm(name) # remove one object ('name' is a placeholder for an existing object)
rm(list=ls()) # removes all: every variable defined so far is gone after this line
### operations on vectors
# note that operations are performed element wise
### CORRELATION example:
# importance of real random data: https://www.google.it/search?q=random+numbers+book
# runif takes random numbers from a uniform distribution
runif(10, 0, 10)
round(runif(10, 0, 10))
# sample() returns a random permutation of a given sequence
x <- 1:10
sample(x, size = 10, replace = FALSE)
# rnorm draws from a normal distribution, with mean=0 and sd=1 if not specified differently
set.seed(123) # set.seed sets the random seed, if you use the same you will end up with identical random sequences
x <- rnorm(50)
# y is x shifted by about 50 plus a little noise, so the two are strongly correlated
y <- x + rnorm(50, mean = 50, sd = .1)
cor(x, y)
# note that cor(x,y) = cov(x,y)/(sd(x)*sd(y))
# cov(x,y) = mean((x-mean(x))*(y-mean(y)))
# sd(x) = sqrt(mean((x-mean(x))^2))
# (R's cov() and sd() divide by n-1 instead of n; the factor cancels out in cor)
# why is sd calculated dividing by n-1 instead of n?
# see http://mathworld.wolfram.com/StandardDeviation.html
# and https://en.wikipedia.org/wiki/Bessel%27s_correction
# tell something about variance / sd - use the following as an example
# of operations on vectors:
# The hand-written formula below and sd() give the same number. x is used
# because it is defined just above, so the example does not depend on
# variables created in earlier sections.
sqrt(sum((x - mean(x))^2) / (length(x) - 1))
sd(x)
# var(x) is defined as corrected_mean((x-mean(x))^2)
### MATRICES:
?matrix
# matrix() fills the values column by column unless byrow=TRUE is given
x <- matrix(c(1, 2, 3, 4), 2, 2)
a <- matrix(c(1:4), 2, 2, byrow = TRUE)
b <- matrix(c(5:8), 2, 2, byrow = TRUE)
a * b   # element-wise product
a %*% b # matrix product
# [,1] [,2]
# [1,] 19 22
# [2,] 43 50
v <- c(1, 2)
v %*% a
# [,1] [,2]
# [1,] 7 10
# IS V A VECTOR? TRY WITH THE COMMAND "dim()"
dim(a) # 2 2
dim(v) # NULL: a plain vector has no dimensions
# NOTE that if you don't treat v as a matrix you will be able to
# multiply it both from left and from right, with no errors (the
# interpreter will try to flip it the way it makes sense)
a %*% v
# [,1]
# [1,] 5
# [2,] 11
# NOTE that the vector might be changed into vertical/horizontal
# to make the two arguments conformable.
as.matrix(v) # vectors (horizontal) become vertical by default
# [,1]
# [1,] 1
# [2,] 2
# The next product is a 2x1 matrix times a 2x2 matrix, which is not
# conformable. try() prints the error message without stopping the script
# when the file is run with source().
try(as.matrix(v) %*% a)
# error
t(as.matrix(v)) %*% a
# [,1] [,2]
# [1,] 7 10
### REFERENCE ELEMENTS OF A VECTOR/MATRIX
# Indexing a vector: positive indices pick elements, negative ones drop them.
x <- c(4, 7, 2, 10, 1, 0)
x[2]          # one element
x[1:3]        # a contiguous range
x[c(1, 3, 4)] # arbitrary positions
x[-3]         # everything except the third element
x[-c(2, 3)]   # everything except the second and third elements
# show these also work with matrices
# (1:100 is laid out column by column, so column 5 starts at 41)
a <- matrix(1:100, nrow = 10, ncol = 10)
a[1, 5]
# [1] 41
a[c(1, 3, 4), c(2, 4, 6)]
# [,1] [,2] [,3]
# [1,] 11 31 51
# [2,] 13 33 53
# [3,] 14 34 54
### functions that return indices
# The functions covered in this section (a bare name prints the function itself).
which
which.max
which.min
match
# NOTE(review): the calls below assume a still holds the 10x10 matrix of
# 1:100 built in the indexing section - confirm when running out of order.
which(a == 20)  # position(s) where the condition holds
which(a == 100)
which.max(a)    # position of the largest value
which.min(a)    # position of the smallest value
# With repeated values which() reports every match, match() only the first.
a <- matrix(sample(1:10, 100, replace = TRUE), nrow = 10, ncol = 10)
which(a == 10)
match(10, a)
### check for wrong stuff
# show 0/0, Inf/Inf, whatever/0, NA, Nan, Inf
# Two random vectors with special values planted at known positions, so that
# their element-wise ratio contains NaN, infinite and NA entries.
a <- rnorm(10)
b <- rnorm(10)
a[c(3, 5)] <- c(0, Inf)
b[c(3, 5, 7, 9)] <- c(0, Inf, 0, NA)
# position 3: 0/0, position 5: Inf/Inf, position 7: x/0, position 9: x/NA
c <- a / b
is.infinite(c)   # TRUE only for +Inf / -Inf
is.nan(c)        # TRUE only for NaN
is.na(c)         # TRUE for both NA and NaN
any(is.na(c))    # is there at least one missing value?
which(is.na(c))  # where are the missing values?
na.omit(c)       # the vector without its missing values
### load data
# First attempt: read the file with the default settings and inspect the
# result in the spreadsheet-like editor opened by fix().
# NOTE(review): presumably meant to show that the header line ends up among
# the data when header=TRUE is missing - confirm against the data file.
Auto <- read.table('Auto.data')
fix(Auto)
# Second attempt: take the column names from the first line and treat "?" as
# a missing value. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
Auto <- read.table('Auto.data', header = TRUE, na.strings = "?")
# Drop the rows that contain at least one missing value.
Auto <- na.omit(Auto)
dim(Auto)   # number of rows and columns left
names(Auto) # column names
### additional graphical and numerical summaries
# In a fresh session the columns of Auto are not visible as plain variables
# yet, so this first call fails with an "object not found" error. try()
# prints the message without stopping the script when run with source().
try(plot(cylinders, mpg))
# Either reference the columns through the data frame ...
plot(Auto$cylinders, Auto$mpg)
# ... or attach the data frame so that its columns can be used by name.
attach(Auto)
plot(cylinders, mpg)
# Converting to a factor makes plot() draw one boxplot per group. This
# creates a workspace variable that hides the attached column.
cylinders <- as.factor(cylinders)
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
# make width proportional to the sqrt of the sample sizes
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
plot(cylinders, mpg, col = "red", varwidth = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
hist(mpg)
hist(mpg, col = 2)
hist(mpg, col = 2, breaks = 15)
# Scatterplot matrix of all the columns, then of a chosen subset only.
pairs(Auto)
pairs(~ mpg + displacement + horsepower + weight + acceleration, Auto)
plot(horsepower, mpg)
# Interactive: click on points of the last plot to label them with `name`.
identify(horsepower, mpg, name)
summary(Auto)
summary(mpg)
### SAVING TO FILE:
# Simulated data: y follows x, shifted by about 50 and with some noise.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = .5)
# Everything drawn between pdf() and dev.off() goes to the file, not the screen.
pdf('~/Desktop/test.pdf')
plot(
  x, y,
  main = 'main',
  xlab = 'hours of study',
  ylab = 'final grade'
)
# Close the device so that the file is actually written.
dev.off()
### 3D PLOTTING:
# A 50x50 grid over [-pi, pi] on both axes.
x <- seq(-pi, pi, length.out = 50)
y <- x
# outer() evaluates the function on every (x, y) pair: rows follow x, columns follow y.
f <- outer(x, y, function(u, v) cos(v) / (1 + u^2))
contour(x, y, f)
contour(x, y, f, nlevels = 45) # same surface with more contour lines
# Half the difference between f and its transpose.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 45)
image(x, y, fa) # colour-coded view
persp(x, y, fa) # perspective view