### META INFO
# Contacts:
# davide.eynard@gmail.com
# http://davide.eynard.it


### Installation
# Download binaries from http://cran.r-project.org/ or https://cran.rstudio.com
# or, on linux, just get the base packages (MASS can also be installed that way)

# MAC OS : if you have problems with locales, check this out:
# https://cran.r-project.org/bin/macosx/RMacOSX-FAQ.html#Internationalization-of-the-R_002eapp
# from the terminal: defaults write org.R-project.R force.LANG en_US.UTF-8
# from R itself: system("defaults write org.R-project.R force.LANG en_US.UTF-8")

# TODO: also get RStudio: https://www.rstudio.com/, we are going to use it
#       during our labs!

# NOTE: the system is case sensitive!
# NOTE: a dot is part of an object's name, e.g. month.name exists but typing
#       month alone gives "object 'month' not found"
#       => use tab completion as an aid to entering function names!!!


### Install new packages:
# try with ISLR and MASS
# With no arguments a package chooser is opened (R GUI only, not RStudio)
install.packages()
install.packages(pkgs = 'packagename')

# ... verify the installation: library() with no arguments lists what is
# installed, library(<name>) loads a package (and fails if it is missing)
library()
library(package = 'packagename', character.only = TRUE)
# attached packages/environments, in lookup order
search()
# objects made available by an attached package
ls(name = "package:packagename")
# also see http://r-pkgs.had.co.nz/namespace.html

# directories in which packages are installed and looked up
.libPaths()

# Look for other datasets/code related to the book:
# http://www-bcf.usc.edu/~gareth/ISL/data.html


### Looking for help
# manual page of a single function
help("plot")
# full-text search through the installed documentation
??regression
# index of everything documented in a package
help(package = "packagename")
# run the examples section of a help page
example("seq")

### Set up the working directory
setwd(dir = '~/Desktop')

### Options
# pi printed with the current number of significant digits
pi
# snapshot of every option, so the session can be restored afterwards
defaults <- options()
# raise the printing precision and look at pi again
options(digits = 15)
pi
# restore the saved options; pi prints as it did at the start
options(defaults)
pi

### VARIABLE ASSIGNMENTS:
# both `<-` and `=` assign at the top level; `<-` is the conventional choice
a <- 'whatever'
a = 'whatever'
# the same name can be rebound to a value of any type
a <- 5
a <- c(1, 2, 3)
a <- c(1:10)

# ==> the seq() function
# actually, 1:10 is a shorthand for seq(1,10)
# we can just write a = 1:10
# seq() builds regular sequences. Argument names are spelled out in full
# rather than relying on partial matching (length -> length.out,
# along -> along.with).
x <- seq(-pi, pi, length.out = 100)  # 100 evenly spaced points
x <- seq(-pi, pi, by = 0.1)          # fixed step of 0.1
# along.with takes the output length from another vector; an inline vector is
# used because no other vector has been defined at this point of the script
x <- seq(-pi, pi, along.with = 1:10)

# rep
x <- rep(seq_len(3), each = 3)   # 1 1 1 2 2 2 3 3 3
x <- rep(seq_len(3), times = 2)  # 1 2 3 1 2 3

# unique: the distinct values, in order of first appearance
unique(x)

### managing variables
ls()                  # names of the objects in the current environment
ls(all.names = TRUE)  # also list names that start with a dot
# create a throwaway object so that rm() has something to remove; calling
# rm() on a name that does not exist only produces a warning
name <- "temporary"
rm(name)
rm(list = ls())       # removes all objects listed by ls() - interactive use only


### operations on vectors
# note that operations are performed element wise

### CORRELATION example:
# rnorm initializes randomly, with mean=0 and sd=1 if not specified differently
x <- rnorm(n = 50)
# y is x shifted by about 50 plus a small amount of noise
y <- x + rnorm(n = 50, mean = 50, sd = 0.1)
cor(x, y)
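# note that cor(x,y) = cov(x,y)/(sd(x)*sd(y))
# population formulas: cov(x,y) = mean((x-mean(x))*(y-mean(y)))
#                      sd(x)    = sqrt(mean((x-mean(x))^2))
# R's cov() and sd() divide by n-1 instead of n; the factor cancels in cor(x,y)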

# why is sd calculated dividing by n-1 instead of n?
# see http://mathworld.wolfram.com/StandardDeviation.html
# and https://en.wikipedia.org/wiki/Bessel%27s_correction
# tell something about variance / sd - use the following as an example
# of operations on vectors:
set.seed(seed = 123)
a <- rnorm(n = 10)
# sample standard deviation by hand: summed squared deviations over n - 1
sqrt(sum((a - mean(a))^2) / (length(a) - 1))
# the built-in function gives the same number
sd(a)
# var(x) is the corrected mean of squared deviations: sum((x-mean(x))^2)/(length(x)-1)


### MATRICES:
?matrix
# matrix() fills column by column unless byrow = TRUE is given
x <- matrix(c(1, 2, 3, 4), 2, 2)
a <- matrix(1:4, 2, 2, byrow = TRUE)
b <- matrix(5:8, 2, 2, byrow = TRUE)
a * b     # element-wise product
a %*% b   # matrix product

#      [,1] [,2]
# [1,]   19   22
# [2,]   43   50

v <- c(1, 2)
v %*% a

#      [,1] [,2]
# [1,]    7   10

# NOTE that if you don't treat v as a matrix you will be able to
# multiply it both from left and from right, with no errors (the
# interpreter will try to flip it the way it makes sense)

a %*% v

#      [,1]
# [1,]    5
# [2,]   11

# NOTE that the vector might be changed into vertical/horizontal
# to make the two arguments conformable.

as.matrix(v) # a plain vector becomes a one-column (vertical) matrix

#      [,1]
# [1,]    1
# [2,]    2

# a 2x1 matrix times a 2x2 matrix is not conformable: the error is intended,
# and try() reports it without stopping the rest of the script
try(as.matrix(v) %*% a)

t(as.matrix(v)) %*% a
#      [,1] [,2]
# [1,]    7   10
#      [,1] [,2]
# [1,]    7   10


### SAVING TO FILE:
x <- rnorm(n = 50)
y <- x + rnorm(n = 50, mean = 50, sd = 0.5)
# everything drawn between pdf() and dev.off() is written to the file
pdf(file = '~/Desktop/test.pdf')
plot(x, y, main = 'main', xlab = 'hours of study', ylab = 'final grade')
dev.off()

### 3D PLOTTING:
x <- seq(from = -pi, to = pi, length.out = 50)
y <- x
# evaluate cos(v) / (1 + u^2) for every (u, v) pair of the x-by-y grid
f <- outer(x, y, FUN = function(u, v) cos(v) / (1 + u^2))
contour(x, y, f)
contour(x, y, f, nlevels = 45)
# half the difference between f and its transpose
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 45)
image(x, y, fa)
persp(x, y, fa)

### REFERENCE ELEMENTS OF A VECTOR/MATRIX
x <- c(4, 7, 2, 10, 1, 0)

x[2]             # a single element
x[1:3]           # a range of positions
x[c(1, 3, 4)]    # arbitrary positions
x[-3]            # everything except the third element
x[-c(2, 3)]      # everything except the second and third elements

# show these also work with matrices
a <- matrix(1:100, nrow = 10, ncol = 10)
a[1, 5]

# [1] 41

a[c(1, 3, 4), c(2, 4, 6)]

#      [,1] [,2] [,3]
# [1,]   11   31   51
# [2,]   13   33   53
# [3,]   14   34   54

### functions that return indices
# bare names such as `which` would only print the function definitions,
# so each function is shown on a small example instead
x <- c(4, 7, 2, 10, 1, 0)
which(x > 3)        # positions where the condition holds: 1 2 4
which.max(x)        # position of the (first) maximum: 4
which.min(x)        # position of the (first) minimum: 6
match(c(10, 5), x)  # position of each value in x, NA when absent: 4 NA

### check for wrong stuff
# show 0/0, Inf/Inf, whatever/0, NA, NaN, Inf
a <- rnorm(10)
b <- rnorm(10)
a[3] <- 0
a[5] <- Inf
b[3] <- 0     # 0 / 0         -> NaN
b[5] <- Inf   # Inf / Inf     -> NaN
b[7] <- 0     # nonzero / 0   -> Inf or -Inf
b[9] <- NA    # anything / NA -> NA
# not called `c`: a variable with that name would shadow base::c()
ratio <- a / b

is.infinite(ratio)
is.nan(ratio)
is.na(ratio)         # TRUE for NaN as well as for NA
any(is.na(ratio))
which(is.na(ratio))
na.omit(ratio)       # drops both the NA and the NaN entries

### load data
# first attempt with the default settings, then inspect the result in the
# spreadsheet-like editor
Auto <- read.table(file = 'Auto.data')
fix(Auto)

# second attempt: the first line holds the column names and "?" marks a
# missing value
Auto <- read.table(file = 'Auto.data', header = TRUE, na.strings = "?")
# drop the rows that contain missing values
Auto <- na.omit(Auto)
dim(Auto)
names(Auto)


### additional graphical and numerical summaries
# before attach() the columns of Auto are not visible as variables, so this
# call is expected to fail (unless a `cylinders` object already exists);
# try() reports the error without stopping the rest of the script
try(plot(cylinders, mpg))
plot(Auto$cylinders, Auto$mpg)
# NOTE: attach() is used for brevity in this lab; in real code prefer
# Auto$column or with(Auto, ...)
attach(Auto)
plot(cylinders, mpg)
# as a factor, cylinders makes plot() draw box plots instead of a scatterplot
cylinders <- as.factor(cylinders)
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
# make width proportional to the sqrt of the sample sizes
plot(cylinders, mpg, col = "red", varwidth = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
hist(mpg)
hist(mpg, col = 2)
hist(mpg, col = 2, breaks = 15)
# pairs() rejects character columns, and since R 4.0 read.table() keeps
# strings (the name column) as character; data.matrix() turns character and
# factor columns into integer codes so every column can be plotted
pairs(data.matrix(Auto))
pairs(~ mpg + displacement + horsepower + weight + acceleration, Auto)
plot(horsepower, mpg)
# click on points to label them with the car name (interactive)
identify(horsepower, mpg, name)
summary(Auto)
summary(mpg)