Author: Sonali Arora (sarora@fredhutch.org)
Date: 20-22 July, 2015
The material in this course requires R version 3.2.1 and Bioconductor version 3.2
dir, list.files
read.table, scan
c, factor, data.frame, matrix
summary, table, xtabs
plot
match, %in%, which
split, cut
strsplit, grep, sub
lapply, sapply, mapply
t.test, lm, anova
dist, hclust
biocLite, install.packages
traceback, debug, browser
# arithmetic mean of the integers 1 through 10
mean(1:10)
## [1] 5.5
# draw 10 standard-normal values; the first argument of rnorm() is the number
# of draws, so the count is passed directly (a longer vector would only be
# used for its length). The values differ from run to run.
rnorm(10)
## [1] -0.33290892 -1.03732032 0.00358368 -0.45695476 1.00505058 0.67227284 -0.90495556 -1.45142584
## [9] 0.22424728 -0.20679577
# summary statistics of a fresh sample of 10 draws
summary(rnorm(10))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.9524 -0.4189 0.4298 0.3124 0.6072 2.2260
data(iris)
# find those rows where Petal.Width equals 0.2; the comparison uses a small
# tolerance because `==` on floating-point numbers can miss values that
# differ only by rounding error
iris[abs(iris$Petal.Width - 0.2) < 1e-8, ]
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
# keep only the rows whose Sepal.Length is below 4.5
with(iris, iris[Sepal.Length < 4.5, ])
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 9 4.4 2.9 1.4 0.2 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 43 4.4 3.2 1.3 0.2 setosa
# find all rows belonging to setosa and keep them in their own data frame
setosa_iris <- iris[iris$Species == "setosa", ]
# number of rows and columns in the subset
dim(setosa_iris)
## [1] 50 5
# first six rows of the subset
head(setosa_iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# remove the Species column so that only the four measurement columns remain
iris <- iris[, names(iris) != "Species"]
dim(iris)
## [1] 150 4
# mean of each remaining column, returned as a list with one entry per column
lapply(X = iris, FUN = mean) # simpler: colMeans(iris)
## $Sepal.Length
## [1] 5.843333
##
## $Sepal.Width
## [1] 3.057333
##
## $Petal.Length
## [1] 3.758
##
## $Petal.Width
## [1] 1.199333
# same computation, with the result simplified to a named vector
sapply(X = iris, FUN = mean)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
# mean of every row (MARGIN = 1 applies the function over rows)
apply(X = iris, MARGIN = 1, FUN = mean) # simpler: rowMeans(iris)
## [1] 2.550 2.375 2.350 2.350 2.550 2.850 2.425 2.525 2.225 2.400 2.700 2.500 2.325 2.125 2.800
## [16] 3.000 2.750 2.575 2.875 2.675 2.675 2.675 2.350 2.650 2.575 2.450 2.600 2.600 2.550 2.425
## [31] 2.425 2.675 2.725 2.825 2.425 2.400 2.625 2.500 2.225 2.550 2.525 2.100 2.275 2.675 2.800
## [46] 2.375 2.675 2.350 2.675 2.475 4.075 3.900 4.100 3.275 3.850 3.575 3.975 2.900 3.850 3.300
## [61] 2.875 3.650 3.300 3.775 3.350 3.900 3.650 3.400 3.600 3.275 3.925 3.550 3.800 3.700 3.725
## [76] 3.850 3.950 4.100 3.725 3.200 3.200 3.150 3.400 3.850 3.600 3.875 4.000 3.575 3.500 3.325
## [91] 3.425 3.775 3.400 2.900 3.450 3.525 3.525 3.675 2.925 3.475 4.525 3.875 4.525 4.150 4.375
## [106] 4.825 3.400 4.575 4.200 4.850 4.200 4.075 4.350 3.800 4.025 4.300 4.200 5.100 4.875 3.675
## [121] 4.525 3.825 4.800 3.925 4.450 4.550 3.900 3.950 4.225 4.400 4.550 5.025 4.250 3.925 3.925
## [136] 4.775 4.425 4.200 3.900 4.375 4.450 4.350 3.875 4.550 4.550 4.300 3.925 4.175 4.325 3.950
# define a vector of 1000 standard-normal draws
x <- rnorm(1000)
# vectorized calculation: add independent noise (sd = 0.8) to every element
y <- x + rnorm(1000, sd = 0.8)
# object construction
df <- data.frame(x = x, y = y)
# linear model of y on x
fit <- lm(y ~ x, data = df)
# base-graphics scatter plot with the fitted line drawn in red; par() returns
# the previous settings, which are restored afterwards so the two-panel
# layout does not leak into later base plots
old_par <- par(mfrow = c(1, 2))
plot(y ~ x, data = df, cex.lab = 2)
abline(fit, col = "red", lwd = 2)
par(old_par)
library(ggplot2)
# the same scatter plot drawn with ggplot2: points plus a linear-model smoother
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_point() +
  stat_smooth(method = "lm")
# report the R version, platform, locale and attached/loaded packages of the
# current session (called once; the recorded output below appears once)
sessionInfo()
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-unknown-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.2 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
## [4] LC_COLLATE=C LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_1.0.1 BiocStyle_1.7.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.11.6 codetools_0.2-14 digest_0.6.8 MASS_7.3-43 grid_3.2.1
## [6] plyr_1.8.3 gtable_0.1.2 formatR_1.2 magrittr_1.5 scales_0.2.5
## [11] evaluate_0.7 stringi_0.5-5 reshape2_1.4.1 rmarkdown_0.7 labeling_0.3
## [16] proto_0.3-10 tools_3.2.1 stringr_1.0.0 munsell_0.4.2 yaml_2.1.13
## [21] colorspace_1.2-6 htmltools_0.2.6 knitr_1.10.5