## ----knitr, echo=FALSE, results="hide"-----------------------------------
library("knitr")
opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide",
               fig.width=4,fig.height=4.5,
               message=FALSE)


## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------
BiocStyle::latex()


## ----loadDESeq2, echo=FALSE----------------------------------------------
library("DESeq2")


## ----options, results="hide", echo=FALSE---------------------------------
options(digits=3, width=80, prompt=" ", continue=" ")


## ----quick, eval=FALSE---------------------------------------------------
## dds <- DESeqDataSet(se = se, design = ~ condition)
## dds <- DESeq(dds)
## res <- results(dds)


## ----loadSumExp,results="hide",cache=TRUE--------------------------------
library("parathyroidSE")
data("parathyroidGenesSE")
se <- parathyroidGenesSE
colnames(se) <- se$run


## ----sumExpInput, cache=TRUE---------------------------------------------
library("DESeq2")
ddsPara <- DESeqDataSet(se = se, design = ~ patient + treatment)
ddsPara$treatment <- factor(ddsPara$treatment,
                            levels=c("Control","DPN","OHT"))
ddsPara


## ----loadPasilla---------------------------------------------------------
library("pasilla")
library("Biobase")
data("pasillaGenes")
countData <- counts(pasillaGenes)
colData <- pData(pasillaGenes)[,c("condition","type")]


## ----matrixInput---------------------------------------------------------
dds <- DESeqDataSetFromMatrix(countData = countData,
                              colData = colData,
                              design = ~ condition)
dds$condition <- factor(dds$condition,
                        levels=c("untreated","treated"))
dds


## ----htseqDirI, eval=FALSE-----------------------------------------------
## directory <- "/path/to/your/files/"


## ----htseqDirII----------------------------------------------------------
directory <- system.file("extdata", package="pasilla", mustWork=TRUE)


## ----htseqInput, cache=TRUE----------------------------------------------
sampleFiles <- grep("treated",list.files(directory),value=TRUE)
sampleCondition <- sub("(.*treated).*","\\1",sampleFiles)
sampleTable <- data.frame(sampleName = sampleFiles,
                          fileName = sampleFiles,
                          condition = sampleCondition)
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                       directory = directory,
                                       design= ~ condition)
ddsHTSeq$condition <- factor(ddsHTSeq$condition,
                             levels=c("untreated","treated"))
ddsHTSeq


## ----relevel-------------------------------------------------------------
dds$condition <- relevel(dds$condition, "untreated")


## ----droplevels----------------------------------------------------------
dds$condition <- droplevels(dds$condition)


## ----deseq,cache=TRUE----------------------------------------------------
dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj),]
head(resOrdered)


## ----deseqNoPrior,cache=TRUE,echo=FALSE----------------------------------
# this chunk is used to demonstrate the shrinkage
# of log fold changes 
# note: betaPrior can also be used as an argument to DESeq()
ddsNoPrior <- nbinomWaldTest(dds,betaPrior=FALSE)
resNoPrior <- results(ddsNoPrior)


## ----MA, fig.width=4.5, fig.height=4.5-----------------------------------
plotMA(res, main="DESeq2", ylim=c(-2,2))


## ----MANoPrior, echo=FALSE, fig.width=4.5, fig.height=4.5----------------
plotMA(resNoPrior, 
       main=expression(unshrunken~log[2]~fold~changes), 
       ylim=c(-2,2))


## ----metadata------------------------------------------------------------
mcols(res)$description


## ----export, eval=FALSE--------------------------------------------------
## write.csv(as.data.frame(resOrdered),
##           file="condition_treated_results.csv")


## ----multifactor---------------------------------------------------------
colData(dds)


## ----copyMultifactor-----------------------------------------------------
ddsMF <- dds


## ----subsetMF, echo=FALSE------------------------------------------------
idx <- 1:1000
ddsMF <- ddsMF[idx,]


## ----replaceDesign,cache=TRUE--------------------------------------------
design(ddsMF) <- formula(~ type + condition)
ddsMF <- DESeq(ddsMF)


## ----multiResults--------------------------------------------------------
resMF <- results(ddsMF)
head(resMF)


## ----multiTypeResults----------------------------------------------------
resMFType <- results(ddsMF, contrast=c("type","single-read","paired-end"))
head(resMFType)


## ----echo=FALSE----------------------------------------------------------
ddsblind <- dds
design(ddsblind) <- ~ 1
ddsblind <- estimateDispersions(ddsblind)
# we do blind estimation once, then re-use these for both:
rld <- rlog(ddsblind, blind=FALSE)
vsd <- varianceStabilizingTransformation(ddsblind, blind=FALSE)
rlogMat <- assay(rld)
vstMat <- assay(vsd)


## ----defvsd,eval=FALSE---------------------------------------------------
## rld <- rlog(dds)
## vsd <- varianceStabilizingTransformation(dds)
## rlogMat <- assay(rld)
## vstMat <- assay(vsd)


## ----vsd1, echo=FALSE, fig.width=4.5, fig.height=4.5---------------------
px     <- counts(dds)[,1] / sizeFactors(dds)[1]
ord    <- order(px)
ord    <- ord[px[ord] < 150]
ord    <- ord[seq(1, length(ord), length=50)]
last   <- ord[length(ord)]
vstcol <- c("blue", "black")
matplot(px[ord],
        cbind(assay(vsd)[, 1], log2(px))[ord, ],
        type="l", lty=1, col=vstcol, xlab="n", ylab="f(n)")
legend("bottomright",
       legend = c(
        expression("variance stabilizing transformation"),
        expression(log[2](n/s[1]))),
       fill=vstcol)


## ----vsd2, fig.width=8, fig.height=3, results="hide"---------------------
library("vsn")
par(mfrow=c(1,3))
notAllZero <- (rowSums(counts(dds))>0)
meanSdPlot(log2(counts(dds,normalized=TRUE)[notAllZero,] + 1),
           ylim = c(0,2.5))
meanSdPlot(assay(rld[notAllZero,]), ylim = c(0,2.5))
meanSdPlot(assay(vsd[notAllZero,]), ylim = c(0,2.5))


## ----heatmap-------------------------------------------------------------
library("RColorBrewer")
library("gplots")
select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30]
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)


## ----figHeatmap2a, fig.width=7, fig.height=10----------------------------
heatmap.2(counts(dds,normalized=TRUE)[select,], col = hmcol,
          Rowv = FALSE, Colv = FALSE, scale="none",
          dendrogram="none", trace="none", margin=c(10,6))


## ----figHeatmap2b, fig.width=7, fig.height=10----------------------------
heatmap.2(assay(rld)[select,], col = hmcol,
          Rowv = FALSE, Colv = FALSE, scale="none",
          dendrogram="none", trace="none", margin=c(10, 6))


## ----figHeatmap2c, fig.width=7, fig.height=10----------------------------
heatmap.2(assay(vsd)[select,], col = hmcol,
          Rowv = FALSE, Colv = FALSE, scale="none",
          dendrogram="none", trace="none", margin=c(10, 6))


## ----sampleClust---------------------------------------------------------
distsRL <- dist(t(assay(rld)))


## ----figHeatmapSamples, fig.width=7, fig.height=7------------------------
mat <- as.matrix(distsRL)
rownames(mat) <- colnames(mat) <- with(colData(dds),
                                       paste(condition, type, sep=" : "))
heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(13, 13))


## ----figPCA, fig.width=5, fig.height=5-----------------------------------
print(plotPCA(rld, intgroup=c("condition", "type")))


## ----WaldTest, eval=FALSE------------------------------------------------
## dds <- estimateSizeFactors(dds)
## dds <- estimateDispersions(dds)
## dds <- nbinomWaldTest(dds)


## ----paraSetup-----------------------------------------------------------
ddsCtrst <- ddsPara[, ddsPara$time == "48h"]
as.data.frame(colData(ddsCtrst)[,c("patient","treatment")])
design(ddsCtrst) <- ~ patient + treatment


## ----paraSubset, echo=FALSE----------------------------------------------
idx <- which(rowSums(counts(ddsPara)) > 0)[1:1000]
ddsCtrst <- ddsPara[idx,]


## ----paraDE,cache=TRUE---------------------------------------------------
ddsCtrst <- DESeq(ddsCtrst)


## ----contrastsChar-------------------------------------------------------
resCtrst <- results(ddsCtrst, contrast=c("treatment","OHT","DPN"))
head(resCtrst,2)


## ----contrastList--------------------------------------------------------
resultsNames(ddsCtrst)
resCtrst <- results(ddsCtrst, contrast=list("treatmentOHT","treatmentDPN"))
head(resCtrst,2)


## ----contrastsNum--------------------------------------------------------
resultsNames(ddsCtrst)
resCtrst <- results(ddsCtrst, contrast=c(0,0,0,0,0,0,-1,1))
head(resCtrst,2)


## ----interact------------------------------------------------------------
ddsX <- ddsPara
design(ddsX) <- ~ patient + treatment + patient:treatment


## ----interactSubset, echo=FALSE------------------------------------------
idx <- which(rowSums(counts(ddsX)) > 0)[1:1000]
ddsX <- ddsX[idx,]


## ----interactRun---------------------------------------------------------
ddsX <- DESeq(ddsX)
resultsNames(ddsX)


## ----interactRes---------------------------------------------------------
resX <- results(ddsX, contrast =
                list("patient4.treatmentOHT",
                     "patient4.treatmentControl"))
head(resX, 2)


## ----interact2x2---------------------------------------------------------
ddsXsub <- ddsX[,ddsX$treatment %in% c("Control","OHT") &
                ddsX$patient %in% c("1","2")]
ddsXsub$treatment <- droplevels(ddsXsub$treatment)
ddsXsub$patient <- droplevels(ddsXsub$patient)


## ----interact2x2results--------------------------------------------------
ddsXsub <- DESeq(ddsXsub)
resultsNames(ddsXsub)
resXsub <- results(ddsXsub, name="patient2.treatmentOHT")
head(resXsub, 2)


## ----replaceCooksOutlier-------------------------------------------------
ddsReplace <- replaceOutliers(dds, minReplicates=3)


## ----refitAfterReplace,cache=TRUE----------------------------------------
ddsReplace <- DESeq(ddsReplace)
tab <- table(initial = results(dds)$padj < .1,
             replace = results(ddsReplace)$padj < .1)
addmargins(tab)


## ----likelihoodRatioTest,cache=TRUE--------------------------------------
ddsLRT <- nbinomLRT(dds, reduced = ~ 1)
resLRT <- results(ddsLRT)
head(resLRT,2)
tab <- table(Wald=res$padj < .1, LRT=resLRT$padj < .1)
addmargins(tab)


## ----dispFit-------------------------------------------------------------
plotDispEsts(dds)


## ----dispFitLocalSub, echo=FALSE-----------------------------------------
idx <- which(rowSums(counts(dds)) > 0)[1:500]
ddsLocal <- estimateDispersions(dds[idx,], fitType="local")


## ----dispFitLocal, eval=FALSE--------------------------------------------
## ddsLocal <- estimateDispersions(dds, fitType="local")


## ----dispFitMeanSub, echo=FALSE------------------------------------------
ddsMean <- estimateDispersions(dds[idx,], fitType="mean")


## ----dispFitMean, eval=FALSE---------------------------------------------
## ddsMean <- estimateDispersions(dds, fitType="mean")


## ----dispFitCustomSub, echo=FALSE----------------------------------------
ddsMed <- estimateDispersionsGeneEst(dds[idx,])
useForMedian <- mcols(ddsMed)$dispGeneEst > 1e-7
medianDisp <- median(mcols(ddsMed)$dispGeneEst[useForMedian],na.rm=TRUE)
mcols(ddsMed)$dispFit <- medianDisp
ddsMed <- estimateDispersionsMAP(ddsMed)


## ----dispFitCustom, eval=FALSE-------------------------------------------
## ddsMed <- estimateDispersionsGeneEst(dds)
## useForMedian <- mcols(ddsMed)$dispGeneEst > 1e-7
## medianDisp <- median(mcols(ddsMed)$dispGeneEst[useForMedian],na.rm=TRUE)
## mcols(ddsMed)$dispFit <- medianDisp
## ddsMed <- estimateDispersionsMAP(ddsMed)


## ----filtByMean----------------------------------------------------------
attr(res,"filterThreshold")
plot(attr(res,"filterNumRej"),type="b",
     ylab="number of rejections")


## ----moreFilt------------------------------------------------------------
resNoFilt <- results(dds, independentFiltering=FALSE)
table(filtering=(res$padj < .1), noFiltering=(resNoFilt$padj < .1))
library(genefilter)
rv <- rowVars(counts(dds,normalized=TRUE))
resFiltByVar <- results(dds, filter=rv)
table(rowMean=(res$padj < .1), rowVar=(resFiltByVar$padj < .1))


## ----ddsNoPriorNotRun, eval=FALSE----------------------------------------
## ddsNoPrior <- DESeq(dds, betaPrior=FALSE)


## ----lfcThresh-----------------------------------------------------------
par(mfrow=c(2,2),mar=c(2,2,1,1))
yl <- c(-2.5,2.5)

resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
resL <- results(dds, lfcThreshold=.5, altHypothesis="less")

plotMA(resGA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resLA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resG, ylim=yl)
abline(h=.5,col="dodgerblue",lwd=2)
plotMA(resL, ylim=yl)
abline(h=-.5,col="dodgerblue",lwd=2)


## ----mcols---------------------------------------------------------------
mcols(dds,use.names=TRUE)[1:4,1:4]
# here using substr() only for display purposes
substr(names(mcols(dds)),1,10) 
mcols(mcols(dds), use.names=TRUE)[1:4,]


## ----coef----------------------------------------------------------------
head(coef(dds))


## ----normFactors, eval=FALSE---------------------------------------------
## normFactors <- normFactors / mean(normFactors)
## normalizationFactors(dds) <- normFactors


## ----offsetTransform, eval=FALSE-----------------------------------------
## cqnOffset <- cqnObject$glm.offset
## cqnNormFactors <- exp(cqnOffset)
## EDASeqNormFactors <- exp(-1 * EDASeqOffset)


## ----cooksPlot-----------------------------------------------------------
W <- res$stat
maxCooks <- apply(assays(dds)[["cooks"]],1,max)
idx <- !is.na(W)
plot(rank(W[idx]), maxCooks[idx], xlab="rank of Wald statistic", 
     ylab="maximum Cook's distance per gene",
     ylim=c(0,5), cex=.4, col=rgb(0,0,0,.3))
m <- ncol(dds)
p <- 3
abline(h=qf(.99, p, m - p))


## ----indFilt-------------------------------------------------------------
plot(res$baseMean+1, -log10(res$pvalue),
     log="x", xlab="mean of normalized counts",
     ylab=expression(-log[10](pvalue)),
     ylim=c(0,30),
     cex=.4, col=rgb(0,0,0,.3))


## ----histindepfilt, fig.width=7, fig.height=5----------------------------
use <- res$baseMean > attr(res,"filterThreshold")
table(use)
h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE)
h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE)
colori <- c(`do not pass`="khaki", `pass`="powderblue")

## ----fighistindepfilt----------------------------------------------------
barplot(height = rbind(h1$counts, h2$counts), beside = FALSE,
        col = colori, space = 0, main = "", ylab="frequency")
text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)),
     adj = c(0.5,1.7), xpd=NA)
legend("topright", fill=rev(colori), legend=rev(names(colori)))


## ----sortP, cache=TRUE---------------------------------------------------
resFilt <- res[use & !is.na(res$pvalue),]
orderInPlot <- order(resFilt$pvalue)
showInPlot <- (resFilt$pvalue[orderInPlot] <= 0.08)
alpha <- 0.1

## ----sortedP, fig.width=4.5, fig.height=4.5------------------------------
plot(seq(along=which(showInPlot)), resFilt$pvalue[orderInPlot][showInPlot],
     pch=".", xlab = expression(rank(p[i])), ylab=expression(p[i]))
abline(a=0, b=alpha/length(resFilt$pvalue), col="red3", lwd=2)


## ----doBH, echo=FALSE, results="hide"------------------------------------
whichBH <- which(resFilt$pvalue[orderInPlot] <= alpha*seq(along=resFilt$pvalue)/length(resFilt$pvalue))
## Test some assertions:
## - whichBH is a contiguous set of integers from 1 to length(whichBH)
## - the genes selected by this graphical method coincide with those
##   from p.adjust (i.e. padjFilt)
stopifnot(length(whichBH)>0,
          identical(whichBH, seq(along=whichBH)),
          resFilt$padj[orderInPlot][ whichBH] <= alpha,
          resFilt$padj[orderInPlot][-whichBH]  > alpha)


## ----SchwSpjot, echo=FALSE, results="hide"-------------------------------
j  <- round(length(resFilt$pvalue)*c(1, .66))
px <- (1-resFilt$pvalue[orderInPlot[j]])
py <- ((length(resFilt$pvalue)-1):0)[j]
slope <- diff(py)/diff(px)


## ----SchwederSpjotvoll, fig.width=4.5, fig.height=4.5--------------------
plot(1-resFilt$pvalue[orderInPlot],
     (length(resFilt$pvalue)-1):0, pch=".",
     xlab=expression(1-p[i]), ylab=expression(N(p[i])))
abline(a=0, slope, col="red3", lwd=2)


## ----sessInfo, results="asis", echo=FALSE--------------------------------
toLatex(sessionInfo())


## ----resetOptions, results="hide", echo=FALSE----------------------------
options(prompt="> ", continue="+ ")