## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----loadDESeq2, echo=FALSE---------------------------------------------- library("DESeq2") ## ----options, results="hide", echo=FALSE--------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----quick, eval=FALSE--------------------------------------------------- ## dds <- DESeqDataSet(se = se, design = ~ condition) ## dds <- DESeq(dds) ## res <- results(dds) ## ----loadSumExp,results="hide",cache=TRUE-------------------------------- library("parathyroidSE") data("parathyroidGenesSE") se <- parathyroidGenesSE colnames(se) <- se$run ## ----sumExpInput, cache=TRUE--------------------------------------------- library("DESeq2") ddsPara <- DESeqDataSet(se = se, design = ~ patient + treatment) ddsPara$treatment <- factor(ddsPara$treatment, levels=c("Control","DPN","OHT")) ddsPara ## ----loadPasilla--------------------------------------------------------- library("pasilla") library("Biobase") data("pasillaGenes") countData <- counts(pasillaGenes) colData <- pData(pasillaGenes)[,c("condition","type")] ## ----matrixInput--------------------------------------------------------- dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~ condition) dds$condition <- factor(dds$condition, levels=c("untreated","treated")) dds ## ----htseqDirI, eval=FALSE----------------------------------------------- ## directory <- "/path/to/your/files/" ## ----htseqDirII---------------------------------------------------------- directory <- system.file("extdata", package="pasilla", mustWork=TRUE) ## ----htseqInput, cache=TRUE---------------------------------------------- sampleFiles <- grep("treated",list.files(directory),value=TRUE) sampleCondition <- sub("(.*treated).*","\\1",sampleFiles) sampleTable <- data.frame(sampleName = sampleFiles, fileName = sampleFiles, condition = sampleCondition) ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, directory = directory, design= ~ condition) ddsHTSeq$condition <- factor(ddsHTSeq$condition, levels=c("untreated","treated")) ddsHTSeq ## ----relevel------------------------------------------------------------- dds$condition <- relevel(dds$condition, "untreated") ## ----droplevels---------------------------------------------------------- dds$condition <- droplevels(dds$condition) ## ----deseq,cache=TRUE---------------------------------------------------- dds <- DESeq(dds) res <- results(dds) resOrdered <- res[order(res$padj),] head(resOrdered) ## ----deseqNoPrior,cache=TRUE,echo=FALSE---------------------------------- # this chunk is used to demonstrate the shrinkage # of log fold changes # note: betaPrior can also be used as an argument to DESeq() ddsNoPrior <- nbinomWaldTest(dds,betaPrior=FALSE) resNoPrior <- results(ddsNoPrior) ## ----MA, fig.width=4.5, fig.height=4.5----------------------------------- plotMA(res, main="DESeq2", ylim=c(-2,2)) ## ----MANoPrior, echo=FALSE, fig.width=4.5, fig.height=4.5---------------- plotMA(resNoPrior, main=expression(unshrunken~log[2]~fold~changes), ylim=c(-2,2)) ## ----metadata------------------------------------------------------------ mcols(res)$description ## ----export, eval=FALSE-------------------------------------------------- ## write.csv(as.data.frame(resOrdered), ## file="condition_treated_results.csv") ## ----multifactor--------------------------------------------------------- colData(dds) ## ----copyMultifactor----------------------------------------------------- ddsMF <- dds ## ----subsetMF, echo=FALSE------------------------------------------------ idx <- 1:1000 ddsMF <- ddsMF[idx,] ## ----replaceDesign,cache=TRUE-------------------------------------------- design(ddsMF) <- formula(~ type + condition) ddsMF <- DESeq(ddsMF) ## ----multiResults-------------------------------------------------------- resMF <- results(ddsMF) head(resMF) ## ----multiTypeResults---------------------------------------------------- resMFType <- results(ddsMF, contrast=c("type","single-read","paired-end")) head(resMFType) ## ----echo=FALSE---------------------------------------------------------- ddsblind <- dds design(ddsblind) <- ~ 1 ddsblind <- estimateDispersions(ddsblind) # we do blind estimation once, then re-use these for both: rld <- rlog(ddsblind, blind=FALSE) vsd <- varianceStabilizingTransformation(ddsblind, blind=FALSE) rlogMat <- assay(rld) vstMat <- assay(vsd) ## ----defvsd,eval=FALSE--------------------------------------------------- ## rld <- rlog(dds) ## vsd <- varianceStabilizingTransformation(dds) ## rlogMat <- assay(rld) ## vstMat <- assay(vsd) ## ----vsd1, echo=FALSE, fig.width=4.5, fig.height=4.5--------------------- px <- counts(dds)[,1] / sizeFactors(dds)[1] ord <- order(px) ord <- ord[px[ord] < 150] ord <- ord[seq(1, length(ord), length=50)] last <- ord[length(ord)] vstcol <- c("blue", "black") matplot(px[ord], cbind(assay(vsd)[, 1], log2(px))[ord, ], type="l", lty=1, col=vstcol, xlab="n", ylab="f(n)") legend("bottomright", legend = c( expression("variance stabilizing transformation"), expression(log[2](n/s[1]))), fill=vstcol) ## ----vsd2, fig.width=8, fig.height=3, results="hide"--------------------- library("vsn") par(mfrow=c(1,3)) notAllZero <- (rowSums(counts(dds))>0) meanSdPlot(log2(counts(dds,normalized=TRUE)[notAllZero,] + 1), ylim = c(0,2.5)) meanSdPlot(assay(rld[notAllZero,]), ylim = c(0,2.5)) meanSdPlot(assay(vsd[notAllZero,]), ylim = c(0,2.5)) ## ----heatmap------------------------------------------------------------- library("RColorBrewer") library("gplots") select <- order(rowMeans(counts(dds,normalized=TRUE)),decreasing=TRUE)[1:30] hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100) ## ----figHeatmap2a, fig.width=7, fig.height=10---------------------------- heatmap.2(counts(dds,normalized=TRUE)[select,], col = hmcol, Rowv = FALSE, Colv = FALSE, scale="none", dendrogram="none", trace="none", margin=c(10,6)) ## ----figHeatmap2b, fig.width=7, fig.height=10---------------------------- heatmap.2(assay(rld)[select,], col = hmcol, Rowv = FALSE, Colv = FALSE, scale="none", dendrogram="none", trace="none", margin=c(10, 6)) ## ----figHeatmap2c, fig.width=7, fig.height=10---------------------------- heatmap.2(assay(vsd)[select,], col = hmcol, Rowv = FALSE, Colv = FALSE, scale="none", dendrogram="none", trace="none", margin=c(10, 6)) ## ----sampleClust--------------------------------------------------------- distsRL <- dist(t(assay(rld))) ## ----figHeatmapSamples, fig.width=7, fig.height=7------------------------ mat <- as.matrix(distsRL) rownames(mat) <- colnames(mat) <- with(colData(dds), paste(condition, type, sep=" : ")) heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(13, 13)) ## ----figPCA, fig.width=5, fig.height=5----------------------------------- print(plotPCA(rld, intgroup=c("condition", "type"))) ## ----WaldTest, eval=FALSE------------------------------------------------ ## dds <- estimateSizeFactors(dds) ## dds <- estimateDispersions(dds) ## dds <- nbinomWaldTest(dds) ## ----paraSetup----------------------------------------------------------- ddsCtrst <- ddsPara[, ddsPara$time == "48h"] as.data.frame(colData(ddsCtrst)[,c("patient","treatment")]) design(ddsCtrst) <- ~ patient + treatment ## ----paraSubset, echo=FALSE---------------------------------------------- idx <- which(rowSums(counts(ddsPara)) > 0)[1:1000] ddsCtrst <- ddsPara[idx,] ## ----paraDE,cache=TRUE--------------------------------------------------- ddsCtrst <- DESeq(ddsCtrst) ## ----contrastsChar------------------------------------------------------- resCtrst <- results(ddsCtrst, contrast=c("treatment","OHT","DPN")) head(resCtrst,2) ## ----contrastList-------------------------------------------------------- resultsNames(ddsCtrst) resCtrst <- results(ddsCtrst, contrast=list("treatmentOHT","treatmentDPN")) head(resCtrst,2) ## ----contrastsNum-------------------------------------------------------- resultsNames(ddsCtrst) resCtrst <- results(ddsCtrst, contrast=c(0,0,0,0,0,0,-1,1)) head(resCtrst,2) ## ----interact------------------------------------------------------------ ddsX <- ddsPara design(ddsX) <- ~ patient + treatment + patient:treatment ## ----interactSubset, echo=FALSE------------------------------------------ idx <- which(rowSums(counts(ddsX)) > 0)[1:1000] ddsX <- ddsX[idx,] ## ----interactRun--------------------------------------------------------- ddsX <- DESeq(ddsX) resultsNames(ddsX) ## ----interactRes--------------------------------------------------------- resX <- results(ddsX, contrast = list("patient4.treatmentOHT", "patient4.treatmentControl")) head(resX, 2) ## ----interact2x2--------------------------------------------------------- ddsXsub <- ddsX[,ddsX$treatment %in% c("Control","OHT") & ddsX$patient %in% c("1","2")] ddsXsub$treatment <- droplevels(ddsXsub$treatment) ddsXsub$patient <- droplevels(ddsXsub$patient) ## ----interact2x2results-------------------------------------------------- ddsXsub <- DESeq(ddsXsub) resultsNames(ddsXsub) resXsub <- results(ddsXsub, name="patient2.treatmentOHT") head(resXsub, 2) ## ----replaceCooksOutlier------------------------------------------------- ddsReplace <- replaceOutliers(dds, minReplicates=3) ## ----refitAfterReplace,cache=TRUE---------------------------------------- ddsReplace <- DESeq(ddsReplace) tab <- table(initial = results(dds)$padj < .1, replace = results(ddsReplace)$padj < .1) addmargins(tab) ## ----likelihoodRatioTest,cache=TRUE-------------------------------------- ddsLRT <- nbinomLRT(dds, reduced = ~ 1) resLRT <- results(ddsLRT) head(resLRT,2) tab <- table(Wald=res$padj < .1, LRT=resLRT$padj < .1) addmargins(tab) ## ----dispFit------------------------------------------------------------- plotDispEsts(dds) ## ----dispFitLocalSub, echo=FALSE----------------------------------------- idx <- which(rowSums(counts(dds)) > 0)[1:500] ddsLocal <- estimateDispersions(dds[idx,], fitType="local") ## ----dispFitLocal, eval=FALSE-------------------------------------------- ## ddsLocal <- estimateDispersions(dds, fitType="local") ## ----dispFitMeanSub, echo=FALSE------------------------------------------ ddsMean <- estimateDispersions(dds[idx,], fitType="mean") ## ----dispFitMean, eval=FALSE--------------------------------------------- ## ddsMean <- estimateDispersions(dds, fitType="mean") ## ----dispFitCustomSub, echo=FALSE---------------------------------------- ddsMed <- estimateDispersionsGeneEst(dds[idx,]) useForMedian <- mcols(ddsMed)$dispGeneEst > 1e-7 medianDisp <- median(mcols(ddsMed)$dispGeneEst[useForMedian],na.rm=TRUE) mcols(ddsMed)$dispFit <- medianDisp ddsMed <- estimateDispersionsMAP(ddsMed) ## ----dispFitCustom, eval=FALSE------------------------------------------- ## ddsMed <- estimateDispersionsGeneEst(dds) ## useForMedian <- mcols(ddsMed)$dispGeneEst > 1e-7 ## medianDisp <- median(mcols(ddsMed)$dispGeneEst[useForMedian],na.rm=TRUE) ## mcols(ddsMed)$dispFit <- medianDisp ## ddsMed <- estimateDispersionsMAP(ddsMed) ## ----filtByMean---------------------------------------------------------- attr(res,"filterThreshold") plot(attr(res,"filterNumRej"),type="b", ylab="number of rejections") ## ----moreFilt------------------------------------------------------------ resNoFilt <- results(dds, independentFiltering=FALSE) table(filtering=(res$padj < .1), noFiltering=(resNoFilt$padj < .1)) library(genefilter) rv <- rowVars(counts(dds,normalized=TRUE)) resFiltByVar <- results(dds, filter=rv) table(rowMean=(res$padj < .1), rowVar=(resFiltByVar$padj < .1)) ## ----ddsNoPriorNotRun, eval=FALSE---------------------------------------- ## ddsNoPrior <- DESeq(dds, betaPrior=FALSE) ## ----lfcThresh----------------------------------------------------------- par(mfrow=c(2,2),mar=c(2,2,1,1)) yl <- c(-2.5,2.5) resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs") resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs") resG <- results(dds, lfcThreshold=.5, altHypothesis="greater") resL <- results(dds, lfcThreshold=.5, altHypothesis="less") plotMA(resGA, ylim=yl) abline(h=c(-.5,.5),col="dodgerblue",lwd=2) plotMA(resLA, ylim=yl) abline(h=c(-.5,.5),col="dodgerblue",lwd=2) plotMA(resG, ylim=yl) abline(h=.5,col="dodgerblue",lwd=2) plotMA(resL, ylim=yl) abline(h=-.5,col="dodgerblue",lwd=2) ## ----mcols--------------------------------------------------------------- mcols(dds,use.names=TRUE)[1:4,1:4] # here using substr() only for display purposes substr(names(mcols(dds)),1,10) mcols(mcols(dds), use.names=TRUE)[1:4,] ## ----coef---------------------------------------------------------------- head(coef(dds)) ## ----normFactors, eval=FALSE--------------------------------------------- ## normFactors <- normFactors / mean(normFactors) ## normalizationFactors(dds) <- normFactors ## ----offsetTransform, eval=FALSE----------------------------------------- ## cqnOffset <- cqnObject$glm.offset ## cqnNormFactors <- exp(cqnOffset) ## EDASeqNormFactors <- exp(-1 * EDASeqOffset) ## ----cooksPlot----------------------------------------------------------- W <- res$stat maxCooks <- apply(assays(dds)[["cooks"]],1,max) idx <- !is.na(W) plot(rank(W[idx]), maxCooks[idx], xlab="rank of Wald statistic", ylab="maximum Cook's distance per gene", ylim=c(0,5), cex=.4, col=rgb(0,0,0,.3)) m <- ncol(dds) p <- 3 abline(h=qf(.99, p, m - p)) ## ----indFilt------------------------------------------------------------- plot(res$baseMean+1, -log10(res$pvalue), log="x", xlab="mean of normalized counts", ylab=expression(-log[10](pvalue)), ylim=c(0,30), cex=.4, col=rgb(0,0,0,.3)) ## ----histindepfilt, fig.width=7, fig.height=5---------------------------- use <- res$baseMean > attr(res,"filterThreshold") table(use) h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE) h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE) colori <- c(`do not pass`="khaki", `pass`="powderblue") ## ----fighistindepfilt---------------------------------------------------- barplot(height = rbind(h1$counts, h2$counts), beside = FALSE, col = colori, space = 0, main = "", ylab="frequency") text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)), adj = c(0.5,1.7), xpd=NA) legend("topright", fill=rev(colori), legend=rev(names(colori))) ## ----sortP, cache=TRUE--------------------------------------------------- resFilt <- res[use & !is.na(res$pvalue),] orderInPlot <- order(resFilt$pvalue) showInPlot <- (resFilt$pvalue[orderInPlot] <= 0.08) alpha <- 0.1 ## ----sortedP, fig.width=4.5, fig.height=4.5------------------------------ plot(seq(along=which(showInPlot)), resFilt$pvalue[orderInPlot][showInPlot], pch=".", xlab = expression(rank(p[i])), ylab=expression(p[i])) abline(a=0, b=alpha/length(resFilt$pvalue), col="red3", lwd=2) ## ----doBH, echo=FALSE, results="hide"------------------------------------ whichBH <- which(resFilt$pvalue[orderInPlot] <= alpha*seq(along=resFilt$pvalue)/length(resFilt$pvalue)) ## Test some assertions: ## - whichBH is a contiguous set of integers from 1 to length(whichBH) ## - the genes selected by this graphical method coincide with those ## from p.adjust (i.e. padjFilt) stopifnot(length(whichBH)>0, identical(whichBH, seq(along=whichBH)), resFilt$padj[orderInPlot][ whichBH] <= alpha, resFilt$padj[orderInPlot][-whichBH] > alpha) ## ----SchwSpjot, echo=FALSE, results="hide"------------------------------- j <- round(length(resFilt$pvalue)*c(1, .66)) px <- (1-resFilt$pvalue[orderInPlot[j]]) py <- ((length(resFilt$pvalue)-1):0)[j] slope <- diff(py)/diff(px) ## ----SchwederSpjotvoll, fig.width=4.5, fig.height=4.5-------------------- plot(1-resFilt$pvalue[orderInPlot], (length(resFilt$pvalue)-1):0, pch=".", xlab=expression(1-p[i]), ylab=expression(N(p[i]))) abline(a=0, slope, col="red3", lwd=2) ## ----sessInfo, results="asis", echo=FALSE-------------------------------- toLatex(sessionInfo()) ## ----resetOptions, results="hide", echo=FALSE---------------------------- options(prompt="> ", continue="+ ")