## ----opts, echo=FALSE, results="hide"------------------------------------ library("knitr") opts_chunk$set(error=FALSE) ## ----style, echo=FALSE, results="asis"----------------------------------- library("BiocStyle") BiocStyle::markdown() ## ---- eval=FALSE--------------------------------------------------------- # ?DESeq ## ---- eval=FALSE--------------------------------------------------------- # browseVignettes("DESeq2") ## ----message=FALSE------------------------------------------------------- library("airway") ## ------------------------------------------------------------------------ dir <- system.file("extdata", package="airway", mustWork=TRUE) gtffile <- file.path(dir, "Homo_sapiens.GRCh37.75_subset.gtf") ## ----message=FALSE------------------------------------------------------- library("GenomicFeatures") txdb <- makeTxDbFromGFF(gtffile, format="gtf") ## ------------------------------------------------------------------------ ebg <- exonsBy(txdb, by="gene") ebg[[1]] ## ----annohub, message=FALSE, warning=FALSE------------------------------- library("AnnotationHub") ah <- AnnotationHub() ## ------------------------------------------------------------------------ qu <- query(ah, c("Ensembl","gtf","Saccharomyces cerevisiae","release-80")) stopifnot(length(qu) == 1) # check there was only one match gtf <- qu[[1]] # this actually downloads the gene model txdbFromAHub <- makeTxDbFromGRanges(gtf) ## ------------------------------------------------------------------------ list.files(dir) ## ------------------------------------------------------------------------ csvfile <- file.path(dir,"sample_table.csv") ( sampleTable <- read.csv(csvfile,row.names=1) ) ## ------------------------------------------------------------------------ filenames <- file.path(dir, paste0(sampleTable$Run, "_subset.bam")) ## ----message=FALSE------------------------------------------------------- library("Rsamtools") bamfiles <- BamFileList(filenames, yieldSize=2000000) ## ------------------------------------------------------------------------ seqinfo(bamfiles[1]) ## ----message=FALSE------------------------------------------------------- library("GenomicAlignments") ## ------------------------------------------------------------------------ se <- summarizeOverlaps(features=ebg, reads=bamfiles, mode="Union", singleEnd=FALSE, ignore.strand=TRUE, fragments=TRUE ) ## ----sumexp, echo=FALSE-------------------------------------------------- par(mar=c(0,0,0,0)) plot(1,1,xlim=c(0,100),ylim=c(0,100),bty="n", type="n",xlab="",ylab="",xaxt="n",yaxt="n") polygon(c(45,80,80,45),c(10,10,70,70),col="pink",border=NA) polygon(c(45,80,80,45),c(68,68,70,70),col="pink3",border=NA) text(62.5,40,"assay(s)") text(62.5,30,"e.g. 'counts'") polygon(c(20,40,40,20),c(10,10,70,70),col="skyblue",border=NA) polygon(c(20,40,40,20),c(68,68,70,70),col="skyblue3",border=NA) text(30,40,"rowRanges") polygon(c(45,80,80,45),c(75,75,90,90),col="palegreen",border=NA) polygon(c(45,47,47,45),c(75,75,90,90),col="palegreen3",border=NA) text(62.5,82.5,"colData") ## ------------------------------------------------------------------------ se head(assay(se)) colSums(assay(se)) colData(se) rowRanges(se) ## ------------------------------------------------------------------------ str(metadata(rowRanges(se))) ## ------------------------------------------------------------------------ colData(se) <- DataFrame(sampleTable) ## ----message=FALSE------------------------------------------------------- library("Rsubread") fc <- featureCounts(filenames[1], annot.ext=gtffile, isGTFAnnotationFile=TRUE, isPairedEnd=TRUE) names(fc) head(fc$counts) ## ------------------------------------------------------------------------ data("airway") se <- airway ## ------------------------------------------------------------------------ round( colSums(assay(se)) / 1e6, 1 ) ## ------------------------------------------------------------------------ colData(se) ## ----message=FALSE------------------------------------------------------- library("DESeq2") ## ------------------------------------------------------------------------ dds <- DESeqDataSet(se, design = ~ cell + dex) ## ------------------------------------------------------------------------ countdata <- assay(se) head(countdata) ## ------------------------------------------------------------------------ coldata <- colData(se) ## ------------------------------------------------------------------------ (ddsMat <- DESeqDataSetFromMatrix(countData = countdata, colData = coldata, design = ~ cell + dex)) ## ------------------------------------------------------------------------ rld <- rlog(dds) head(assay(rld)) ## ----rldplot, fig.width=8, fig.height=4---------------------------------- par( mfrow = c( 1, 2 ) ) dds <- estimateSizeFactors(dds) plot(log2( 1 + counts(dds, normalized=TRUE)[ , 1:2] ), pch=16, cex=0.3) plot(assay(rld)[ , 1:2], pch=16, cex=0.3) ## ------------------------------------------------------------------------ sampleDists <- dist( t( assay(rld) ) ) sampleDists ## ----message=FALSE------------------------------------------------------- library("pheatmap") library("RColorBrewer") ## ----distheatmap, fig.width=8-------------------------------------------- sampleDistMatrix <- as.matrix( sampleDists ) rownames(sampleDistMatrix) <- paste( rld$dex, rld$cell, sep="-" ) colnames(sampleDistMatrix) <- NULL colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) pheatmap(sampleDistMatrix, clustering_distance_rows=sampleDists, clustering_distance_cols=sampleDists, col=colors) ## ----message=FALSE------------------------------------------------------- library("PoiClaClu") poisd <- PoissonDistance(t(counts(dds))) ## ----poisdistheatmap, fig.width=8---------------------------------------- samplePoisDistMatrix <- as.matrix( poisd$dd ) rownames(samplePoisDistMatrix) <- paste( rld$dex, rld$cell, sep="-" ) colnames(samplePoisDistMatrix) <- NULL pheatmap(samplePoisDistMatrix, clustering_distance_rows=poisd$dd, clustering_distance_cols=poisd$dd, col=colors) ## ----plotpca, fig.width=6, fig.height=4.5-------------------------------- plotPCA(rld, intgroup = c("dex", "cell")) ## ------------------------------------------------------------------------ (data <- plotPCA(rld, intgroup = c( "dex", "cell"), returnData=TRUE)) percentVar <- round(100 * attr(data, "percentVar")) ## ----message=FALSE------------------------------------------------------- library("ggplot2") ## ----ggplotpca, fig.width=6, fig.height=4.5------------------------------ ggplot(data, aes(PC1, PC2, color=dex, shape=cell)) + geom_point(size=3) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) ## ----mdsrlog, fig.width=6, fig.height=4.5-------------------------------- mds <- data.frame(cmdscale(sampleDistMatrix)) mds <- cbind(mds, as.data.frame(colData(rld))) ggplot(mds, aes(X1,X2,color=dex,shape=cell)) + geom_point(size=3) ## ----mdspois, fig.width=6, fig.height=4.5-------------------------------- mds <- data.frame(cmdscale(samplePoisDistMatrix)) mds <- cbind(mds, as.data.frame(colData(dds))) ggplot(mds, aes(X1,X2,color=dex,shape=cell)) + geom_point(size=3) ## ------------------------------------------------------------------------ dds$dex <- relevel(dds$dex, "untrt") ## ------------------------------------------------------------------------ dds <- DESeq(dds) ## ------------------------------------------------------------------------ ( res <- results(dds) ) ## ------------------------------------------------------------------------ mcols(res, use.names=TRUE) ## ------------------------------------------------------------------------ summary(res) ## ------------------------------------------------------------------------ res.05 <- results(dds, alpha=.05) table(res.05$padj < .05) ## ------------------------------------------------------------------------ ( res.cell <- results(dds, contrast=c("cell", "N061011", "N61311")) ) ## ----plotcounts, fig.width=5, fig.height=5------------------------------- topGene <- rownames(res)[which.min(res$padj)] plotCounts(dds, gene=topGene, intgroup=c("dex")) ## ----ggplotcountsjitter, fig.height=5------------------------------------ data <- plotCounts(dds, gene=topGene, intgroup=c("dex","cell"), returnData=TRUE) ggplot(data, aes(x=dex, y=count, color=cell)) + scale_y_log10() + geom_point(position=position_jitter(width=.1,height=0), size=3) ## ----ggplotcountsdot, fig.height=5--------------------------------------- ggplot(data, aes(x=dex, y=count, fill=dex)) + scale_y_log10() + geom_dotplot(binaxis="y", stackdir="center") ## ----ggplotcountsgroup, fig.height=5------------------------------------- ggplot(data, aes(x=dex, y=count, color=cell, group=cell)) + scale_y_log10() + geom_point(size=3) + geom_line() ## ----plotma-------------------------------------------------------------- plotMA(res, ylim=c(-5,5)) ## ----plotma2------------------------------------------------------------- plotMA(res, ylim=c(-5,5)) with(res[topGene, ], { points(baseMean, log2FoldChange, col="dodgerblue", cex=2, lwd=2) text(baseMean, log2FoldChange, topGene, pos=2, col="dodgerblue") }) ## ----message=FALSE------------------------------------------------------- library("genefilter") topVarGenes <- head(order(-rowVars(assay(rld))),20) ## ----genescluster-------------------------------------------------------- mat <- assay(rld)[ topVarGenes, ] mat <- mat - rowMeans(mat) df <- as.data.frame(colData(rld)[,c("cell","dex")]) pheatmap(mat, annotation_col=df) ## ----message=FALSE------------------------------------------------------- library("AnnotationDbi") library("org.Hs.eg.db") ## ------------------------------------------------------------------------ columns(org.Hs.eg.db) ## ------------------------------------------------------------------------ res$symbol <- mapIds(org.Hs.eg.db, keys=row.names(res), column="SYMBOL", keytype="ENSEMBL", multiVals="first") res$entrez <- mapIds(org.Hs.eg.db, keys=row.names(res), column="ENTREZID", keytype="ENSEMBL", multiVals="first") ## ------------------------------------------------------------------------ resOrdered <- res[order(res$padj),] head(resOrdered) ## ----eval=FALSE---------------------------------------------------------- # write.csv(as.data.frame(resOrdered), file="results.csv") ## ----message=FALSE------------------------------------------------------- library("ReportingTools") ## ------------------------------------------------------------------------ resTop <- head(resOrdered,1000) resTop <- as(resTop, "data.frame") resTop <- resTop[,c("padj","baseMean","log2FoldChange","pvalue", "symbol","entrez")] resTop$ensembl <- rownames(resTop) ## ----eval=FALSE---------------------------------------------------------- # getwd() # htmlRep <- HTMLReport(shortName = "airway_DE_report", # title = "Airway differential expression analysis 06/2015", # reportDirectory = "./reports") # publish(resTop, htmlRep) # url <- finish(htmlRep) # browseURL(url) ## ------------------------------------------------------------------------ resReport <- head(resOrdered, 50) ddsReport <- dds[ rownames(resReport), ] rownames(resReport) <- resReport$entrez rownames(ddsReport) <- rownames(resReport) ## ----eval=FALSE---------------------------------------------------------- # htmlRep <- HTMLReport(shortName = "airway_DE_report", # title = "Airway differential expression analysis 06/2015", # reportDirectory = "./reports") # publish(resReport, htmlRep, DataSet=ddsReport, # annotation.db="org.Hs.eg.db", # n=50, factor=dds$dex) # url <- finish(htmlRep) # browseURL(url) ## ------------------------------------------------------------------------ total.range <- GRanges("1", IRanges(11e6, 11.4e6)) indexBam(filenames[1]) ## ------------------------------------------------------------------------ ga <- readGAlignmentPairs(filenames[1], param=ScanBamParam(which=total.range)) ga ## ----simplecov----------------------------------------------------------- cov <- coverage(ga) zoom <- GRanges("1", IRanges(11.07e6, 11.09e6)) cov[zoom] cov.numeric <- as.numeric(cov[zoom][[1]]) plot(cov.numeric, type="h") ## ----gviz, message=FALSE------------------------------------------------- library("Gviz") options(ucscChromosomeNames=FALSE) grtrack <- GeneRegionTrack(txdb) gtrack <- GenomeAxisTrack() altrack <- AlignmentsTrack(filenames[1]) plotTracks(list(gtrack, grtrack, altrack), from=11.07e6, to=11.09e6, chromosome="1") ## ----gvizSashimi--------------------------------------------------------- altrack <- AlignmentsTrack(filenames[1], type=c("coverage","sashimi")) plotTracks(list(gtrack, grtrack, altrack), from=11.07e6, to=11.09e6, chromosome="1") ## ------------------------------------------------------------------------ ebt <- exonsBy(txdb, by="tx") ebt.sub <- ebt[ebt %over% zoom] fco <- findCompatibleOverlaps(ga, ebt.sub) fco ## ------------------------------------------------------------------------ (tab <- table(subjectHits(fco))) ebt.top <- ebt.sub[names(tab)[which.max(tab)]] ## ----gvizebttop---------------------------------------------------------- grtrack2 <- GeneRegionTrack(ebt.top) plotTracks(list(gtrack, grtrack2, altrack), from=11.07e6, to=11.09e6, chromosome="1") ## ------------------------------------------------------------------------ sessionInfo()