## ----knitr, echo=FALSE, results="hide"----------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----loadDESeq2, echo=FALSE---------------------------------------------- # in order to print version number below library("DESeq2") ## ----options, results="hide", echo=FALSE--------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----eval=FALSE---------------------------------------------------------- ## library( "GenomicFeatures" ) ## hse <- makeTranscriptDbFromGFF( "/path/to/your/genemodel.GTF", format="gtf" ) ## exonsByGene <- exonsBy( hse, by="gene" ) ## ----loadExonsByGene, echo=FALSE----------------------------------------- library("parathyroidSE") library("GenomicFeatures") data(exonsByGene) ## ----eval=FALSE---------------------------------------------------------- ## fls <- list.files( "/path/to/bam/files", pattern="bam$", full=TRUE ) ## ----locateFiles, echo=FALSE--------------------------------------------- bamDir <- system.file("extdata",package="parathyroidSE",mustWork=TRUE) fls <- list.files(bamDir, pattern="bam$",full=TRUE) ## ----bamfilepaired------------------------------------------------------- library( "Rsamtools" ) bamLst <- BamFileList( fls, yieldSize=100000 ) ## ----sumOver------------------------------------------------------------- library( "GenomicAlignments" ) se <- summarizeOverlaps( exonsByGene, bamLst, mode="Union", singleEnd=FALSE, ignore.strand=TRUE, fragments=TRUE ) ## ----beginner_plotSE, dev="pdf", echo=FALSE------------------------------ par(mar=c(0,0,0,0)) plot(1,1,xlim=c(0,100),ylim=c(0,100),bty="n", type="n",xlab="",ylab="",xaxt="n",yaxt="n") polygon(c(45,80,80,45),c(10,10,70,70),col=rgb(1,0,0,.5),border=NA) polygon(c(45,80,80,45),c(68,68,70,70),col=rgb(1,0,0,.5),border=NA) text(62.5,40,"assay(s)") text(62.5,30,"e.g. 'counts'") polygon(c(20,40,40,20),c(10,10,70,70),col=rgb(0,0,1,.5),border=NA) polygon(c(20,40,40,20),c(68,68,70,70),col=rgb(0,0,1,.5),border=NA) text(30,40,"rowData") polygon(c(45,80,80,45),c(75,75,90,90),col=rgb(.5,0,.5,.5),border=NA) polygon(c(45,47,47,45),c(75,75,90,90),col=rgb(.5,0,.5,.5),border=NA) text(62.5,82.5,"colData") ## ----examineSE----------------------------------------------------------- se head( assay(se) ) colSums( assay(se) ) colData(se) rowData(se) ## ----libraries----------------------------------------------------------- library( "DESeq2" ) library( "parathyroidSE" ) ## ----loadEcs------------------------------------------------------------- data( "parathyroidGenesSE" ) se <- parathyroidGenesSE colnames(se) <- se$run ## ----coldataSE----------------------------------------------------------- colData(se)[1:5,1:4] ## ----readcsv, eval=FALSE------------------------------------------------- ## sampleInfo <- read.csv( "/path/to/file.CSV" ) ## ----sampleInfo, echo=FALSE---------------------------------------------- sampleInfo <- data.frame(run=rev(se$run), pheno=rep(c("pheno1","pheno2"),length=ncol(se))) ## ----showSampleInfo------------------------------------------------------ head( sampleInfo ) head( colnames(se) ) sampleInfo <- DataFrame( sampleInfo ) ## ----reorderSampleInfo--------------------------------------------------- seIdx <- match(colnames(se), sampleInfo$run) head( cbind( colData(se)[ , 1:3 ], sampleInfo[ seIdx, ] ) ) colData(se) <- cbind( colData(se), sampleInfo[ seIdx, ] ) ## ----removeToy, echo=FALSE----------------------------------------------- se$pheno <- NULL ## ----fromSE-------------------------------------------------------------- ddsFull <- DESeqDataSet( se, design = ~ patient + treatment ) ## ----countTable---------------------------------------------------------- countdata <- assay( parathyroidGenesSE ) head( countdata ) ## ----colData------------------------------------------------------------- coldata <- colData( parathyroidGenesSE ) rownames( coldata ) <- coldata$run colnames( countdata ) <- coldata$run ## ----colDataSubsetCols--------------------------------------------------- head( coldata[ , c( "patient", "treatment", "time" ) ] ) ## ----makeddsfull--------------------------------------------------------- ddsFullCountTable <- DESeqDataSetFromMatrix( countData = countdata, colData = coldata, design = ~ patient + treatment) ddsFullCountTable ## ----columnData---------------------------------------------------------- as.data.frame( colData( ddsFull )[ ,c("sample","patient","treatment","time") ] ) ## ----collapse------------------------------------------------------------ ddsCollapsed <- collapseReplicates( ddsFull, groupby = ddsFull$sample, run = ddsFull$run ) head( as.data.frame( colData(ddsCollapsed)[ ,c("sample","runsCollapsed") ] ), 12 ) ## ----checkCollapsed------------------------------------------------------ original <- rowSums( counts(ddsFull)[ , ddsFull$sample == "SRS308873" ] ) all( original == counts(ddsCollapsed)[ ,"SRS308873" ] ) ## ----subsetCols---------------------------------------------------------- dds <- ddsCollapsed[ , ddsCollapsed$time == "48h" ] ## ----refactor------------------------------------------------------------ dds$time <- droplevels( dds$time ) ## ----relevel------------------------------------------------------------- dds$treatment <- relevel( dds$treatment, "Control" ) ## ----multifactorColData-------------------------------------------------- as.data.frame( colData(dds) ) ## ----subsetRows, echo=FALSE---------------------------------------------- idx <- which(rowSums(counts(dds)) > 0)[1:4000] dds <- dds[idx,] ## ----runDESeq, cache=TRUE------------------------------------------------ dds <- DESeq(dds) ## ----extractResults------------------------------------------------------ res <- results( dds ) res ## ----resCols------------------------------------------------------------- mcols(res, use.names=TRUE) ## ----resultsOther-------------------------------------------------------- res <- results( dds, contrast = c("treatment", "DPN", "Control") ) res ## ----rawpvalue----------------------------------------------------------- sum( res$pvalue < 0.01, na.rm=TRUE ) table( is.na(res$pvalue) ) ## ----adjpvalue----------------------------------------------------------- sum( res$padj < 0.1, na.rm=TRUE ) ## ----strongGenes--------------------------------------------------------- resSig <- res[ which(res$padj < 0.1 ), ] head( resSig[ order( resSig$log2FoldChange ), ] ) ## ----strongGenesUp------------------------------------------------------- tail( resSig[ order( resSig$log2FoldChange ), ] ) ## ----beginner_MA--------------------------------------------------------- plotMA( res, ylim = c(-1, 1) ) ## ----beginner_dispPlot--------------------------------------------------- plotDispEsts( dds, ylim = c(1e-6, 1e1) ) ## ----beginner_pvalHist, dev="pdf"---------------------------------------- hist( res$pvalue, breaks=20, col="grey" ) ## ----beginner_ratioSmallP, dev="pdf", fig.width=8, fig.height=4---------- # create bins using the quantile function qs <- c( 0, quantile( res$baseMean[res$baseMean > 0], 0:7/7 ) ) # "cut" the genes into the bins bins <- cut( res$baseMean, qs ) # rename the levels of the bins using the middle point levels(bins) <- paste0("~",round(.5*qs[-1] + .5*qs[-length(qs)])) # calculate the ratio of $p$ values less than .01 for each bin ratios <- tapply( res$pvalue, bins, function(p) mean( p < .01, na.rm=TRUE ) ) # plot these ratios barplot(ratios, xlab="mean normalized count", ylab="ratio of small $p$ values") ## ----beginner_filtByMean, dev="pdf"-------------------------------------- attr(res,"filterThreshold") plot(attr(res,"filterNumRej"),type="b", xlab="quantiles of 'baseMean'", ylab="number of rejections") ## ------------------------------------------------------------------------ res$ensembl <- sapply( strsplit( rownames(res), split="\\+" ), "[", 1 ) ## ----geneAnnot, cache=TRUE----------------------------------------------- library( "biomaRt" ) ensembl = useMart( "ensembl", dataset = "hsapiens_gene_ensembl" ) genemap <- getBM( attributes = c("ensembl_gene_id", "entrezgene", "hgnc_symbol"), filters = "ensembl_gene_id", values = res$ensembl, mart = ensembl ) idx <- match( res$ensembl, genemap$ensembl_gene_id ) res$entrez <- genemap$entrezgene[ idx ] res$hgnc_symbol <- genemap$hgnc_symbol[ idx ] ## ----showAnnot----------------------------------------------------------- head(res,4) ## ----geneAnnotRes-------------------------------------------------------- res[1:2,] ## ----writeCSV, eval=FALSE------------------------------------------------ ## write.csv( as.data.frame(res), file="results.csv" ) ## ----rld, cache=TRUE----------------------------------------------------- rld <- rlog( dds ) head( assay(rld) ) ## ----beginner_rldPlot,fig.width=10,fig.height=5-------------------------- par( mfrow = c( 1, 2 ) ) plot( log2( 1+counts(dds, normalized=TRUE)[, 1:2] ), col="#00000020", pch=20, cex=0.3 ) plot( assay(rld)[, 1:2], col="#00000020", pch=20, cex=0.3 ) ## ----euclDist------------------------------------------------------------ sampleDists <- dist( t( assay(rld) ) ) sampleDists ## ----beginner_sampleDistHeatmap, dev="pdf"------------------------------- sampleDistMatrix <- as.matrix( sampleDists ) rownames(sampleDistMatrix) <- paste( rld$treatment, rld$patient, sep="-" ) colnames(sampleDistMatrix) <- NULL library( "gplots" ) library( "RColorBrewer" ) colours = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) heatmap.2( sampleDistMatrix, trace="none", col=colours) ## ----beginner_samplePCA, dev="pdf"--------------------------------------- ramp <- 1:3/3 cols <- c(rgb(ramp, 0, 0), rgb(0, ramp, 0), rgb(0, 0, ramp), rgb(ramp, 0, ramp)) print( plotPCA( rld, intgroup = c( "patient", "treatment"), col=cols ) ) ## ----topVarGenes--------------------------------------------------------- library( "genefilter" ) topVarGenes <- head( order( rowVars( assay(rld) ), decreasing=TRUE ), 35 ) ## ----beginner_geneHeatmap, fig.width=9, fig.height=9--------------------- heatmap.2( assay(rld)[ topVarGenes, ], scale="row", trace="none", dendrogram="column", col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255)) ## ----sessInfo, results="asis", echo=FALSE-------------------------------- toLatex(sessionInfo()) ## ----resetOptions, results="hide", echo=FALSE---------------------------- options(prompt="> ", continue="+ ")