## ----docSetup, bootstrap.show.code = FALSE, dev = device, bootstrap.show.message=FALSE---- ## knitrBoostrap and device chunk options load_install('knitr') opts_chunk$set(bootstrap.show.code = FALSE, dev = device) if(!outputIsHTML) opts_chunk$set(bootstrap.show.code = FALSE, dev = device, echo = FALSE) ## ----setup, bootstrap.show.message=FALSE----------------------------------- #### Libraries needed ## Bioconductor load_install('bumphunter') load_install('derfinder') load_install('derfinderPlot') load_install('GenomeInfoDb') load_install('GenomicRanges') load_install('ggbio') ## Transcription database to use by default if(is.null(txdb)) { load_install('TxDb.Hsapiens.UCSC.hg19.knownGene') txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene } ## CRAN load_install('ggplot2') if(!is.null(theme)) theme_set(theme) load_install('grid') load_install('gridExtra') load_install('knitr') load_install('RColorBrewer') load_install('mgcv') load_install('whisker') load_install('DT') load_install('devtools') ## Working behind the scenes # load_install('knitcitations') # load_install('rmarkdown') ## Optionally # load_install('knitrBootstrap') #### Code setup ## For ggplot tmp <- regions names(tmp) <- seq_len(length(tmp)) regions.df <- as.data.frame(tmp) regions.df$width <- width(tmp) rm(tmp) ## Special subsets: need at least 3 points for a density plot keepChr <- table(regions.df$seqnames) > 2 regions.df.plot <- subset(regions.df, seqnames %in% names(keepChr[keepChr])) if(hasSignificant) { ## Keep only those sig regions.df.sig <- regions.df[significantVar, ] keepChr <- table(regions.df.sig$seqnames) > 2 regions.df.sig <- subset(regions.df.sig, seqnames %in% names(keepChr[keepChr])) } ## Find which chrs are present in the data set chrs <- levels(seqnames(regions)) ## areaVar initialize areaVar <- NULL ## ----pvaluePlots, echo=FALSE, results='asis', eval=hasPvalueVars, echo=hasPvalueVars---- # for(i in seq_len(length(pvalueVars))) { # densityVarName <- names(pvalueVars[i]) # densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName) # cat(knit_child(text = whisker.render(templatePvalueDensityInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') # } ## ----regLen, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant---- # xrange <- range(log10(regions.df.plot$width)) * c(0.95, 1.05) # p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) + # geom_line(stat='density') + labs(title='Density of region lengths') + # xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # p2b <- ggplot(regions.df.sig, aes(x=log10(width), colour=seqnames)) + # geom_line(stat='density') + # labs(title='Density of region lengths (significant only)') + # xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # grid.arrange(p2a, p2b) ## ----regLen2, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant---- p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) + geom_line(stat='density') + labs(title='Density of region lengths') + xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + theme(legend.title=element_blank()) p2a ## ----density-area, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant---- # xrange <- range(regions.df.plot[, 'area']) * c(0.95, 1.05) # p3aarea <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'area']), ], aes(x=area, colour=seqnames)) + # geom_line(stat='density') + labs(title='Density of Area') + # xlab('Area') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # p3barea <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'area']), ], aes(x=area, colour=seqnames)) + # geom_line(stat='density') + # labs(title='Density of Area (significant only)') + # xlab('Area') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # grid.arrange(p3aarea, p3barea) ## ----density-solo-area, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant---- p3aarea <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'area']), ], aes(x=area, colour=seqnames)) + geom_line(stat='density') + labs(title='Density of Area') + xlab('Area') + scale_colour_discrete(limits=chrs) + theme(legend.title=element_blank()) p3aarea ## ----density-value, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant---- # xrange <- range(regions.df.plot[, 'value']) * c(0.95, 1.05) # p3avalue <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'value']), ], aes(x=value, colour=seqnames)) + # geom_line(stat='density') + labs(title='Density of Value') + # xlab('Value') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # p3bvalue <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'value']), ], aes(x=value, colour=seqnames)) + # geom_line(stat='density') + # labs(title='Density of Value (significant only)') + # xlab('Value') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # grid.arrange(p3avalue, p3bvalue) ## ----density-solo-value, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant---- p3avalue <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'value']), ], aes(x=value, colour=seqnames)) + geom_line(stat='density') + labs(title='Density of Value') + xlab('Value') + scale_colour_discrete(limits=chrs) + theme(legend.title=element_blank()) p3avalue ## ----density-clusterL, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant---- # xrange <- range(regions.df.plot[, 'clusterL']) * c(0.95, 1.05) # p3aclusterL <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) + # geom_line(stat='density') + labs(title='Density of Cluster Length') + # xlab('Cluster Length') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # p3bclusterL <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) + # geom_line(stat='density') + # labs(title='Density of Cluster Length (significant only)') + # xlab('Cluster Length') + scale_colour_discrete(limits=chrs) + # xlim(xrange) + theme(legend.title=element_blank()) # grid.arrange(p3aclusterL, p3bclusterL) ## ----density-solo-clusterL, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant---- p3aclusterL <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) + geom_line(stat='density') + labs(title='Density of Cluster Length') + xlab('Cluster Length') + scale_colour_discrete(limits=chrs) + theme(legend.title=element_blank()) p3aclusterL ## ----densityPlots, echo=FALSE, results='asis', eval=hasDensityVars, echo=hasDensityVars---- for(i in seq_len(length(densityVars))) { densityVarName <- names(densityVars[i]) densityVarName <- ifelse(is.null(densityVarName), densityVars[i], densityVarName) cat(knit_child(text = whisker.render(templateDensityInUse, list(varName = densityVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') } ## ----genomeOverview1, message=FALSE, fig.width=7, fig.height=9, dpi=300, eval=hasSignificant, echo=hasSignificant---- # ## Choose what variable to show on the top # tmp <- regions # tmp$significant <- factor(significantVar, levels = c('TRUE', 'FALSE')) # if(!'area' %in% colnames(mcols(tmp))) { # if(hasDensityVars) { # tmp$area <- mcols(tmp)[[densityVars[1]]] # areaVar <- densityVars[1] # areaVar <- ifelse(is.null(names(areaVar)), densityVars[1], names(areaVar)) # } else { # tmp$area <- 0 # areaVar <- NULL # } # } else { # areaVar <- 'area' # } # plotOverview(regions=tmp, type='pval', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12)) # rm(tmp) ## ----manhattanPlots, echo=FALSE, results='asis', eval=hasPvalueVars, echo=hasPvalueVars---- # for(i in seq_len(length(pvalueVars))) { # densityVarName <- names(pvalueVars[i]) # densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName) # cat(knit_child(text = whisker.render(templateManhattanInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') # } ## ----genomeOverview2, message=FALSE, fig.width=7, fig.height=9, dpi=300---- ## Annotate regions with bumphunter if(is.null(annotation)) { genes <- annotateTranscripts(txdb = txdb) annotation <- matchGenes(x = regions, subject = genes) } ## Make the plot plotOverview(regions=regions, annotation=annotation, type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12)) ## ----annoReg, results='asis'----------------------------------------------- annoReg <- table(annotation$region, useNA='always') annoReg.df <- data.frame(Region=names(annoReg), Count=as.vector(annoReg)) if(outputIsHTML) { kable(annoReg.df, format = 'markdown', align=rep('c', 3)) } else { kable(annoReg.df) } ## ----genomeOverview3, message=FALSE, fig.width=7, fig.height=9, dpi=300, eval=hasSignificant, echo=hasSignificant---- # plotOverview(regions=regions[significantVar, ], annotation=annotation[significantVar, ], type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12)) ## ----countTable, results='asis'-------------------------------------------- ## Construct genomic state object genomicState <- makeGenomicState(txdb = txdb, chrs = chrs, verbose = FALSE) ## Annotate regions by genomic state annotatedRegions <- annotateRegions(regions, genomicState$fullGenome, verbose = FALSE) ## Genomic states table info <- do.call(rbind, lapply(annotatedRegions$countTable, function(x) { data.frame(table(x)) })) colnames(info) <- c('Number of Overlapping States', 'Frequency') info$State <- gsub('\\..*', '', rownames(info)) rownames(info) <- NULL if(outputIsHTML) { kable(info, format = 'markdown', align=rep('c', 4)) } else { kable(info) } ## ----vennDiagram, fig.width=7, fig.height=7-------------------------------- ## Venn diagram for all regions venn <- vennRegions(annotatedRegions, counts.col = 'blue', main = 'Regions overlapping genomic states') ## ----vennDiagramSignificant, eval = hasSignificant, echo = hasSignificant, fig.width=7, fig.height=7---- # ## Venn diagram for all regions # vennSig <- vennRegions(annotatedRegions, counts.col = 'blue', # main = 'Significant regions overlapping genomic states', # subsetIndex = significantVar) ## ----bestRegionInfo, results='asis'---------------------------------------- ## Add annotation information regions.df <- cbind(regions.df, annotation) ## Rank by p-value (first pvalue variable supplied) if(hasPvalueVars){ topRegions <- head(regions.df[order(regions.df[, pvalueVars[1]], decreasing = FALSE), ], nBestRegions) topRegions <- cbind(data.frame('pvalueRank' = seq_len(nrow(topRegions))), topRegions) } else { topRegions <- head(regions.df, nBestRegions) } ## Clean up -Inf, Inf if present ## More details at https://github.com/ramnathv/rCharts/issues/259 replaceInf <- function(df, colsubset=seq_len(ncol(df))) { for(i in colsubset) { inf.idx <- !is.finite(df[, i]) if(any(inf.idx)) { inf.sign <- sign(df[inf.idx, i]) df[inf.idx, i] <- inf.sign * 1e100 } } return(df) } topRegions <- replaceInf(topRegions, which(sapply(topRegions, function(x) { class(x) %in% c('numeric', 'integer')}))) ## Make the table greptext <- 'value$|area$|mean|log2FoldChange' greppval <- 'pvalues$|qvalues$|fwer$' if(hasPvalueVars) { greppval <- paste0(paste(pvalueVars, collapse = '$|'), '$|', greppval) } if(hasDensityVars) { greptext <- paste0(paste(densityVars, collapse = '$|'), '$|', greptext) } for(i in which(grepl(greppval, colnames(topRegions)))) topRegions[, i] <- format(topRegions[, i], scientific = TRUE) if(outputIsHTML) { datatable(topRegions, options = list(pagingType='full_numbers', pageLength=10, scrollX='100%'), rownames = FALSE) %>% formatRound(which(grepl(greptext, colnames(topRegions))), digits) } else { ## Only print the top part if your output is a PDF file df_top <- head(topRegions, 20) for(i in which(grepl(greptext, colnames(topRegions)))) df_top[, i] <- round(df_top[, i], digits) kable(df_top) } ## ----child = customCode, eval = hasCustomCode------------------------------ # NA ## ----thecall, echo=FALSE--------------------------------------------------- theCall ## ----reproducibility1, echo=FALSE------------------------------------------ ## Date the report was generated Sys.time() ## ----reproducibility2, echo=FALSE------------------------------------------ ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits=3) ## ----reproducibility3, echo=FALSE------------------------------------------------------------------------------------- ## Session info options(width = 120) session_info() ## ----bibliography, results='asis', echo=FALSE, warning = FALSE-------------------------------------------------------- ## Print bibliography bibliography()