## ----'installDer', eval = FALSE----------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("derfinder") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() ## ----'citation'--------------------------------------------------------------- ## Citation info citation("derfinder") ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library("knitcitations") ## Load knitcitations with a clean bibliography cleanbib() cite_options(hyperlink = "to.doc", citation_format = "text", style = "html") # Note links won't show for now due to the following issue # https://github.com/cboettig/knitcitations/issues/63 ## Write bibliography information bib <- c( knitcitations = citation("knitcitations"), derfinder = citation("derfinder")[1], BiocStyle = citation("BiocStyle"), knitr = citation("knitr")[3], rmarkdown = citation("rmarkdown")[1], brainspan = RefManageR::BibEntry( bibtype = "Unpublished", key = "brainspan", title = "Atlas of the Developing Human Brain [Internet]. Funded by ARRA Awards 1RC2MH089921-01, 1RC2MH090047-01, and 1RC2MH089929-01.", author = "BrainSpan", year = 2011, url = "http://www.brainspan.org/" ), originalder = citation("derfinder")[2], R = citation(), IRanges = citation("IRanges"), sessioninfo = citation("sessioninfo"), testthat = citation("testthat"), GenomeInfoDb = RefManageR::BibEntry( bibtype = "manual", key = "GenomeInfoDb", author = "Sonali Arora and Martin Morgan and Marc Carlson and H. Pagès", title = "GenomeInfoDb: Utilities for manipulating chromosome and other 'seqname' identifiers", year = 2017, doi = "10.18129/B9.bioc.GenomeInfoDb" ), GenomicRanges = citation("GenomicRanges"), ggplot2 = citation("ggplot2"), bumphunter = citation("bumphunter")[1], TxDb.Hsapiens.UCSC.hg19.knownGene = citation("TxDb.Hsapiens.UCSC.hg19.knownGene"), AnnotationDbi = RefManageR::BibEntry( bibtype = "manual", key = "AnnotationDbi", author = "Hervé Pagès and Marc Carlson and Seth Falcon and Nianhua Li", title = "AnnotationDbi: Annotation Database Interface", year = 2017, doi = "10.18129/B9.bioc.AnnotationDbi" ), BiocParallel = citation("BiocParallel"), derfinderHelper = citation("derfinderHelper"), GenomicAlignments = citation("GenomicAlignments"), GenomicFeatures = citation("GenomicFeatures"), GenomicFiles = citation("GenomicFiles"), Hmisc = citation("Hmisc"), qvalue = citation("qvalue"), Rsamtools = citation("Rsamtools"), rtracklayer = citation("rtracklayer"), S4Vectors = RefManageR::BibEntry( bibtype = "manual", key = "S4Vectors", author = "Hervé Pagès and Michael Lawrence and Patrick Aboyoun", title = "S4Vectors: S4 implementation of vector-like and list-like objects", year = 2017, doi = "10.18129/B9.bioc.S4Vectors" ), bumphunterPaper = RefManageR::BibEntry( bibtype = "article", key = "bumphunterPaper", title = "Bump hunting to identify differentially methylated regions in epigenetic epidemiology studies", author = "Jaffe, Andrew E and Murakami, Peter and Lee, Hwajin and Leek, Jeffrey T and Fallin, M Daniele and Feinberg, Andrew P and Irizarry, Rafael A", year = 2012, journal = "International Journal of Epidemiology" ), derfinderData = citation("derfinderData") ) write.bibtex(bib, file = "quickstartRef.bib") ## ----'ultraQuick', eval = FALSE----------------------------------------------- # ## Load libraries # library("derfinder") # library("derfinderData") # library("GenomicRanges") # # ## Determine the files to use and fix the names # files <- rawFiles(system.file("extdata", "AMY", package = "derfinderData"), # samplepatt = "bw", fileterm = NULL # ) # names(files) <- gsub(".bw", "", names(files)) # # ## Load the data from disk -- On Windows you have to load data from Bam files # fullCov <- fullCoverage(files = files, chrs = "chr21", verbose = FALSE) # # ## Get the region matrix of Expressed Regions (ERs) # regionMat <- regionMatrix(fullCov, cutoff = 30, L = 76, verbose = FALSE) # # ## Get pheno table # pheno <- subset(brainspanPheno, structure_acronym == "AMY") # # ## Identify which ERs are differentially expressed, that is, find the DERs # library("DESeq2") # # ## Round matrix # counts <- round(regionMat$chr21$coverageMatrix) # # ## Round matrix and specify design # dse <- DESeqDataSetFromMatrix(counts, pheno, ~ group + gender) # # ## Perform DE analysis # dse <- DESeq(dse, test = "LRT", reduced = ~gender, fitType = "local") # # ## Extract results # mcols(regionMat$chr21$regions) <- c(mcols(regionMat$chr21$regions), results(dse)) # # ## Save info in an object with a shorter name # ers <- regionMat$chr21$regions # ers ## ----'start', message=FALSE--------------------------------------------------- ## Load libraries library("derfinder") library("derfinderData") library("GenomicRanges") ## ----'locateAMYfiles'--------------------------------------------------------- ## Determine the files to use and fix the names files <- rawFiles(system.file("extdata", "AMY", package = "derfinderData"), samplepatt = "bw", fileterm = NULL ) names(files) <- gsub(".bw", "", names(files)) ## ----'getData', eval = .Platform$OS.type != "windows"------------------------- ## Load the data from disk fullCov <- fullCoverage( files = files, chrs = "chr21", verbose = FALSE, totalMapped = rep(1, length(files)), targetSize = 1 ) ## ----'getDataWindows', eval = .Platform$OS.type == "windows", echo = FALSE---- # ## Load data in Windows case # foo <- function() { # load(system.file("extdata", "fullCov", "fullCovAMY.RData", # package = "derfinderData" # )) # return(fullCovAMY) # } # fullCov <- foo() ## ----'regionMatrix'----------------------------------------------------------- ## Get the region matrix of Expressed Regions (ERs) regionMat <- regionMatrix(fullCov, cutoff = 30, L = 76, verbose = FALSE) ## ----'exploreRegMatRegs'------------------------------------------------------ ## regions output regionMat$chr21$regions ## ----'exploreRegMatBP'-------------------------------------------------------- ## Base-level coverage matrices for each of the regions ## Useful for plotting lapply(regionMat$chr21$bpCoverage[1:2], head, n = 2) ## Check dimensions. First region is 565 long, second one is 138 bp long. ## The columns match the number of samples (12 in this case). lapply(regionMat$chr21$bpCoverage[1:2], dim) ## ----'exploreRegMatrix'------------------------------------------------------- ## Dimensions of the coverage matrix dim(regionMat$chr21$coverageMatrix) ## Coverage for each region. This matrix can then be used with limma or other pkgs head(regionMat$chr21$coverageMatrix) ## ----'phenoData'-------------------------------------------------------------- ## Get pheno table pheno <- subset(brainspanPheno, structure_acronym == "AMY") ## ----'identifyDERsDESeq2'----------------------------------------------------- ## Identify which ERs are differentially expressed, that is, find the DERs library("DESeq2") ## Round matrix counts <- round(regionMat$chr21$coverageMatrix) ## Round matrix and specify design dse <- DESeqDataSetFromMatrix(counts, pheno, ~ group + gender) ## Perform DE analysis dse <- DESeq(dse, test = "LRT", reduced = ~gender, fitType = "local") ## Extract results mcols(regionMat$chr21$regions) <- c( mcols(regionMat$chr21$regions), results(dse) ) ## Save info in an object with a shorter name ers <- regionMat$chr21$regions ers ## ----'vennRegions', fig.cap = "Venn diagram showing ERs by annotation class."---- ## Find overlaps between regions and summarized genomic annotation annoRegs <- annotateRegions(ers, genomicState$fullGenome, verbose = FALSE) library("derfinderPlot") venn <- vennRegions(annoRegs, counts.col = "blue", main = "Venn diagram using TxDb.Hsapiens.UCSC.hg19.knownGene annotation" ) ## ----'nearestGene'------------------------------------------------------------ ## Load database of interest library("TxDb.Hsapiens.UCSC.hg19.knownGene") txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, "chr21") ## Find nearest feature library("bumphunter") genes <- annotateTranscripts(txdb) annotation <- matchGenes(ers, subject = genes) ## Restore seqlevels txdb <- restoreSeqlevels(txdb) ## View annotation results head(annotation) ## You can use derfinderPlot::plotOverview() to visualize this information ## ----'firstfive', fig.cap = paste0("Base-pair resolution plot of differentially expressed region ", 1:5, ".")---- ## Extract the region coverage regionCov <- regionMat$chr21$bpCoverage plotRegionCoverage( regions = ers, regionCoverage = regionCov, groupInfo = pheno$group, nearestAnnotation = annotation, annotatedRegions = annoRegs, whichRegions = seq_len(5), txdb = txdb, scalefac = 1, ask = FALSE, verbose = FALSE ) ## ----createVignette, eval=FALSE----------------------------------------------- # ## Create the vignette # library("rmarkdown") # system.time(render("derfinder-quickstart.Rmd", "BiocStyle::html_document")) # # ## Extract the R code # library("knitr") # knit("derfinder-quickstart.Rmd", tangle = TRUE) ## ----createVignette2---------------------------------------------------------- ## Clean up file.remove("quickstartRef.bib") ## ----reproducibility1, echo=FALSE--------------------------------------------- ## Date the vignette was generated Sys.time() ## ----reproducibility2, echo=FALSE--------------------------------------------- ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits = 3) ## ----reproducibility3, echo=FALSE------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info() ## ----vignetteBiblio, results = 'asis', echo = FALSE, warning = FALSE, message = FALSE--------------------------------- ## Print bibliography bibliography()