## ---- echo=FALSE, results="hide", warning=FALSE-------------------------- suppressPackageStartupMessages({ library(NADfinder) library(BSgenome.Mmusculus.UCSC.mm10) library(rtracklayer) }) knitr::opts_chunk$set(warning=FALSE, message=FALSE, eval=TRUE) ## ----quickStart, fig.width=6, fig.height=4------------------------------- ## load the library library(NADfinder) library(SummarizedExperiment) ## bam file path path <- "path/to/your/bam/files" f <- c(genome="genome.bam", nucleosome="nucleosome.bam") ## window size for tile of genome. Here we set it to a big window size, ## ie., 50k because of the following two reasons: ## 1. peaks of NADs are wide; ## 2. data will be smoothed better with bigger window size. ws <- 50000 ## step for tile of genome step <- 1000 ## Set the background. ## 0.25 means 25% of the lower ratios will be used for background training. backgroundPercentage <- 0.25 ## Count the reads for each window with a given step in the genome. ## The output will be a GRanges object. library(BSgenome.Mmusculus.UCSC.mm10) ## ----eval=FALSE---------------------------------------------------------- # se <- tileCount(reads=file.path(path, f), # genome=Mmusculus, # windowSize=ws, # step=step) ## ------------------------------------------------------------------------ data(single.count) se <- single.count ## ------------------------------------------------------------------------ ## Calculate ratios for peak calling. We use nucleosome vs genomic DNA. dat <- log2se(se, nucleosomeCols = "nucleosome.bam", genomeCols="genome.bam") ## Smooth the ratios for each chromosome. ## We found that for each chromosome, the ratios are higher in 5'end than 3'end. datList <- smoothRatiosByChromosome(dat, N=100) ## check the difference between the cumulative percentage tag allocation ## in genome and nucleosome samples. cumulativePercentage(datList[["chr18"]]) ## ------------------------------------------------------------------------ ## check the results of smooth function chr18 <- datList[["chr18"]] ## we only have reads in chr18 in test data. chr18subset <- subset(chr18, seq.int(floor(length(chr18)/10))*10) chr18 <- assays(chr18subset) ylim <- range(c(chr18$ratio[, 1], chr18$bcRatio[, 1], chr18$smoothedRatio[, 1])) plot(chr18$ratio[, 1], ylim=ylim*c(.9, 1.1), type="l", main="chr18") abline(h=0, col="cyan", lty=2) points(chr18$bcRatio[, 1], type="l", col="green") points(chr18$smoothedRatio[, 1], type="l", col="red") legend("topright", legend = c("raw_ratio", "background_corrected", "smoothed"), fill = c("black", "green", "red")) ## call peaks for each chromosome peaks <- lapply(datList, trimPeaks, backgroundPercentage=backgroundPercentage, cutoffPvalue=0.05, countFilter=1000) ## plot the peaks in "chr18" peaks.subset <- countOverlaps(rowRanges(chr18subset), peaks$chr18)>=1 peaks.run <- rle(peaks.subset) run <- cumsum(peaks.run$lengths) run <- data.frame(x0=c(1, run[-length(run)]), x1=run) run <- run[peaks.run$values, , drop=FALSE] rect(xleft = run$x0, ybottom = ylim[2]*.75, xright = run$x1, ytop = ylim[2]*.8, col = "black") ## convert list to a GRanges object peaks.gr <- unlist(GRangesList(peaks)) ## ------------------------------------------------------------------------ ## output the peaks write.csv(as.data.frame(unname(peaks.gr)), "peaklist.csv", row.names=FALSE) ## export peaks to a bed file. library(rtracklayer) export(peaks.gr, "peaklist.bed", "BED") ## ------------------------------------------------------------------------ library(NADfinder) ## bam file path path <- "path/to/your/bam/files" f <- c("G26.bam", "G28.bam", "G29.bam", "N26.bam", "N28.bam", "N29.bam") ws <- 50000 step <- 1000 library(BSgenome.Mmusculus.UCSC.mm10) ## ----eval=FALSE---------------------------------------------------------- # se <- tileCount(reads=file.path(path, f), # genome=Mmusculus, # windowSize=ws, # step=step) ## ------------------------------------------------------------------------ data(triplicates.counts) se <- triplicates.counts ## Calculate ratios for nucleoli vs genomic sample. gps <- c("26", "28", "29") se <- log2se(se, nucleosomeCols = paste0("N", gps, ".bam"), genomeCols = paste0("G", gps, ".bam")) getCorrelations(se, "chr18") seList<- smoothRatiosByChromosome(se, chr="chr18") cumulativePercentage(seList[["chr18"]]) peaks <- lapply(seList, callPeaks, cutoffAdjPvalue=0.05, countFilter=1000) peaks <- unlist(GRangesList(peaks)) peaks ## ----fig.height=1.5------------------------------------------------------ ideo <- readRDS(system.file("extdata", "ideo.mm10.rds", package = "NADfinder")) plotSig(ideo=ideo, grList=GRangesList(peaks), mcolName="AveSig", layout=list("chr18"), parameterList=list(types="heatmap")) ## ----sessionInfo--------------------------------------------------------- sessionInfo()