## ---- echo=FALSE, results="hide", warning=FALSE, message=FALSE----------- suppressPackageStartupMessages({ library(ATACseqQC) library(ChIPpeakAnno) library(BSgenome.Hsapiens.UCSC.hg19) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(phastCons100way.UCSC.hg19) library(MotifDb) }) knitr::opts_chunk$set(warning=FALSE, message=FALSE) ## ------------------------------------------------------------------------ ## load the library library(ATACseqQC) ## input is bamFile bamfile <- system.file("extdata", "GL1.bam", package="ATACseqQC", mustWork=TRUE) bamfile.labels <- gsub(".bam", "", basename(bamfile)) ## ------------------------------------------------------------------------ ## generate fragement size distribution fragSize <- fragSizeDist(bamfile, bamfile.labels) ## ------------------------------------------------------------------------ ## bamfile tags tags <- c("AS", "XN", "XM", "XO", "XG", "NM", "MD", "YS", "YT") ## files will be output into outPath outPath <- "splited" dir.create(outPath) ## shift the bam file by the 5'ends library(BSgenome.Hsapiens.UCSC.hg19) seqlev <- "chr1" ## subsample data for quick run which <- as(seqinfo(Hsapiens)[seqlev], "GRanges") gal <- readBamFile(bamfile, tag=tags, which=which, asMates=TRUE) gal1 <- shiftGAlignmentsList(gal) shiftedBamfile <- file.path(outPath, "shifted.bam") export(gal1, shiftedBamfile) ## ------------------------------------------------------------------------ library(TxDb.Hsapiens.UCSC.hg19.knownGene) txs <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene) library(phastCons100way.UCSC.hg19) ## run program for chromosome 1 only txs <- txs[seqnames(txs) %in% "chr1"] genome <- Hsapiens ## split the reads into NucleosomeFree, mononucleosome, ## dinucleosome and trinucleosome. objs <- splitGAlignmentsByCut(gal1, txs=txs, genome=genome, conservation=phastCons100way.UCSC.hg19) ## ------------------------------------------------------------------------ null <- writeListOfGAlignments(objs, outPath) ## list the files generated by splitBam. dir(outPath) ## ----eval=FALSE---------------------------------------------------------- # objs <- splitBam(bamfile, tags=tags, outPath=outPath, # txs=txs, genome=genome, # conservation=phastCons100way.UCSC.hg19) ## ----fig.height=4, fig.width=4------------------------------------------- library(ChIPpeakAnno) bamfiles <- file.path(outPath, c("NucleosomeFree.bam", "mononucleosome.bam", "dinucleosome.bam", "trinucleosome.bam")) ## Plot the cumulative percentage tag allocation in NucleosomeFree ## and mononucleosome bams. cumulativePercentage(bamfiles[1:2], as(seqinfo(Hsapiens)["chr1"], "GRanges")) ## ----fig.height=8, fig.width=4------------------------------------------- TSS <- promoters(txs, upstream=0, downstream=1) TSS <- unique(TSS) ## estimate the library size for normalization (librarySize <- estLibSize(bamfiles)) ## calculate the signals around TSSs. NTILE <- 101 dws <- ups <- 1010 sigs <- enrichedFragments(bamfiles, TSS=TSS, librarySize=librarySize, seqlev=seqlev, TSS.filter=0.5, n.tile = NTILE, upstream = ups, downstream = dws) ## log2 transformed signals names(sigs) <- gsub(".bam", "", basename(names(sigs))) sigs.log2 <- lapply(sigs, function(.ele) log2(.ele+1)) #plot heatmap featureAlignedHeatmap(sigs.log2, reCenterPeaks(TSS, width=ups+dws), zeroAt=.5, n.tile=NTILE) ## ----fig.show="hide"----------------------------------------------------- ## get signals normalized for nucleosome-free and nucleosome-bound regions. out <- featureAlignedDistribution(sigs, reCenterPeaks(TSS, width=ups+dws), zeroAt=.5, n.tile=NTILE, type="l") ## ------------------------------------------------------------------------ ## rescale the nucleosome-free and nucleosome signals to 0~1 range01 <- function(x){(x-min(x))/(max(x)-min(x))} out <- apply(out, 2, range01) matplot(out, type="l", xaxt="n", xlab="Position (bp)", ylab="Fraction of signal") axis(1, at=seq(0, 100, by=10)+1, labels=c("-1K", seq(-800, 800, by=200), "1K"), las=3) abline(v=seq(0, 100, by=10)+1, lty=2, col="gray") ## ------------------------------------------------------------------------ ## foot prints library(MotifDb) CTCF <- query(MotifDb, c("CTCF")) CTCF <- as.list(CTCF) print(CTCF[[1]], digits=2) factorFootprints(shiftedBamfile, pfm=CTCF[[1]], genome=genome, min.score="95%", seqlev=seqlev, upstream=100, downstream=100) ## ----sessionInfo--------------------------------------------------------- sessionInfo()