## ----setup, include=FALSE------------------------------------------------ knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE) ## ----installpkg,eval=FALSE,message=FALSE,warning=FALSE------------------- # source("http://www.bioconductor.org/biocLite.R") # biocLite("esATAC") ## ----loading00,message=FALSE--------------------------------------------- library(esATAC) ## ----casecontrol, eval=FALSE--------------------------------------------- # library(esATAC) # # # call pipeline # # all human motif in JASPAR will be processed # conclusion <- # atacPipe2( # # MODIFY: Change these paths to your own case files! # # e.g. fastqInput1 = "your/own/data/path.fastq" # case=list(fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz")), # # MODIFY: Change these paths to your own control files! # # e.g. fastqInput1 = "your/own/data/path.fastq" # control=list(fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.2.fq.bz2"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.2.fq.bz2")), # # MODIFY: Change this path to an permanent path to be used in future! # # refdir <- "./esATAC_pipeline/refdir", # # tmpdir = "./esATAC_pipeline/intermediate_results", # # MODIFY: Set the genome for your data # genome = "hg19") # ## ----casesingle, eval=FALSE---------------------------------------------- # library(esATAC) # # # call pipeline # # for overall example(all human motif in JASPAR will be processed) # conclusion <- # atacPipe( # # MODIFY: Change these paths to your own case files! # # e.g. fastqInput1 = "your/own/data/path.fastq" # fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz"), # # MODIFY: Change this path to an permanent path to be used in future! # # refdir <- "./esATAC_pipeline/refdir", # # tmpdir = "./esATAC_pipeline/intermediate_results", # # MODIFY: Set the genome for your data # genome = "hg19") ## ----eval=FALSE---------------------------------------------------------- # # ### # library(esATAC) # conclusion2 <- # atacPipe2( # case=list(fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz")), # control=list(fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.2.fq.bz2"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.2.fq.bz2")), # genome = "hg19", # motifs = getMotifInfo(motif.file = system.file("extdata", "CustomizedMotif.txt", package="esATAC"))) ## ----eval=FALSE---------------------------------------------------------- # # library(esATAC) # conclusion <- # atacPipe( # fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz"), # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz"), # genome = "hg19", # motifs = getMotifInfo(motif.file = system.file("extdata", "CustomizedMotif.txt", package="esATAC"))) ## ----flowchart,message=FALSE,warning=FALSE------------------------------- library(esATAC) printMap() ## ----loadingx3,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----querymannual0,message=FALSE,warning=FALSE--------------------------- ?SamToBed ## ----loadingx4,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----querymannual,message=FALSE,warning=FALSE---------------------------- ?atacSamToBed ## ----loadingx5,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----querymannual1,message=FALSE,warning=FALSE--------------------------- ?samToBed ## ----loading0,message=FALSE---------------------------------------------- library(esATAC) ## ----loading1,message=FALSE---------------------------------------------- options(java.parameters = "-Xmx8000m") ## ----loadingpkg,message=FALSE-------------------------------------------- library(magrittr) library(BSgenome.Hsapiens.UCSC.hg19) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) library(R.utils) ## ----loadingx6,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----loadref, eval=TRUE-------------------------------------------------- # we use temp directiory "td" here # Change it to your directiory because the intermediate file may be huge td<-tempdir() options(atacConf=setConfigure("tmpdir",td)) options(atacConf=setConfigure("threads",8)) ## ----loadingx7,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----config, eval=FALSE-------------------------------------------------- # options(atacConf=setConfigure("refdir","path/to/refdatafolder")) # options(atacConf=setConfigure("genome","hg19")) ## ----warning = FALSE,eval=FALSE, results = 'hide', message = FALSE------- # library(esATAC) # library(magrittr) # # # dir.create("./esATAC_pipeline") # dir.create("./esATAC_pipeline/refdir") # dir.create("./esATAC_pipeline/result") # # #configure reference path, result path, the max number of threads, genome # options(atacConf=setConfigure("refdir","esATAC_pipeline/refdir")) # options(atacConf=setConfigure("tmpdir","esATAC_pipeline/result")) # options(atacConf=setConfigure("threads",2)) # options(atacConf=setConfigure("genome","hg19")) # # #raw reads # fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz") # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz") # # #pipeline # atacUnzipAndMerge(fastqInput1 = fastqInput1,fastqInput2 = fastqInput2) %T>% # atacQCReport %>% # atacRenamer %>% # atacRemoveAdapter %>% # atacBowtie2Mapping %T>% # atacLibComplexQC %>% # atacSamToBed(maxFragLen = 2000) %T>% # atacBedToBigWig %T>% # atacFragLenDistr %>% # atacBedUtils(maxFragLen = 100, chrFilterList = NULL) %>% # atacPeakCalling ## ----warning = FALSE,eval=FALSE, results = 'hide', message = FALSE------- # dir.create("./esATAC_pipeline") # dir.create("./esATAC_pipeline/refdir") # dir.create("./esATAC_pipeline/result") # # #configure reference path, result path, the max number of threads, genome # options(atacConf=setConfigure("refdir","esATAC_pipeline/refdir")) # options(atacConf=setConfigure("tmpdir","esATAC_pipeline/result")) # options(atacConf=setConfigure("threads",2)) # options(atacConf=setConfigure("genome","hg19")) ## ----warning = FALSE,eval=FALSE, results = 'hide', message = FALSE------- # #raw reads # fastqInput1 = system.file(package="esATAC", "extdata", "chr20_1.1.fq.gz") # fastqInput2 = system.file(package="esATAC", "extdata", "chr20_2.1.fq.gz") # # #pipeline # atacUnzipAndMerge(fastqInput1 = fastqInput1,fastqInput2 = fastqInput2) %T>% # atacQCReport %>% # atacRenamer %>% # atacRemoveAdapter %>% # atacBowtie2Mapping %T>% # atacLibComplexQC %>% # atacSamToBed(maxFragLen = 2000) %T>% # atacBedToBigWig %T>% # atacFragLenDistr %>% # atacBedUtils(maxFragLen = 100, chrFilterList = NULL) %>% # atacPeakCalling ## ----loadingx8,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----Preprocess, warning = FALSE, results = 'hide', message = FALSE------ # Identify adapters prefix<-system.file(package="esATAC", "extdata", "uzmg") (reads_1 <-file.path(prefix,"m1",dir(file.path(prefix,"m1")))) (reads_2 <-file.path(prefix,"m2",dir(file.path(prefix,"m2")))) reads_merged_1 <- file.path(td,"reads1.fastq") reads_merged_2 <- file.path(td,"reads2.fastq") atacproc <- atacUnzipAndMerge(fastqInput1 = reads_1,fastqInput2 = reads_2) %>% atacRenamer %>% atacRemoveAdapter ## ----adrmhelp,results='hide'--------------------------------------------- library(Rbowtie2) adapterremoval_usage() ## ----loadingx9,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----Mapping, warning = FALSE, results = 'hide', message = FALSE--------- ## Building a bowtie2 index library("Rbowtie2") refs <- dir(system.file(package="esATAC", "extdata", "bt2","refs"), full=TRUE) bowtie2_build(references=refs, bt2Index=file.path(td, "lambda_virus"), "--threads 4 --quiet",overwrite=TRUE) ## Alignments reads_1 <- system.file(package="esATAC", "extdata", "bt2", "reads", "reads_1.fastq") reads_2 <- system.file(package="esATAC", "extdata", "bt2", "reads", "reads_2.fastq") if(file.exists(file.path(td, "lambda_virus.1.bt2"))){ (bowtie2Mapping(bt2Idx = file.path(td, "lambda_virus"), samOutput = file.path(td, "result.sam"), fastqInput1=reads_1,fastqInput2=reads_2,threads=3)) head(readLines(file.path(td, "result.sam"))) } ## ----bt2help,results='hide'---------------------------------------------- library(Rbowtie2) bowtie2_usage() ## ----loadingx10,eval=TRUE,echo=FALSE,message=FALSE----------------------- library(esATAC) ## ----Convert, warning = FALSE, results = 'hide', message = FALSE--------- sambzfile <- system.file(package="esATAC", "extdata", "Example.sam.bz2") samfile <- file.path(td,"Example.sam") bunzip2(sambzfile,destname=samfile,overwrite=TRUE,remove=FALSE) samToBed(samInput = samfile) ## ----loadingx11,eval=TRUE,echo=FALSE,message=FALSE----------------------- library(esATAC) ## ----FiltCalling, warning = FALSE, results = 'hide', message = FALSE----- bedbzfile <- system.file(package="esATAC", "extdata", "chr20.50000.bed.bz2") bedfile <- file.path(td,"chr20.50000.bed") bunzip2(bedbzfile,destname=bedfile,overwrite=TRUE,remove=FALSE) bedUtils(bedInput = bedfile,maxFragLen = 100, chrFilterList = NULL) %>% atacPeakCalling ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----echo = TRUE, results = 'hide', message = FALSE---------------------- ## extract example peak file from package "esATAC" p1bz <- system.file("extdata", "Example_peak1.bed.bz2", package="esATAC") peak1_path <- as.vector(bunzip2(filename = p1bz, destname = file.path(getwd(), "Example_peak1.bed"), ext="bz2", FUN=bzfile, overwrite=TRUE, remove = FALSE)) ## run peakanno to annotate peaks AnnoInfo <- peakanno(peakInput = peak1_path, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, annoDb = "org.Hs.eg.db") ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = TRUE, echo = FALSE, warning = FALSE, results = 'hide', message = FALSE---- library(ChIPseeker) ## ----eval = TRUE, echo = FALSE, warning = FALSE, results = 'hide', message = FALSE---- peakanno <- getReportVal(AnnoInfo, "annoOutput.rds") plotAnnoPie(x = peakanno) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = TRUE, echo = FALSE, warning = FALSE-------------------------- peakanno <- as.data.frame(peakanno) colnames(peakanno)[1] <- "chromatin" peakanno <- subset(peakanno, select=c("chromatin", "start", "end", "annotation", "geneStart", "geneEnd", "geneId", "distanceToTSS", "SYMBOL")) knitr::kable(peakanno[1:5, ]) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = TRUE, echo = TRUE, results = 'hide', message = FALSE--------- ## extract gene ID geneId <- as.character(sample(seq(10000), 100)) ## do GO analysis goAna <- goanalysis(gene = geneId, OrgDb = "org.Hs.eg.db", keytype = "ENTREZID", ont = "MF") ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = TRUE, echo = FALSE, warning = FALSE-------------------------- go_path <- getReportVal(goAna, "goOutput") go_data <- read.table(file = go_path, header = TRUE, sep = "\t") go_data <- subset(go_data, select = c("ID", "Description", "GeneRatio", "pvalue", "qvalue")) knitr::kable(go_data[1:5, ]) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = FALSE, echo = TRUE, warning = FALSE, results = 'hide', message = FALSE---- # sample.path <- system.file("extdata", "chr20_sample_peak.bed.bz2", package="esATAC") # sample.path <- as.vector(bunzip2(filename = sample.path, # destname = file.path(getwd(), "chr20_sample_peak.bed"), # ext="bz2", FUN=bzfile, overwrite=TRUE, remove = FALSE)) # # motif <- readRDS(system.file("extdata", "MotifPFM.rds", package="esATAC")) # # motif.data <- motifscan(peak = sample.path, genome = BSgenome.Hsapiens.UCSC.hg19, motifs = motif) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = TRUE, echo = FALSE, warning = FALSE-------------------------- CTCF.df <- data.frame(chromatin = c("chr20", "chr20", "chr20", "chr20"), start = c("189774", "239773", "247783", "281074"), end = c("189792", "239791", "247801", "281092"), strand = c("+", "+", "-", "-"), score = c("0.8794931", "0.9003697", "0.9337214", "0.8511201"), sequence = c("ACTCCTCTAGAGGGTGCTC", "TTGCCACTGGGGGGAGACA", "CTGCCGGCAGATGGCGGTA", "TTGCCTGCAGGGGTGGGAA")) knitr::kable(CTCF.df) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval = FALSE, echo = TRUE, warning = FALSE, results = 'hide', message = FALSE---- # ## extract cut site position from bed file # fra_path <- system.file("extdata", "chr20.50000.bed.bz2", package="esATAC") # frag <- as.vector(bunzip2(filename = fra_path, # destname = file.path(getwd(), "chr20.50000.bed"), # ext="bz2", FUN=bzfile, overwrite=TRUE, remove = FALSE)) # cs.data <- extractcutsite(bedInput = frag, prefix = "ATAC") ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----eval=FALSE, echo=TRUE, warning=FALSE, results='hide', message=FALSE---- # fp <- atacCutSiteCount(atacProcCutSite = cs.data, atacProcMotifScan = motif.data) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----printmap, warning=FALSE, message=FALSE------------------------------ bedbzfile <- system.file(package="esATAC", "extdata", "chr20.50000.bed.bz2") bedfile <- file.path(td,"chr20.50000.bed") bunzip2(bedbzfile,destname=bedfile,overwrite=TRUE,remove=FALSE) peakproc <-bedUtils(bedInput = bedfile,maxFragLen = 100, chrFilterList = NULL) %>% atacPeakCalling peakproc %>% printMap ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----qpara, warning=FALSE, message=FALSE--------------------------------- #query all of available parameters getParamItems(peakproc) ## ----loadingx19,eval=TRUE,echo=FALSE,message=FALSE----------------------- library(esATAC) ## ----getpara, warning=FALSE, message=FALSE------------------------------- #query a parameter value getParam(peakproc,"fragmentSize") ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----getReport, warning=FALSE, results='hide', message=FALSE------------- sambzfile <- system.file(package="esATAC", "extdata", "Example.sam.bz2") samfile <- file.path(td,"Example.sam") bunzip2(sambzfile,destname=samfile,overwrite=TRUE,remove=FALSE) samToBedProc<-samToBed(samInput = samfile) ## ----loadingx21,eval=TRUE,echo=FALSE,message=FALSE----------------------- library(esATAC) ## ----getReport1,message=FALSE-------------------------------------------- getReportItems(samToBedProc) ## ----eval=TRUE,echo=FALSE,message=FALSE---------------------------------- library(esATAC) ## ----getReport2,message=FALSE, results = 'hide'-------------------------- #query a parameter value getReportVal(samToBedProc,"report") ## ----loadingx2,eval=TRUE,echo=FALSE,message=FALSE------------------------ library(esATAC) ## ----clearcache, warning=FALSE, results='hide', message=FALSE------------ clearProcCache(peakproc) process(peakproc) ## ----session------------------------------------------------------------- sessionInfo()