## ----echo = FALSE, message=FALSE---------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, # comment = "#>", tidy = FALSE, cache = FALSE, results = 'markup' ) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("SingleMoleculeFootprinting") ## ----eval=FALSE--------------------------------------------------------------- # clObj <- makeCluster(40) # prj <- QuasR::qAlign( # sampleFile = sampleFile, # genome = "BSgenome.Mmusculus.UCSC.mm10", # aligner = "Rbowtie", # projectName = "prj", # paired = "fr", # bisulfite = "undir", # alignmentParameter = "-e 70 -X 1000 -k 2 --best -strata", # alignmentsDir = "./", # cacheDir = tempdir(), # clObj = clObj # ) # stopCluster(clObj) ## ----setup, message=FALSE----------------------------------------------------- suppressWarnings(library(SingleMoleculeFootprinting)) suppressWarnings(library(SingleMoleculeFootprintingData)) suppressWarnings(library(BSgenome.Mmusculus.UCSC.mm10)) suppressWarnings(library(parallel)) suppressWarnings(library(ggplot2)) ## ----include=FALSE------------------------------------------------------------ # Under the hood: # download and cache SingleMoleculeFootprintingData data # locate bam file and write QuasR sampleFile # Download and cache CachedBamPath = SingleMoleculeFootprintingData::NRF1pair.bam()[[1]] CachedBaiPath = SingleMoleculeFootprintingData::NRF1pair.bam.bai()[[1]] SingleMoleculeFootprintingData::AllCs.rds() SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds() SingleMoleculeFootprintingData::ReferenceMethylation.rds() # Create copy of bam and bai files with relevant names CacheDir <- ExperimentHub::getExperimentHubOption(arg = "CACHE") file.copy(from = CachedBamPath, to = paste0(CacheDir, "/", "NRF1pair.bam")) file.copy(from = CachedBaiPath, to = paste0(CacheDir, "/", "NRF1pair.bam.bai")) # Write QuasR input data.frame( FileName = paste0(CacheDir, "/", "NRF1pair.bam"), SampleName = "NRF1pair_DE_" ) -> df readr::write_delim(df, paste0(CacheDir, "/NRF1Pair_sampleFile.txt"), delim = "\t") ## ----------------------------------------------------------------------------- sampleFile = paste0(CacheDir, "/NRF1Pair_sampleFile.txt") knitr::kable(suppressMessages(readr::read_delim(sampleFile, delim = "\t"))) ## ----eval=FALSE--------------------------------------------------------------- # BaitRegions <- SingleMoleculeFootprintingData::EnrichmentRegions_mm10.rds() # clObj = makeCluster(4) # BaitCapture( # sampleFile = sampleFile, # genome = BSgenome.Mmusculus.UCSC.mm10, # baits = BaitRegions, # clObj = clObj # ) -> BaitCaptureEfficiency # stopCluster(clObj) ## ----eval=FALSE--------------------------------------------------------------- # ConversionRate( # sampleFile = sampleFile, # genome = BSgenome.Mmusculus.UCSC.mm10, # chr = 19 # ) -> ConversionRateValue ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # sampleFile = "/g/krebs/barzaghi/HTS/SMF/MM/QuasR_input_files/QuasR_input_tmp2.txt" # samples = unique(readr::read_delim(sampleFile, "\t")$SampleName) ## ----eval=FALSE--------------------------------------------------------------- # RegionOfInterest = GRanges(BSgenome.Mmusculus.UCSC.mm10@seqinfo["chr19"]) # # CallContextMethylation( # sampleFile = sampleFile, # samples = samples, # genome = BSgenome.Mmusculus.UCSC.mm10, # RegionOfInterest = RegionOfInterest, # coverage = 20, # ConvRate.thr = NULL, # returnSM = FALSE, # clObj = NULL, # verbose = FALSE # ) -> Methylation # # Methylation %>% # elementMetadata() %>% # as.data.frame() %>% # dplyr::select(grep("_MethRate", colnames(.))) -> MethylationRate_matrix # # png("../inst/extdata/MethRateCorr_AllCs.png", units = "cm", res = 100, width = 20, height = 20) # pairs( # MethylationRate_matrix, # upper.panel = panel.cor, # diag.panel = panel.hist, # lower.panel = panel.jet, # labels = gsub("SMF_MM_|DE_|_MethRate", "", colnames(MethylationRate_matrix)) # ) # dev.off() ## ----------------------------------------------------------------------------- knitr::include_graphics(system.file("extdata", "MethRateCorr_AllCs.png", package="SingleMoleculeFootprinting")) ## ----eval=FALSE--------------------------------------------------------------- # Methylation %>% # elementMetadata() %>% # as.data.frame() %>% # filter(GenomicContext %in% c("DGCHN", "GCH")) %>% # dplyr::select(grep("_MethRate", colnames(.))) -> MethylationRate_matrix_GCH # # png("../inst/extdata/MethRateCorr_GCHs.png", units = "cm", res = 100, width = 20, height = 20) # pairs(MethylationRate_matrix_GCH, upper.panel = panel.cor, diag.panel = panel.hist, lower.panel = panel.jet, labels = colnames(MethylationRate_matrix_GCH)) # dev.off() # # Methylation %>% # elementMetadata() %>% # as.data.frame() %>% # filter(GenomicContext %in% c("NWCGW", "HCG")) %>% # dplyr::select(grep("_MethRate", colnames(.))) -> MethylationRate_matrix_HCG # # png("../inst/extdata/MethRateCorr_HCGs.png", units = "cm", res = 100, width = 20, height = 20) # pairs(MethylationRate_matrix_HCG, upper.panel = panel.cor, diag.panel = panel.hist, lower.panel = panel.jet, labels = colnames(MethylationRate_matrix_HCG)) # dev.off() ## ----------------------------------------------------------------------------- knitr::include_graphics(system.file("extdata", "MethRateCorr_GCHs.png", package="SingleMoleculeFootprinting")) knitr::include_graphics(system.file("extdata", "MethRateCorr_HCGs.png", package="SingleMoleculeFootprinting")) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # sampleFile_LowCoverage = "/g/krebs/barzaghi/HTS/SMF/MM/QuasR_input_files/QuasR_input_tmp.txt" # samples_LowCoverage = grep( # "_NP_NO_R.*_MiSeq", # unique(readr::read_delim(sampleFile_LowCoverage, "\t")$SampleName), value = TRUE) # # sampleFile_HighCoverage_reference = "/g/krebs/barzaghi/HTS/SMF/MM/QuasR_input_files/QuasR_input_tmp.txt" # samples_HighCoverage_reference = grep( # "SMF_MM_TKO_as_NO_R_NextSeq", # unique(readr::read_delim(sampleFile_HighCoverage_reference, "\t")$SampleName), value = TRUE) ## ----eval=FALSE--------------------------------------------------------------- # RegionOfInterest = GRanges(BSgenome.Mmusculus.UCSC.mm10@seqinfo["chr19"]) # # CallContextMethylation( # sampleFile = sampleFile_LowCoverage, # samples = samples_LowCoverage, # genome = BSgenome.Mmusculus.UCSC.mm10, # RegionOfInterest = RegionOfInterest, # coverage = 1, # ConvRate.thr = NULL, # returnSM = FALSE, # clObj = NULL, # verbose = FALSE # )$DGCHN -> LowCoverageMethylation # # CallContextMethylation( # sampleFile = sampleFile_HighCoverage_reference, # samples = samples_HighCoverage_reference, # genome = BSgenome.Mmusculus.UCSC.mm10, # RegionOfInterest = RegionOfInterest, # coverage = 20, # ConvRate.thr = NULL, # returnSM = FALSE, # clObj = NULL, # verbose = FALSE # )$DGCHN -> HighCoverageMethylation # # CompositeMethylationCorrelation( # LowCoverage = LowCoverageMethylation, # LowCoverage_samples = samples_LowCoverage, # HighCoverage = HighCoverageMethylation, # HighCoverage_samples = samples_HighCoverage_reference, # bins = 50, # returnDF = TRUE, # returnPlot = TRUE, # RMSE = TRUE, # return_RMSE_DF = TRUE, # return_RMSE_plot = TRUE # ) -> results ## ----------------------------------------------------------------------------- results <- qs::qread(system.file("extdata", "low_coverage_methylation_correlation.qs", package="SingleMoleculeFootprinting")) results$MethylationDistribution_plot + scale_color_manual( values = c("#00BFC4", "#00BFC4", "#00BFC4", "#F8766D", "#F8766D"), breaks = c("SMF_MM_TKO_as_NO_R_NextSeq", "SMF_MM_NP_NO_R1_MiSeq", "SMF_MM_NP_NO_R2_MiSeq", "SMF_MM_NP_NO_R3_MiSeq", "SMF_MM_NP_NO_R4_MiSeq")) ## ----------------------------------------------------------------------------- results$RMSE_plot + geom_bar(aes(fill = Sample), stat = "identity") + scale_fill_manual( values = c("#00BFC4", "#00BFC4", "#00BFC4", "#F8766D", "#F8766D"), breaks = c("SMF_MM_TKO_as_NO_R_NextSeq", "SMF_MM_NP_NO_R1_MiSeq", "SMF_MM_NP_NO_R2_MiSeq", "SMF_MM_NP_NO_R3_MiSeq", "SMF_MM_NP_NO_R4_MiSeq")) ## ----echo=FALSE--------------------------------------------------------------- knitr::kable(data.frame(ExperimentType = c("Single enzyme SMF", "Double enzyme SMF", "No enzyme (endogenous mCpG only)"), substring = c("\\_NO_", "\\_DE_", "\\_SS_"), RelevanContext = c("DGCHN & NWCGW", "GCH + HCG", "CG"), Notes = c("Methylation info is reported separately for each context", "Methylation information is aggregated across the contexts", "-"))) ## ----------------------------------------------------------------------------- samples <- suppressMessages(unique(readr::read_delim(sampleFile, delim = "\t")$SampleName)) RegionOfInterest <- GRanges("chr6", IRanges(88106000, 88106500)) CallContextMethylation( sampleFile = sampleFile, samples = samples, genome = BSgenome.Mmusculus.UCSC.mm10, RegionOfInterest = RegionOfInterest, coverage = 20, returnSM = FALSE, ConvRate.thr = NULL, verbose = TRUE, clObj = NULL # N.b.: when returnSM = TRUE, clObj should be set to NULL ) -> Methylation ## ----------------------------------------------------------------------------- head(Methylation) ## ----------------------------------------------------------------------------- CallContextMethylation( sampleFile = sampleFile, samples = samples, genome = BSgenome.Mmusculus.UCSC.mm10, RegionOfInterest = RegionOfInterest, coverage = 20, returnSM = TRUE, ConvRate.thr = NULL, verbose = FALSE, clObj = NULL # N.b.: when returnSM = TRUE, clObj should be set to NULL ) -> Methylation Methylation[[2]]$NRF1pair_DE_[1:10,20:30] ## ----------------------------------------------------------------------------- PlotAvgSMF( MethGR = Methylation[[1]], RegionOfInterest = RegionOfInterest ) ## ----------------------------------------------------------------------------- PlotAvgSMF( MethGR = Methylation[[1]], RegionOfInterest = RegionOfInterest, ShowContext = TRUE ) ## ----------------------------------------------------------------------------- TFBSs = qs::qread(system.file("extdata", "TFBSs_1.qs", package="SingleMoleculeFootprinting")) TFBSs ## ----------------------------------------------------------------------------- PlotAvgSMF( MethGR = Methylation[[1]], RegionOfInterest = RegionOfInterest, TFBSs = plyranges::filter_by_overlaps(TFBSs, RegionOfInterest) ) ## ----------------------------------------------------------------------------- PlotAvgSMF( MethGR = Methylation[[1]], RegionOfInterest = RegionOfInterest, ) -> smf_plot user_annotation = data.frame(xmin = 88106300, xmax = 88106500, label = "nucleosome") smf_plot + geom_line(linewidth = 1.5) + geom_point(size = 3) + geom_rect(data = user_annotation, aes(xmin=xmin, xmax=xmax, ymin=-0.09, ymax=-0.04), inherit.aes = FALSE) + ggrepel::geom_text_repel(data = user_annotation, aes(x=xmin, y=-0.02, label=label), inherit.aes = FALSE) + scale_y_continuous(breaks = c(0,1), limits = c(-.25,1)) + scale_x_continuous(breaks = c(start(RegionOfInterest),end(RegionOfInterest)), limits = c(start(RegionOfInterest),end(RegionOfInterest))) + theme_bw() ## ----echo=FALSE--------------------------------------------------------------- n = 1000 readsSubset <- sample(seq(nrow(Methylation[[2]]$NRF1pair_DE_)), n) Methylation[[2]]$NRF1pair_DE_ <- Methylation[[2]]$NRF1pair_DE_[readsSubset,] ## ----------------------------------------------------------------------------- PlotSM( MethSM = Methylation[[2]], RegionOfInterest = RegionOfInterest, sorting.strategy = "None" ) ## ----------------------------------------------------------------------------- PlotSM( MethSM = Methylation[[2]], RegionOfInterest = RegionOfInterest, sorting.strategy = "hierarchical.clustering" ) ## ----------------------------------------------------------------------------- SNPs = qs::qread(system.file("extdata", "SNPs_1.qs", package="SingleMoleculeFootprinting")) SNPs ## ----------------------------------------------------------------------------- Methylation = qs::qread(system.file("extdata", "Methylation_1.qs", package="SingleMoleculeFootprinting")) TFBSs = qs::qread(system.file("extdata", "TFBSs_2.qs", package="SingleMoleculeFootprinting")) RegionOfInterest = GRanges("chr16", IRanges(8470511,8471010)) PlotAvgSMF( MethGR = Methylation, RegionOfInterest = RegionOfInterest, TFBSs = TFBSs, SNPs = SNPs ) ## ----------------------------------------------------------------------------- Methylation = qs::qread(system.file("extdata", "Methylation_2.qs", package="SingleMoleculeFootprinting")) TFBSs = qs::qread(system.file("extdata", "TFBSs_1.qs", package="SingleMoleculeFootprinting")) SNPs = qs::qread(system.file("extdata", "SNPs_2.qs", package="SingleMoleculeFootprinting")) RegionOfInterest = GRanges("chr6", IRanges(88106000, 88106500)) PlotAvgSMF( MethGR = Methylation, RegionOfInterest = RegionOfInterest, TFBSs = TFBSs, SNPs = SNPs ) + geom_vline(xintercept = start(SNPs[5]), linetype = 2, color = "grey") ## ----------------------------------------------------------------------------- CytosinesToMask = qs::qread(system.file("extdata", "cytosines_to_mask.qs", package="SingleMoleculeFootprinting")) CytosinesToMask ## ----------------------------------------------------------------------------- MaskSNPs( Methylation = Methylation, CytosinesToMask = CytosinesToMask, MaskSMmat = FALSE, SampleStringMatch = list(Cast = "_CTKO", Spret = "_STKO"), Experiment = "DE" ) -> Methylation_masked PlotAvgSMF( MethGR = Methylation_masked, RegionOfInterest = RegionOfInterest, TFBSs = TFBSs, SNPs = SNPs ) + geom_vline(xintercept = start(SNPs[5]), linetype = 2, color = "grey") ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # sampleFile = "/g/krebs/barzaghi/HTS/SMF/MM/QuasR_input_files/QuasR_input_AllCanWGpooled_dprm_DE_only.txt" # samples = suppressMessages(unique(readr::read_delim(sampleFile, "\t")$SampleName)) # plyranges::filter(TFBSs, TF=="REST" & isBound) -> REST_motifs # # clObj = parallel::makeCluster(16) # motif.counts = QuasR::qCount(GetQuasRprj(ChIP_data_dictionary[["Rest"]], genome = BSgenome.Mmusculus.UCSC.mm10), query = IRanges::resize(REST_motifs, 200, "center"), clObj = clObj) # parallel::stopCluster(clObj) # # motif.counts %>% # data.frame() %>% # rownames_to_column("TF.name") %>% # arrange(desc(Rest)) %>% # head(500) %>% # dplyr::select(TF.name) -> top_rest # # TopMotifs = TFBSs[top_rest$TF.name] # elementMetadata(TopMotifs) = NULL # TopMotifs$TF = "REST" # # qs::qsave(TopMotifs, "../inst/extdata/Top_bound_REST.qs") ## ----eval=FALSE--------------------------------------------------------------- # TopMotifs = qs::qread(system.file("extdata", "Top_bound_REST.qs", package="SingleMoleculeFootprinting")) # # CollectCompositeData( # sampleFile = sampleFile, # samples = samples, # genome = BSgenome.Mmusculus.UCSC.mm10, # TFBSs = TopMotifs, # window = 1000, # coverage = 20, # ConvRate.thr = NULL, # cores = 16 # ) -> CompositeData # # png("../inst/extdata/rest_composite.png", units = "cm", res = 100, width = 24, height = 16) # CompositePlot(CompositeData = CompositeData, span = 0.1, TF = "Rest") # dev.off() ## ----------------------------------------------------------------------------- knitr::include_graphics(system.file("extdata", "rest_composite.png", package="SingleMoleculeFootprinting")) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()