## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") library(RBedMethyl) ## ----ingest------------------------------------------------------------------- # Example bedMethyl-like content, header is dropped df <- data.frame( r1 = c("chr1", 0, 1, "m", 0, "+", 0, 1, 0, 10, 0.5, 5, 5, 0, 0, 0, 0, 0), r2 = c("chr1", 10, 11, "m", 0, "+", 10, 11, 0, 20, 0.25, 5, 15, 0, 0, 0, 0, 0), r3 = c("chr2", 0, 1, "m", 0, "-", 0, 1, 0, 8, 0.375, 3, 5, 0, 0, 0, 0, 0), r4 = c("chr2", 0, 1, "h", 0, "-", 0, 1, 0, 8, 0.375, 3, 5, 0, 0, 0, 0, 0) ) df <- t(df) colnames(df) <- c( "chrom", "chromStart", "chromEnd", "mod", "score", "strand", "thickStart", "thickEnd", "itemRgb", "coverage", "pct", "mod_reads", "unmod_reads", "other_reads", "del_reads", "fail_reads", "diff_reads", "nocall_reads" ) df bed <- tempfile(fileext = ".bed") write.table(df, bed, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE) # Read only "m" modification rows, and only coverage,pct and mod_reads assays bm <- readBedMethyl(bed, mod = "m", fields = c("coverage", "pct", "mod_reads")) bm ## ----subset------------------------------------------------------------------- # By one or more chromosomes subsetByChromosomes(bm, "chr1") subsetByChromosomes(bm, c("chr1", "chr2")) # By region ## Can supply the chromosome, start, end subsetByRegion(bm, "chr1", 1, 12) ## Or a GRanges object regions <- GenomicRanges::GRanges( seqnames = c("chr1", "chr2"), ranges = IRanges::IRanges(start = c(1, 1), end = c(12, 2)) ) subsetByRegion(bm, regions) ## The [] bracketing index system also works bm[regions] # By minimum coverage filterByCoverage(bm, 15) # By column using a function ## The following keeps only lines where the methylation percentage is higher than 0.3 subsetBy(bm, "pct", function(x) x > 0.3) ## ----summarize---------------------------------------------------------------- regions <- GenomicRanges::GRanges( seqnames = c("chr1", "chr2"), ranges = IRanges::IRanges(start = c(1, 1), end = c(12, 2)) ) # For each region, generate summary statistics ## `mod_reads` is the total number of reads, `beta` stands for average methylation, `n_sites` is the number of sites summarizeByRegion(bm, regions) ## ----coercion----------------------------------------------------------------- # RangedSummarizedExperiment rse <- as(bm, "RangedSummarizedExperiment") rse # bsseq (if installed) if (requireNamespace("bsseq", quietly = TRUE) && methods::hasMethod("coerce", c("RBedMethyl", "BSseq"))) { bs <- as(bm, "BSseq") bs } else { message("bsseq is not installed or BSseq coercion is unavailable; skipping BSseq coercion.") } ## ----session-info------------------------------------------------------------- sessionInfo()