## ----eval=F------------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("methylscaper") ## ----message = FALSE, warning=FALSE------------------------------------------- library(methylscaper) ## ----eval=F------------------------------------------------------------------- # methylscaper() ## ----eval=FALSE--------------------------------------------------------------- # filepath <- "~/Downloads/GSE109262_RAW/" # singlecell_subset <- subsetSC(filepath, chromosome = 19, startPos = 8937041, endPos = 8997041) # # To save for later, save as an rds file and change the folder location as desired: # saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds") ## ----eval=TRUE---------------------------------------------------------------- gse_subset_path <- list( c( "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936197_ESC_A08_CpG-met_processed.tsv.gz", "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936196_ESC_A07_CpG-met_processed.tsv.gz", "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936192_ESC_A03_CpG-met_processed.tsv.gz" ), c( "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936197_ESC_A08_GpC-acc_processed.tsv.gz", "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936196_ESC_A07_GpC-acc_processed.tsv.gz", "https://rbacher.rc.ufl.edu/methylscaper/data/GSE109262_SUBSET/GSM2936192_ESC_A03_GpC-acc_processed.tsv.gz" ), c("GSM2936197_ESC_A08_CpG-met_processed", "GSM2936196_ESC_A07_CpG-met_processed", "GSM2936192_ESC_A03_CpG-met_processed"), c("GSM2936197_ESC_A08_GpC-acc_processed", "GSM2936196_ESC_A07_GpC-acc_processed", "GSM2936192_ESC_A03_GpC-acc_processed") ) # This formatting is a list of 4 objects: the met file urls, the acc file urls, the met file names, and the acc file names. options(timeout = 1000) singlecell_subset <- subsetSC(gse_subset_path, chromosome = 19, startPos = 8937041, endPos = 8997041) # To save for later, save as an rds file and change the folder location as desired: # saveRDS(singlecell_subset, "~/Downloads/singlecell_subset.rds") ## ----eval=TRUE, fig.width=7, fig.height=3------------------------------------- data("mouse_bm") gene.select <- subset(mouse_bm, mgi_symbol == "Eef1g") startPos <- 8966841 endPos <- 8967541 prepSC.out <- prepSC(singlecell_subset, startPos = startPos, endPos = endPos) orderObj <- initialOrder(prepSC.out) plotSequence(orderObj, Title = "Eef1g gene", plotFast = TRUE, drawKey = FALSE) ## ----eval=TRUE, fig.align='left', fig.width=6, fig.height=5------------------- # If you followed the preprocessing code above, then you can do: # mydata <- readRDS("~/Downloads/singlecell_subset.rds") # Otherwise, we have also included this subset in the package directly: mydata <- system.file("extdata", "singlecell_subset.rds", package = "methylscaper") mydata <- readRDS(mydata) gene <- "Eef1g" data("mouse_bm") # for human use human_bm gene.select <- subset(mouse_bm, mgi_symbol == gene) # We will further subset the region to a narrow region of the gene: from 8966841bp to 8967541bp startPos <- 8966841 endPos <- 8967541 # This subsets the data to a specific region and prepares the data for visualization: prepSC.out <- prepSC(mydata, startPos = startPos, endPos = endPos) # Next the cells are ordered using the PCA approach and plot orderObj <- initialOrder(prepSC.out) plotSequence(orderObj, Title = "Eef1g gene", plotFast = TRUE) # We plot the nucleosome size key by default, however this can be turned off via drawKey = FALSE: # plotSequence(orderObj, Title = "Eef1g gene", plotFast=TRUE, drawKey = FALSE) ## ----eval=TRUE---------------------------------------------------------------- orderObj <- initialOrder(prepSC.out, weightStart = 47, weightEnd = 358, weightFeature = "acc" ) ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- plotSequence(orderObj, Title = "Eef1g gene", plotFast = TRUE) ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- orderObj$order1 <- refineFunction(orderObj, refineStart = 1, refineEnd = 20) plotSequence(orderObj, Title = "Eef1g gene", plotFast = TRUE) ## ----eval=FALSE--------------------------------------------------------------- # png("~/save_my_plot.png", width = 4, height = 6, units = "in", res = 300) # plotSequence(orderObj, Title = "Eef1g gene", plotFast = FASLE) # dev.off() ## ----eval=FALSE--------------------------------------------------------------- # # if (!requireNamespace("biomaRt", quietly = TRUE)) { # # BiocManager::install("biomaRt") # # } # library(biomaRt) # ensembl <- useMart("ensembl") # # Demonstrating how to get the human annotations. # ensembl <- useDataset("hsapiens_gene_ensembl", mart = ensembl) # my_chr <- c(1:22, "M", "X", "Y") # You can choose to omit this or include additional chromosome # # We only need the start, end, and symbol: # human_bm <- getBM( # attributes = c("chromosome_name", "start_position", "end_position", "hgnc_symbol"), # filters = "chromosome_name", # values = my_chr, # mart = ensembl # ) # # ## Now that we have the biomart object, we can extract start and ends for methylscaper: # gene_select <- subset(human_bm, human_bm$hgnc_symbol == "GeneX") # # # These can then be passed into the prepSC function: # prepSC.out <- prepSC(mydata, startPos = gene_select$startPos, endPos = gene_select$endPos) # # # To continue the analysis: # # Next the cells are ordered using the PCA approach and then plot: # orderObj <- initialOrder(prepSC.out) # plotSequence(orderObj, Title = "Gene X", plotFast = TRUE) ## ----eval = TRUE-------------------------------------------------------------- # This provides the path to the raw datasets located in the methylscaper package seq_file <- system.file("extdata", "seq_file.fasta", package = "methylscaper") ref_file <- system.file("extdata", "reference.fa", package = "methylscaper") # Next we read the data into R using the read.fasta function from the seqinr package: fasta <- seqinr::read.fasta(seq_file) ref <- seqinr::read.fasta(ref_file)[[1]] # For the vignette we will only run a subset of the molecules singlemolecule_example <- runAlign(fasta = fasta, ref = ref, fasta_subset = 1:150) # Once complete, we can save the output as an RDS object # saveRDS(singlemolecule_example, file="~/methylscaper_singlemolecule_preprocessed.rds") ## ----eval=TRUE, fig.width=7, fig.height=6------------------------------------- # If skipping the preprocessing steps above, use our pre-aligned data: # data(singlemolecule_example) orderObj <- initialOrder(singlemolecule_example, Method = "PCA", weightStart = 308, weightEnd = 475, weightFeature = "red" ) plotSequence(orderObj, Title = "Ordered by PCA", plotFast = TRUE) ## ----eval=TRUE, fig.align='left', fig.height=8, fig.width=8------------------- par(mfrow = c(2, 2)) props <- methyl_proportion(orderObj, type = "met", makePlot = TRUE, main = "") props <- methyl_proportion(orderObj, type = "acc", makePlot = TRUE, main = "") pcnts <- methyl_percent_sites(orderObj, makePlot = TRUE) avgs <- methyl_average_status(orderObj, makePlot = TRUE, window_length = 25) ## ----sessionInfo, results='markup'-------------------------------------------- sessionInfo()