## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set(eval = TRUE) #knitr::opts_chunk$set(dev = 'png') #knitr::opts_chunk$set(dpi=100) ## ----setup2, echo=FALSE,include=FALSE----------------------------------------- library(plotly) library(ggplot2) library(foreach) library(doParallel) # library(devtools) # install_github('koyelucd/betaclust',force = TRUE) # library(betaclust) ## ----package, include=TRUE, echo=TRUE, message=FALSE,warning=FALSE------------ library(betaclust) ## ----data,include=TRUE, echo=TRUE--------------------------------------------- data(pca.methylation.data) head(pca.methylation.data) ## ----thresholds,include=TRUE, echo=TRUE--------------------------------------- M <- 3 ## No. of methylation states in a DNA sample N <- 4 ## No. of patients R <- 1 ## No. of DNA samples my.seed <- 190 ## set seed for reproducibility threshold_out <- betaclust(pca.methylation.data[,2:5], M, N, R, model_names = c("K..","KN."), model_selection = "BIC", parallel_process = FALSE, seed = my.seed) ## ----output0,include=TRUE, echo=TRUE------------------------------------------ summary(threshold_out) ## ----wrapperoutput3,include=TRUE, echo=TRUE,fig.width = 4, fig.height = 4,dev = 'png'---- plot(threshold_out, what = "information criterion", plot_type = "ggplot") ## ----output1,include=TRUE, echo=TRUE------------------------------------------ threshold_points <- threshold_out$optimal_model_results$thresholds threshold_points$threholds ## ----output2,include=TRUE, echo=TRUE,fig.width = 5, fig.height = 4,dev = 'png'---- plot(threshold_out, what = "fitted density", threshold = TRUE, data = pca.methylation.data[,2:5], patient_number = 1, plot_type = "ggplot") ## ----output3,include=TRUE, echo=TRUE,fig.width = 5, fig.height = 4,dev = 'png',warning=FALSE---- plot(threshold_out, what = "kernel density", threshold = TRUE, data = pca.methylation.data[,2:5], plot_type = "ggplot") ## ----output4,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'---- plot(threshold_out, what = "uncertainty") ## ----dmc,include=TRUE, echo=TRUE---------------------------------------------- M <- 3 ## No. of methylation states in a DNA sample N <- 4 ## No. of patients R <- 2 ## No. of DNA samples my.seed <- 190 ## set seed for reproducibility dmc_output <- betaclust(pca.methylation.data[,2:9], M, N, R, model_names = "K.R", parallel_process = FALSE, seed = my.seed) ## ----dmcoutput4b, include=TRUE, echo=TRUE------------------------------------- print(dmc_output$optimal_model_results$DM$AUC) print(dmc_output$optimal_model_results$DM$WD) ## ----dmcoutput1,include=TRUE, echo=TRUE--------------------------------------- summary(dmc_output) ## ----dmcoutput2,include=TRUE, echo=TRUE,fig.width = 6.5, fig.height = 5,dev = 'png'---- plot(dmc_output, what = "fitted density", plot_type = "ggplot", data = pca.methylation.data[,2:9], sample_name = c("Benign","Tumour")) ## ----dmcoutput3,include=TRUE, echo=TRUE,fig.width = 6.5, fig.height = 5,dev = 'png'---- plot(dmc_output, what = "kernel density", plot_type = "ggplot", data = pca.methylation.data[,2:9]) ## ----dmcoutput4,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'---- plot(dmc_output, what = "uncertainty", plot_type = "ggplot") ## ----dmcoutput6,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'---- ## the dataframe containing methylation data for the CpG sites identified as the most differentially methylated ones dmc_df <- DMC_identification(dmc_output, data = pca.methylation.data[,2:9], pca.methylation.data[,1], threshold = 0.06, metric="WD") ## ----dmcoutput5,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5,dev = 'png'---- ##read the legacy data data(legacy.data) head(legacy.data) ## create empty dataframes and matrices to store the DMCs related to the RARB genes ecdf_rarb <- data.frame() cpg_rarb <- data.frame(matrix(NA, nrow = 0, ncol = 6)) colnames(cpg_rarb) <- colnames(legacy.data) ## split the UCSC_RefGene_name column to read the gene name related to that CpG site ## select the CpG sites related to the RARB genes a <- 1 for(i in 1:nrow(legacy.data)) { str_vec = unlist(strsplit(as.character(legacy.data[i,"UCSC_RefGene_Name"]),"[;]")) if(length(str_vec) != 0) { if(str_vec[[1]] == "RARB") { cpg_rarb[a,] <- legacy.data[i,] a <- a+1 } } } ## Read the DMCs related to the RARB genes ecdf_rarb <- dmc_df[dmc_df$IlmnID %in% cpg_rarb$IlmnID,] ## Plot the ecdf of the selected DMCs ecdf.betaclust(ecdf_rarb[,2:9], R = 2, sample_name = c("Benign","Tumour"))