## ----Define function, eval=FALSE----------------------------------------- # correlationMethExprs <- function(multiset, # meth_set_name = NULL, exprs_set_name = NULL, # vars_meth = NULL, vars_exprs = NULL, # vars_meth_types = rep(NA, length(vars_meth)), # vars_exprs_types = rep(NA, length(vars_exprs)), # sel_cpgs, flank = 250000, num_cores = 1, verbose = TRUE){ ## ----Checking mds, eval=FALSE-------------------------------------------- # # [Check 1] Is multiset a MultiDataSet object? # if (!is(multiset, "MultiDataSet")){ # stop("multiset must be a MultiDataSet") # } # # # [Check 2A] Are meth_set_name and exprs_set_name characters? # if (!(is.character(meth_set_name) | is.null(meth_set_name))){ # stop("meth_set_name must be a character.") # } # if (!(is.character(exprs_set_name) | is.null(exprs_set_name))){ # stop("exprs_set_name must be a character.") # } # # # Add the dataset type to the names # meth_set_name <- paste(c("methylation", meth_set_name), collapse = "+") # exprs_set_name <- paste(c("expression", exprs_set_name), collapse = "+") # # # # [Check 2B] Has multiset the right sets? # if (!all(c(meth_set_name, exprs_set_name) %in% names(multiset))){ # stop("multiset must contain meth_set_name and exprs_set_name.") # } # # if (!all(c(meth_set_name, exprs_set_name) %in% rowRangesElements(multiset))){ # stop("multiset must contain meth_set_name and exprs_set_name.") # } # # # [Check 3] Is flank numeric, positive and has only one number? # if (!is(flank, "numeric") || length(flank) > 1 || flank < 0){ # stop("flank must be a positive integer") # } ## ----Common samples, eval=FALSE------------------------------------------ # # Select only our datasets # multiset <- multiset[ , c(meth_set_name, exprs_set_name)] # # ## Select common samples # multiset <- commonSamples(multiset) ## ----Get sets, eval=FALSE------------------------------------------------ # # Obtain the original objects # mset <- multiset[[meth_set_name]] # eset <- multiset[[exprs_set_name]] ## ----Filter CpGs, eval = FALSE------------------------------------------- # if (!missing(sel_cpgs)){ # if (!is.character(sel_cpgs) | length(sel_cpgs) == 0){ # stop("sel_cpgs must be a character vector with the name of the CpGs to be used.") # } else{ # mset <- mset[sel_cpgs, ] # } # } ## ----Get GRanges, eval = FALSE------------------------------------------- # # Get GRanges # rangesMeth <- rowRanges(multiset)[[meth_set_name]] # rangesMeth <- rangesMeth[featureNames(mset)] # rangesExprs <- rowRanges(multiset)[[exprs_set_name]] ## ----Get pairs, eval = FALSE--------------------------------------------- # start(rangesMeth) <- start(rangesMeth) - flank # end(rangesMeth) <- end(rangesMeth) + flank # pairs <- GenomicRanges::findOverlaps(rangesExprs, rangesMeth, type = "within") # pairs <- data.frame(cpg = rownames(mset)[S4Vectors::subjectHits(pairs)], # exprs = rownames(eset)[S4Vectors::queryHits(pairs)], # stringsAsFactors = FALSE) # # # If there are no pairs, do not continue # if (nrow(pairs) == 0){ # warning("There are no expression probes in the range of the cpgs. An empty data.frame will be returned.") # return(data.frame(cpg = character(0), exprs = character(0), Beta = integer(0), # se = integer(0), P.Value = integer(0), adj.P.val = integer(0))) # } # ## ----Get residues, eval = FALSE------------------------------------------ # # Computing residuals # methres <- setResidues(mset, vars_names = vars_meth, vars_types = vars_meth_types) # exprsres <- setResidues(eset, vars_names = vars_exprs, vars_types = vars_exprs_types) ## ----Get association, eval = FALSE--------------------------------------- # ## We define the function to be used in the mclapply # residualsCorr <- function (methy_res, exprs_res){ # fit <- lm(exprs_res ~ methy_res) # return(c(summary(fit)$coef[2, 1], summary(fit)$coef[2,2], summary(fit)$coef[2,4])) # } # # # use mclapply to speed up computation # regvals <- mclapply(1:nrow(pairs), # function(x) residualsCorr(methres[pairs[x, 1], ], exprsres[pairs[x, 2], ]), # mc.cores = num_cores) ## ----Formating results, eval = FALSE------------------------------------- # res <- data.frame(pairs, t(data.frame(regvals))) # colnames(res) <- c("cpg", "exprs", "Beta", "se", "P.Value") # res$adj.P.Val <- p.adjust(res$P.Value, "BH") # res <- res[order(res$adj.P.Val), ] # rownames(res) <- NULL # res ## ----Function definition------------------------------------------------- correlationMethExprs <- function(multiset, meth_set_name = NULL, exprs_set_name = NULL, vars_meth = NULL, vars_exprs = NULL, vars_meth_types = rep(NA, length(vars_meth)), vars_exprs_types = rep(NA, length(vars_exprs)), sel_cpgs, flank = 250000, num_cores = 1, verbose = TRUE){ ###################################################################################### ## Data Checking # Check object is a MultiDataSet if (!is(multiset, "MultiDataSet")){ stop("multiset must be a MultiDataSet") } # Check meth_set_name and exprs_set_name are characters if (!(is.character(meth_set_name) | is.null(meth_set_name))){ stop("meth_set_name must be a character.") } if (!(is.character(exprs_set_name) | is.null(exprs_set_name))){ stop("exprs_set_name must be a character.") } # Add the dataset type to the names meth_set_name <- paste(c("methylation", meth_set_name), collapse = "+") exprs_set_name <- paste(c("expression", exprs_set_name), collapse = "+") # Check our object has the right sets if (!all(c(meth_set_name, exprs_set_name) %in% names(multiset))){ stop("multiset must contain meth_set_name and exprs_set_name.") } if (!all(c(meth_set_name, exprs_set_name) %in% rowRangesElements(multiset))){ stop("multiset must contain meth_set_name and exprs_set_name.") } # Check that flank is numeric, positive and is only one number if (!is(flank, "numeric") || length(flank) > 1 || flank < 0){ stop("flank must be a positive integer") } ##################################################################################### ##################################################################################### ## Preparation of data ############################# ## Preparation of data 1 # Select only our sets multiset <- multiset[, c(meth_set_name, exprs_set_name)] ## Select common samples multiset <- commonSamples(multiset) ############################# ## Preparation of data 2 mset <- multiset[[meth_set_name]] eset <- multiset[[exprs_set_name]] ############################# ## Preparation of data 3 if (!missing(sel_cpgs)){ if (!is.character(sel_cpgs) | length(sel_cpgs) == 0){ stop("sel_cpgs must be a character vector with the name of the CpGs to be used.") } else{ mset <- mset[sel_cpgs, ] } } ############################# ## Preparation of data 4 # Compute Methylation-Expression pairs rangesMeth <- rowRanges(multiset)[[meth_set_name]] rangesMeth <- rangesMeth[featureNames(mset)] rangesExprs <- rowRanges(multiset)[[exprs_set_name]] ##################################################################################### ##################################################################################### ## Implementation of the algorithm ############################# ## Implementation of the algorithm 1 start(rangesMeth) <- start(rangesMeth) - flank end(rangesMeth) <- end(rangesMeth) + flank pairs <- GenomicRanges::findOverlaps(rangesExprs, rangesMeth, type = "within") pairs <- data.frame(cpg = rownames(mset)[S4Vectors::subjectHits(pairs)], exprs = rownames(eset)[S4Vectors::queryHits(pairs)], stringsAsFactors = FALSE) if (nrow(pairs) == 0){ warning("There are no expression probes in the range of the cpgs. An empty data.frame will be returned.") return(data.frame(cpg = character(0), exprs = character(0), Beta = integer(0), se = integer(0), P.Value = integer(0), adj.P.val = integer(0))) } ############################# ## Filter sets to only features in the pairs mset <- mset[unique(pairs[ , 1]), ] eset <- eset[unique(pairs[ , 2]), ] if (verbose){ message("Computing residuals") } ############################# ## Implementation of the algorithm 2 # Computing residuals methres <- setResidues(mset, vars_names = vars_meth, vars_types = vars_meth_types) exprsres <- setResidues(eset, vars_names = vars_exprs, vars_types = vars_exprs_types) ############################# if (verbose){ message("Computing correlation Methylation-Expression") } ############################# ## Implementation of the algorithm 3 residualsCorr <- function (methy_res, exprs_res){ fit <- lm(exprs_res ~ methy_res) return(c(summary(fit)$coef[2, 1], summary(fit)$coef[2,2], summary(fit)$coef[2,4])) } regvals <- mclapply(1:nrow(pairs), function(x) residualsCorr(methres[pairs[x, 1], ], exprsres[pairs[x, 2], ]), mc.cores = num_cores) ##################################################################################### ##################################################################################### ## Foprmatting the results res <- data.frame(pairs, t(data.frame(regvals))) colnames(res) <- c("cpg", "exprs", "Beta", "se", "P.Value") res$adj.P.Val <- p.adjust(res$P.Value, "BH") res <- res[order(res$adj.P.Val), ] rownames(res) <- NULL res ##################################################################################### } ## Function to remove covariables effect of methylation and expression data setResidues <- function(set, vars_names, vars_types){ if (length(vars_names) != 0){ if (ncol(pData(set)) == 0 | nrow(pData(set)) == 0){ warning("set has no phenotypes. No residues will be computed") } else if (!sum(vars_names %in% colnames(pData(set)))){ if (is(set, "ExpressionSet")){ warning("vars_exprs is/are not a valid column of the eset phenoData. No residues will be computed") } else{ warning("vars_meth is/are not a valid column of the mset phenoData. No residues will be computed") } }else{ pData(set) <- preparePhenotype(phenotypes = pData(set), variable_names = vars_names, variable_types = vars_types) model <- createModel(data = pData(set)) if (is(set, "ExpressionSet")){ vals <- exprs(set) } else if (is(set, "RatioSet")){ vals <- minfi::getBeta(set) } else{ vals <- betas(set) ## Convert methylation to M values prior fitting the linear model vals <- minfi::logit2(vals) } res <- residuals(limma::lmFit(vals, model), vals) if (is(set, "MethylationSet")){ res <- minfi::ilogit2(res) } return(res) } } if (is(set, "ExpressionSet")){ return(exprs(set)) } else if (is(set, "RatioSet")){ return(minfi::getBeta(set)) } else{ return(betas(set)) } } ## ----Loading libraries, message=FALSE, warning=FALSE--------------------- library(MultiDataSet) library(GenomicRanges) library(MEALData) library(limma) library(parallel) library(minfi) ## ----Prepare MultiDataSet, warning=FALSE, message = FALSE---------------- multi <- createMultiDataSet() multi <- add_methy(multi, mset) multi <- add_genexp(multi, eset) ## ----Running correlation, message = FALSE-------------------------------- res <- correlationMethExprs(multi, sel_cpgs = c("cg17504453", "cg25946965", "cg07938442")) res