## ----environment setup, include = FALSE--------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", prompt = FALSE ) library(dplyr) library(knitr) library(kableExtra) ## ----install , eval=FALSE----------------------------------------------------- # if (!require("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # # BiocManager::install("DNEA") ## ----Quick Start, eval=TRUE, echo = TRUE-------------------------------------- #Load packages library(DNEA) library(BiocParallel) #load the example data data("TEDDY") data("T1Dmeta") aminos <- c("alanine", "arginine", "asparagine", "aspartic_acid", "cysteine", "glutamine", "glutamic_acid", "glycine", "histidine", "isoleucine", "leucine", "lysine", "methionine", "phenylalanine", "proline", "serine", "threonine", "tryptophan", "tyrosine", "valine") keep <- rownames(TEDDY) %in% aminos TEDDYreduced <- TEDDY[keep,] #initiate BiocParallel BP_plan <- MulticoreParam(workers = 2, RNGseed = 417, progressbar = FALSE) #re-order the metadata to match the sample order of expression_data T1Dmeta <- T1Dmeta[colnames(TEDDYreduced),] #save the group column to be used as group_labels group_labels <- T1Dmeta$group #name each element for its corresponding sample names(group_labels) <- rownames(T1Dmeta) #convert to factor group_labels <- factor(group_labels, levels = c("DM:control", "DM:case")) #Run DNEA TEDDYdat <- createDNEAobject(project_name = "TEDDYmetabolomics", expression_data = TEDDYreduced, group_labels = group_labels) TEDDYdat <- BICtune(object = TEDDYdat, informed = TRUE, interval = 1e-3, BPPARAM = BP_plan) TEDDYdat <- stabilitySelection(object = TEDDYdat, subSample = TRUE, nreps = 1000, BPPARAM = BP_plan, BPOPTIONS=bpoptions(tasks = 8)) TEDDYdat <- getNetworks(object = TEDDYdat, aprox = TRUE, informed = TRUE, interval = 1e-3, BPPARAM = BP_plan) TEDDYdat <- clusterNet(object = TEDDYdat, tau = 0.5, verbose = FALSE) TEDDYdat <- runNetGSA(TEDDYdat) ## ----load example data, eval=1:5, echo=TRUE----------------------------------- #first load DNEA into R library(DNEA) #load the example data data("TEDDY") ## ----TEDDY examine echo, eval=FALSE, echo=TRUE-------------------------------- # TEDDY[1:10, 1:4] ## ----TEDDY examine, echo=FALSE------------------------------------------------ TEDDY_table <- knitr::kable(TEDDY[1:10, 1:4]) kable_paper(TEDDY_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") #remove table rm(TEDDY_table) ## ----load example metadata, eval=TRUE----------------------------------------- #load T1Dmeta data("T1Dmeta") unique(T1Dmeta$group) ## ----T1Dmeta echo, eval=FALSE, echo=TRUE-------------------------------------- # #View the metadata # T1Dmeta[1:10, c(1,5,6,7)] ## ----T1Dmeta examine, echo=FALSE---------------------------------------------- T1Dmeta_table <- knitr::kable(T1Dmeta[1:10, c(1,5,6,7)]) kable_paper(T1Dmeta_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") #remove table rm(T1Dmeta_table) ## ----create group_labels, eval=TRUE------------------------------------------- #re-order the metadata to match the sample order of expression_data T1Dmeta <- T1Dmeta[colnames(TEDDY),] #save the group column to be used as group_labels group_labels <- T1Dmeta$group #name each element for its corresponding sample names(group_labels) <- rownames(T1Dmeta) #convert to factor group_labels <- factor(group_labels, levels = c("DM:control", "DM:case")) ## ----start DNEA, eval=TRUE---------------------------------------------------- #initiate DNEA object TEDDYdat <- createDNEAobject(project_name = "TEDDYmetabolomics", expression_data = TEDDY, group_labels = group_labels) ## ----correlation heatmap, eval=TRUE, message=FALSE---------------------------- #load the pheatmap and Hmisc packages library(pheatmap) library(Hmisc) #grab the DM:case data from the DNEA object expr_dat <- expressionData(TEDDYdat, assay = "log_scaled_data")[["DM:case"]] #create a pearson correlation matrix - data should be transposed first so features are in columns cor_dat <- Hmisc::rcorr(t(expr_dat), type = "pearson")$r #cluster the correlations and reorder correlation matrix to better visualize dd <- as.dist((1-cor_dat)/2) hc <- hclust(dd) cor_dat <-cor_dat[hc$order, hc$order] #create pheatmap pheatmap(cor_dat, cluster_rows = FALSE,cluster_cols = FALSE, legend = TRUE,annotation_legend = FALSE, labels_row = '',labels_col = '', main = 'Feature correlations in DM:case group' ) ## ----DNEA summary, eval=TRUE, echo=TRUE--------------------------------------- #show summary of DNEA object TEDDYdat ## ----node list examine echo, eval=FALSE, echo=TRUE---------------------------- # #access node list # nodeList(TEDDYdat)[1:5,] ## ----node list examine, echo=FALSE-------------------------------------------- nodelist_table <- knitr::kable(nodeList(TEDDYdat)[1:5,]) kable_paper(nodelist_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") #remove table rm(nodelist_table) ## ----setup input for aggregateFeatures, eval=TRUE----------------------------- #save metabolite names metab_names <- rownames(expressionData(TEDDYdat, assay = "input_data")) #create feature_group data.frame TEDDY_groups <- data.frame(features = metab_names, groups = metab_names, row.names = metab_names) #create labels TEDDY_groups$groups[TEDDY_groups$groups %in% c("isoleucine", "leucine", "valine")] <- "BCAAs" TEDDY_groups$groups[grepl("acid", TEDDY_groups$groups)] <- "fatty_acids" ## ----TEDDY groups echo, eval=FALSE, echo=TRUE--------------------------------- # #take a look at the group labels # TEDDY_groups[1:10, ] ## ----node collapse groups, echo=FALSE----------------------------------------- netGSA_table <- knitr::kable(TEDDY_groups[1:10, ]) kable_paper(netGSA_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") ## ----run aggregateFeatures, eval=TRUE----------------------------------------- #perform feature collapsing collapsed_TEDDY <- aggregateFeatures(object = TEDDYdat, method = "hybrid", correlation_threshold = 0.7, feature_groups = TEDDY_groups) collapsed_TEDDY ## ----addExpressionData, eval=TRUE--------------------------------------------- #log-transform and transpose the TEDDY data TEDDY <- t(log(TEDDY)) #make sure metadata and expression data are in same order T1Dmeta <- T1Dmeta[rownames(TEDDY),] dat <- list('DM:control' = TEDDY[T1Dmeta$group == "DM:control",], 'DM:case' = TEDDY[T1Dmeta$group == "DM:case",]) #log-transform and median center the expression data without scaling newdat <- list() for(cond in seq(length(dat))){ group_dat <- dat[[cond]] for(i in seq(1, ncol(group_dat))){ metab_median = median(group_dat[, i], na.rm = TRUE) metab_range = range(group_dat[, i], na.rm = TRUE) scale_factor = max(abs(metab_range - metab_median)) group_dat[, i] <- (group_dat[, i] - metab_median) / scale_factor rm(metab_median, metab_range, scale_factor) } group_dat <- group_dat[rownames(dat[[cond]]),colnames(dat[[cond]])] group_dat <- t(group_dat) newdat <- append(newdat, list(group_dat)) rm(i, group_dat) } #add names names(newdat) <- names(dat) #add data TEDDYdatCustomInput <- addExpressionData(object = TEDDYdat, dat = newdat, assay_name="median_scaled_data") ## ----BiocParallel, eval=TRUE-------------------------------------------------- #load in BiocParallel library(BiocParallel) #create parallel sockets BPPARAM <- BiocParallel::SnowParam(workers = 2, RNGseed = 417, progressbar = TRUE) ## ----load processed data, include=FALSE--------------------------------------- #load processed results since 500 reps would take ~28 min data("dnw") TEDDYdat <- dnw rm(dnw) ## ----BICtune, eval = FALSE, echo = TRUE, strip.white=TRUE--------------------- # # #optimize lambda # TEDDYdat <- BICtune(TEDDYdat, # informed = TRUE, # interval = 1e-3, # BPPARAM = BPPARAM, # BPOPTIONS = bpoptions(progressbar = FALSE)) ## ----BICtune message, eval=TRUE, echo=FALSE, strip.white=TRUE----------------- message("Optimizing the lambda hyperparameter using Bayesian-Information ", "Criterion outlined in Guo et al. (2011)") message("A Link to this reference can be found in the function documentation ", "by running ?BICtune() in the console") message("The log_scaled_data expression data will be used for analysis.\n") message("Estimating optimal c constant range for asymptotic lambda...\n") message("Fine-tuning Lambda...\n") message("The optimal Lambda hyper-parameter has been set to: 0.05072355!") ## ----BIC plot, eval=TRUE------------------------------------------------------ #load ggplot2 library(ggplot2) #create data frame of values BICtuneData <- rbind(data.frame(lambda = unlist(lambdas2Test(TEDDYdat)), value = vapply(BICscores(TEDDYdat), function(x) x$BIC, double(1)), score = rep("BIC", length(lambdas2Test(TEDDYdat)))), data.frame(lambda = unlist(lambdas2Test(TEDDYdat)), value = vapply(BICscores(TEDDYdat), function(x) x$likelihood, double(1)), score = rep("likelihood", length(lambdas2Test(TEDDYdat))))) #create plot ggplot(data = BICtuneData, aes(x = lambda, y = value)) + geom_line(aes(linetype = score)) + geom_point(aes(shape = score)) + geom_vline(xintercept = optimizedLambda(TEDDYdat), color = "red") + ggtitle("BIC and Likelihood Scores with Respect to Lambda") ## ----stabsel show, eval = FALSE, echo = TRUE, strip.white=TRUE---------------- # #perform stability selection # TEDDYdat <- stabilitySelection(TEDDYdat, # subSample = TRUE, # nreps = 1000, # BPPARAM = BPPARAM, # BPOPTIONS = bpoptions(progressbar=FALSE)) ## ----stabsel message, eval=TRUE, echo=FALSE, strip.white=TRUE----------------- message('The lambda value stored in the DNEA will be used for analysis (this can be ', 'accessed via the optimizedLambda() function') message("Using Lambda hyper-parameter: ", optimizedLambda(TEDDYdat), "!\n", "stabilitySelection will be performed with 1000 replicates!") message("The log_scaled_data expression data will be used for analysis.\n") message("Additional sub-sampling will be performed on uneven groups\n") message("Calculating selection probabilities WITH subsampling for...DM:case...\n") message("Calculating selection probabilities WITH subsampling for...DM:control...") ## ----getNetworks, eval = TRUE------------------------------------------------- #jointly estimate the biological networks TEDDYdat <- getNetworks(TEDDYdat, aprox = TRUE) ## ----edgeList, eval=FALSE, echo=TRUE------------------------------------------ # #save edge list to new object # edge_list <- edgeList(TEDDYdat) # # #access the edge list # edge_list[1:10,] ## ----edge table, echo=FALSE--------------------------------------------------- #save edge list to new object edge_list <- edgeList(TEDDYdat) #create kable edge_table <- knitr::kable(edge_list[1:10,]) kable_paper(edge_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") #clean up space rm(edge_table) ## ----filterNetworks, eval=FALSE, echo=TRUE------------------------------------ # #filter networks based on an absolute threshold of pcor = 0.166 # TEDDYdat_filtered <- filterNetworks(TEDDYdat, pcor = 0.166) ## ----consensus cluster, eval = TRUE, warning=FALSE---------------------------- #set the seed set.seed(417) #perform consensus clustering TEDDYdat <- clusterNet(TEDDYdat, tau = 0.5, max_iterations = 5, verbose = FALSE) ## ----CCsummary show, eval = FALSE--------------------------------------------- # #view subnetwork summary # CCsummary(TEDDYdat) ## ----CCsummary kable, echo=FALSE---------------------------------------------- clust_table <- knitr::kable(CCsummary(TEDDYdat)) kable_paper(clust_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") ## ----runNetGSA, eval = 1:2, echo=TRUE----------------------------------------- #perform pathway enrichment using netgsa TEDDYdat <- runNetGSA(TEDDYdat, min_size = 5) ## ----netGSAresults show, eval = FALSE----------------------------------------- # #access netGSA results # netGSAresults(TEDDYdat) ## ----netGSAresults kable, echo=FALSE------------------------------------------ netGSA_table <- knitr::kable(netGSAresults(TEDDYdat)) kable_paper(netGSA_table, "striped", position = "center", full_width = FALSE, bootstrap_options = "condensed") ## ----plotNetworks biological networks----------------------------------------- #names of our experimental conditions networkGroups(TEDDYdat) #create side by side plots par(mfrow = c(1,3)) #plot networks plotNetworks(TEDDYdat, type = "group_networks", subtype = "DM:control", main = "DM:control Network") plotNetworks(TEDDYdat, type = "group_networks", subtype = "All", main = "Joint Network") plotNetworks(TEDDYdat, type = "group_networks", subtype = "DM:case", main = "DM:case Network") ## ----plotNetworks sub networks, message=FALSE--------------------------------- #load igraph library(igraph) #create side by side plots par(mfrow = c(1,2)) #plot subnetworks plotNetworks(TEDDYdat, type = "sub_networks", subtype = 1, layout_func = layout_in_circle, main = "Sub Network 1") plotNetworks(TEDDYdat, type = "sub_networks", subtype = 2, layout_func = layout_in_circle, main = "Sub Network 2") ## ----getNetworkFiles, eval=FALSE---------------------------------------------- # #save files for cytoscape # getNetworkFiles(TEDDYdat) ## ----session info, eval=TRUE-------------------------------------------------- sessionInfo()