## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----eval=FALSE--------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("funOmics") ## ----eval=FALSE--------------------------------------------------------------- # if (!require("devtools", quietly = TRUE)) # install.packages("devtools") # # devtools::install_github("elisagdelope/funOmics") ## ----------------------------------------------------------------------------- library(funOmics) ## ----eval=FALSE--------------------------------------------------------------- # ?funOmics::get_kegg_sets ## ----eval=FALSE--------------------------------------------------------------- # ?funOmics::short_sets_detail ## ----------------------------------------------------------------------------- hsa_kegg_sets_entrez <- get_kegg_sets() head(hsa_kegg_sets_entrez) ## ----------------------------------------------------------------------------- hsa_kegg_sets_symbol <- get_kegg_sets(geneid_type = "symbol") hsa_kegg_sets_symbol[1] ## ----------------------------------------------------------------------------- hsa_kegg_sets_ensembl <- get_kegg_sets(geneid_type = "ensembl") hsa_kegg_sets_ensembl[1] ## ----------------------------------------------------------------------------- ecoli_kegg_sets <- get_kegg_sets(organism = "ecj") head(ecoli_kegg_sets) ## ----eval=FALSE--------------------------------------------------------------- # ?funOmics::summarize_pathway_level ## ----------------------------------------------------------------------------- library(SummarizedExperiment) library(airway) data(airway) airway ## ----------------------------------------------------------------------------- assayNames(airway) ## ----------------------------------------------------------------------------- head(assay(airway, "counts")) dim(assay(airway, "counts")) ## ----------------------------------------------------------------------------- kegg_sets <- get_kegg_sets(geneid_type = "ensembl") head(kegg_sets) ## ----------------------------------------------------------------------------- pathway_activity_pca <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "pca") ## ----------------------------------------------------------------------------- print(head(pathway_activity_pca)) ## ----------------------------------------------------------------------------- print(any(assay(airway, "counts")[rowSums(assay(airway, "counts")) <0, ])) X <- assay(airway, "counts")[rowSums(assay(airway, "counts")) >= 10, ] ## ----------------------------------------------------------------------------- pathway_activity_nmf <- summarize_pathway_level(X, kegg_sets, type = "nmf") # note that the NMF operation can take some minutes print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity_nmf), ",", ncol(pathway_activity_nmf))) ## ----------------------------------------------------------------------------- head(pathway_activity_nmf) ## ----------------------------------------------------------------------------- library(MultiAssayExperiment) assays_list <- list( counts = assay(airway, "counts"), kegg_nmf_agg = pathway_activity_nmf, kegg_pca_agg = pathway_activity_pca) airwayMultiAssay <- MultiAssayExperiment(experiments=assays_list) colData(airwayMultiAssay) <- colData(airway) airwayMultiAssay ## ----------------------------------------------------------------------------- min <- 12 pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "mean", minsize = min) print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity))) ## ----------------------------------------------------------------------------- print(head(pathway_activity)) ## ----------------------------------------------------------------------------- short_sets <- short_sets_detail(kegg_sets, min) print(short_sets$short_sets) ## ----------------------------------------------------------------------------- print(short_sets$short_sets_lengths) ## ----------------------------------------------------------------------------- print(short_sets$short_sets_molecules) ## ----------------------------------------------------------------------------- min <- 15 pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "sd", minsize = min) print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity))) head(pathway_activity) ## ----------------------------------------------------------------------------- min <- 7 pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "median", minsize = min) print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity))) head(pathway_activity) ## ----------------------------------------------------------------------------- pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "ttest", minsize = 15) print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity))) ## ----------------------------------------------------------------------------- print(head(pathway_activity)) ## ----------------------------------------------------------------------------- goccdb <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.5/c5.go.cc.v7.5.entrez.gmt" downdb <- sapply(readLines(goccdb), function(x) strsplit(x, "\t")[[1]]) gocc_sets <- sapply(as.matrix(downdb), function(x) x[3:length(x)]) names(gocc_sets) = sapply(as.matrix(downdb), function(x) x[1]) gocc_sets[1:3] ## ----------------------------------------------------------------------------- min <- 8 short_sets <- short_sets_detail(gocc_sets, min) print(head(short_sets$short_sets_molecules)) ## ----------------------------------------------------------------------------- # Example usage: set.seed(1) g <- 10000 s <- 20 X_expr <- matrix(abs(rnorm(g * s)), nrow = g, dimnames = list(1:g, paste0("s", 1:s))) print(paste("Dimensions of omics matrix X:", dim(X_expr)[1], "*", dim(X_expr)[2])) head(X_expr) ## ----------------------------------------------------------------------------- sd_gocc_expr <- summarize_pathway_level(X_expr, gocc_sets, type="sd", minsize=8) head(sd_gocc_expr) ## ----------------------------------------------------------------------------- sI <- sessionInfo() print(sI, locale = FALSE)