## ----------------------------------------------------------------------------- set.seed(003) library(singIST) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- gse <- msigdb::getMsigdb(org = "hs", id = c("SYM", "EZID"), version = msigdb::getMsigdbVersions()[1]) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Pathway definition (curated gene set identifier) dc_pathway <- create_pathway(standard_name = "BIOCARTA_DC_PATHWAY", dbsource = "BIOCARTA", collection = "c2", subcollection = "CP") # Superpathway = pathway + modeled cell types + per cell type gene sets dc_superpathway <- create_superpathway(pathway_info = dc_pathway, celltypes = c("Dendritic Cells", "Langerhan Cells", "T-cell"), list()) # Assign gene sets per cell type (retrieve gene set from MSigDB) dc_superpathway <- setGeneSetsCelltype(dc_superpathway, gse = gse) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Pathway definition (curated gene set identifier) cytokine_pathway <- create_pathway( standard_name = "KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION", dbsource = "KEGG", collection = "c2", subcollection = "CP" ) # Superpathway = pathway + modeled cell types + per cell type gene sets cytokine_superpathway <- create_superpathway( pathway_info = cytokine_pathway, celltypes = c("Dendritic Cells", "Keratinocytes", "Langerhan Cells", "Melanocytes", "T-cell"), list() ) # Pull the curated gene set (retrieve gene set from MSigDB) gene_set <- pullGeneSet(cytokine_pathway, gse = gse) # Provide a different gene set per cell type cytokine_superpathway$gene_sets_celltype <- list( "Dendritic Cells" = sample(gene_set, 50), "Keratinocytes" = sample(gene_set, 100), "Langerhan Cells" = sample(gene_set, 10), "Melanocytes" = sample(gene_set, 60), "T-cell" = sample(gene_set, 160) ) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Load the pseudobulk matrix file_dc <- system.file("extdata", "DC_counts.rda", package = "singIST") obj_dc <- load(file_dc) counts_dc <- get(obj_dc[1]) # Remove the cell types that we are not going to model counts_dc <- counts_dc[!(sub("_.*$", "", rownames(counts_dc)) %in% c("MastC-other", "Keratinocytes", "Melanocytes")), ] ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Load the pseudobulk matrix sample_id_dc <- c("AD1", "AD13", "AD2", "AD3","AD4", "HC1-2", "HC3", "HC4", "HC5") sample_class_dc <- c(rep("AD", 5), rep("HC", 4)) base_class_dc <- "HC" target_class_dc <- "AD" ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Initialize hyperparameter object # Choose the array of quantile combinations that will be tested in the Cross # Validation. Ideally one should choose a wide range of quantiles to test, # as a narrow selection might not find the globally optimal model. The more # quantiles one chooses the more computation time it will take, for the sake # of simplicity we choose a small amount of quantiles to test quantile_comb_table_dc <- as.matrix( RcppAlgos::permuteGeneral(seq(0.1, 0.95, by = 0.3), m = length(dc_superpathway$celltypes), TRUE)) # Store tuning + CV design dc_hyperparameters <- create_hyperparameters( quantile_comb_table = quantile_comb_table_dc, outcome_type = "binary", number_PLS = as.integer(3), folds_CV = as.integer(1), #LOOCV repetition_CV = as.integer(1) ) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- dc_superpathway_input <- create_superpathway_input( superpathway_info = dc_superpathway, hyperparameters_info = dc_hyperparameters, pseudobulk_lognorm = counts_dc, sample_id = sample_id_dc, sample_class = sample_class_dc, base_class = base_class_dc, target_class = target_class_dc ) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Load the pseudobulk matrix file_cytokine <- system.file("extdata", "cytokine_counts.rda", package = "singIST") obj_cytokine <- load(file_cytokine) counts_cytokine <- get(obj_cytokine[1]) # Remove the cell types that we are not going to model counts_cytokine <- counts_cytokine[!(sub("_.*$", "", rownames(counts_cytokine)) %in% c("MastC-other")), ] ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Load the pseudobulk matrix sample_id_cytokine <- c("AD1", "AD13", "AD2", "AD3","AD4", "HC1-2", "HC3", "HC4", "HC5") sample_class_cytokine <- c(rep("AD", 5), rep("HC", 4)) base_class_cytokine <- "HC" target_class_cytokine <- "AD" ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Initialize hyperparameter object quantile_comb_table <- as.matrix( RcppAlgos::permuteGeneral(seq(0.05, 0.95, by = 0.2), m = length(cytokine_superpathway$celltypes), TRUE)) # Store tuning + CV design cytokine_hyperparameters <- create_hyperparameters( quantile_comb_table = quantile_comb_table, outcome_type = "binary", number_PLS = as.integer(2), folds_CV = as.integer(5), repetition_CV = as.integer(5)) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- cytokine_superpathway_input <- create_superpathway_input( superpathway_info = cytokine_superpathway, hyperparameters_info = cytokine_hyperparameters, pseudobulk_lognorm = counts_cytokine, sample_id = sample_id_cytokine, sample_class = sample_class_cytokine, base_class = base_class_cytokine, target_class = target_class_cytokine) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- file_oxa <- system.file("extdata", "OXA.rda", package = "singIST") obj_oxa <- load(file_oxa) oxazolone <- get(obj_oxa[1]) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # The example object included in singIST uses these column positions colnames(slot(oxazolone, "meta.data"))[c(5,11,1)] <- c("class", "celltype_cluster", "donor") # If the object is a SingleCellExperiment, metadata live in colData() ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- celltype_mapping <- list( "Dendritic Cells" = c("cDC2", "cDC1", "migratory DCs"), "Keratinocytes" = c("Keratinocytes"), "Langerhan Cells" = c("LC"), "Melanocytes" = c(), "T-cell" = c("DETC", "T")) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- oxa_org <- create_mapping_organism(organism = "Mus musculus", target_class = "OXA", base_class = "ETOH", celltype_mapping = list( "Dendritic Cells" = c("cDC2", "cDC1", "migratory DCs"), "Keratinocytes" = c("Keratinocytes"), "Langerhan Cells" = c("LC"), "Melanocytes" = c(), "T-cell" = c("DETC", "T")), counts = oxazolone) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Register a parallel backend (SnowParam works on Windows/macOS/Linux); # on Linux you may prefer MulticoreParam. BiocParallel::register(BiocParallel::SnowParam(workers = 2, exportglobals = FALSE, progressbar = FALSE), default = TRUE) # Combine all human training inputs into a list superpathways <- list(dc_superpathway_input, cytokine_superpathway_input) # Fit Optimal asmbPLS-DA models + compute validation metrics # parallel: logical vector indicating whether the CV design should be parallel # npermut: number of permutations used for permutation based tests # type : validation strategy (jackknife or subsampling) # nsubsampling: number of subsampling repeats (only used when type includes # subsampling). superpathways_fit <- multiple_fitOptimal( superpathways, parallel = c(TRUE, FALSE), npermut = c(100), type = c("jackknife","subsampling"), nsubsampling = c(NULL, 5) ) # Restore serial backend (good practice) BiocParallel::register(BiocParallel::SerialParam(), default = TRUE) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- # Compute singIST recapitulations for all fitted superpathways recapitulations_multiple <- multiple_singISTrecapitulations(oxa_org, superpathways_fit) ## ----echo = TRUE, message = FALSE, warning = FALSE---------------------------- final_output <- render_multiple_outputs(objects= list(recapitulations_multiple)) ## ----------------------------------------------------------------------------- utils::sessionInfo()