## ----MAE_SE_install, eval = FALSE--------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("MultiAssayExperiment") # BiocManager::install("SummarizedExperiment") ## ----load_LegATo, eval = FALSE------------------------------------------------ # BiocManager::install("wejlab/LegATo") ## ----------------------------------------------------------------------------- library(LegATo) ## ----------------------------------------------------------------------------- library(ggeffects) library(emmeans) ## ----------------------------------------------------------------------------- counts <- system.file("extdata", "counts.csv", package = "LegATo") |> read.csv(row.names = 1) |> dplyr::rename_with(function(x) stringr::str_replace(x, "\\.", "-")) tax <- system.file("extdata", "tax.csv", package = "LegATo") |> read.csv(row.names = 1) sample <- system.file("extdata", "sample.csv", package = "LegATo") |> read.csv(row.names = 1) ## ----------------------------------------------------------------------------- ndim <- 5 counts[seq_len(ndim), seq_len(ndim)] |> knitr::kable(caption = "Counts Table Preview", label = NA) tax[seq_len(ndim), ] |> knitr::kable(caption = "Taxonomy Table Preview", label = NA) sample[seq_len(ndim), ] |> knitr::kable(caption = "Sample Table Preview", label = NA) ## ----------------------------------------------------------------------------- output <- create_formatted_MAE(counts_dat = counts, tax_dat = tax, metadata_dat = sample) class(output) MultiAssayExperiment::assays(output) SummarizedExperiment::assays(output[["MicrobeGenetics"]]) ## ----------------------------------------------------------------------------- dat <- system.file("extdata", "MAE.RDS", package = "LegATo") |> readRDS() dat_subsetted <- MultiAssayExperiment::subsetByColData(dat, dat$MothChild == "Infant") ## ----------------------------------------------------------------------------- dat_cleaned <- clean_MAE(dat_subsetted) ## ----------------------------------------------------------------------------- dat_filt_1 <- filter_MAE(dat_cleaned, relabu_threshold = 0.05, occur_pct_cutoff = 10) ## ----------------------------------------------------------------------------- dat_filt <- filter_animalcules_MAE(dat_cleaned, 0.05) ## ----------------------------------------------------------------------------- parsed <- parse_MAE_SE(dat_filt, which_assay = "MicrobeGenetics", type = "MAE") parsed$counts[seq_len(5), seq_len(5)] |> knitr::kable(caption = "Counts Table") parsed$sam[seq_len(5), ] |> knitr::kable(caption = "Sample Metadata") parsed$tax[seq_len(5), ] |> knitr::kable(caption = "Taxonomy Table") ## ----------------------------------------------------------------------------- group_vars <- c("HIVStatus", "MothChild") get_summary_table(dat_filt, group_vars) |> knitr::kable(caption = "Summary Table", label = NA) ## ----------------------------------------------------------------------------- best_genus <- get_top_taxa(dat_filt, "genus") best_genus |> knitr::kable(caption = "Table of genera, ranked by abundance") ## ----------------------------------------------------------------------------- longdat <- get_long_data(dat_filt, "genus", log = TRUE, counts_to_CPM = TRUE) stackeddat <- get_stacked_data(dat_filt, "genus", covariate_1 = "HIVStatus", covariate_time = "timepoint") ## ----------------------------------------------------------------------------- this_palette <- c("#709AE1", "#8A9197", "#D2AF81", "#FD7446", "#D5E4A2", "#197EC0", "#F05C3B", "#46732E", "#71D0F5", "#370335", "#075149", "#C80813", "#91331F", "#1A9993", "#FD8CC1") |> magrittr::extract(seq_len(nrow(best_genus))) ## ----------------------------------------------------------------------------- plot_alluvial(dat = dat_filt, taxon_level = "genus", covariate_1 = "HIVStatus", covariate_time = "timepoint", palette_input = this_palette, subtitle = "Alluvial plot") ## ----results = "asis", message = FALSE---------------------------------------- plot_spaghetti(dat = dat_filt, covariate_time = "timepoint", covariate_1 = "HIVStatus", unit_var = "Subject", taxon_level = "genus", which_taxon = "Staphylococcus", palette_input = this_palette, title = "Spaghetti Plot", subtitle = NULL) + ggplot2::xlab("Infant Age (timepoint)") + ggplot2::ylab("Relative Abundance (log CPM)") ## ----------------------------------------------------------------------------- plot_stacked_bar(dat_filt, "genus", "HIVStatus", "timepoint", palette_input = this_palette) ## ----------------------------------------------------------------------------- plot_stacked_area(dat_filt, "genus", "HIVStatus", "timepoint", palette_input = this_palette) ## ----------------------------------------------------------------------------- this_taxon <- parsed$counts |> animalcules::upsample_counts(parsed$tax, "genus") |> animalcules::counts_to_logcpm() p <- plot_heatmap(inputData = this_taxon, annotationData = dplyr::select(parsed$sam, "timepoint", "HIVStatus", "pairing"), name = "Data", plot_title = "Example", plottingColNames = NULL, annotationColNames = NULL, colList = list(), scale = FALSE, showColumnNames = FALSE, showRowNames = FALSE, colorSets = c("Set1", "Set2", "Set3", "Pastel1", "Pastel2", "Accent", "Dark2", "Paired"), choose_color = c("blue", "gray95", "red"), split_heatmap = "none", column_order = NULL ) ## ----------------------------------------------------------------------------- dat_1 <- filter_MAE(dat_cleaned, 0.05, 10, "species") NMIT(dat_1, unit_var = "Subject", fixed_cov = "HIVStatus", covariate_time = "timepoint", method = "kendall", dist_type = "F", heatmap = TRUE, classify = FALSE, fill_na = 0) ## ----------------------------------------------------------------------------- test_hotelling_t2(dat = dat_1, test_index = which(dat_filt$MothChild == "Infant" & dat_filt$timepoint == 6), taxon_level = "genus", num_taxa = 6, paired = TRUE, grouping_var = "HIVStatus", pairing_var = "pairing") ## ----------------------------------------------------------------------------- test_hotelling_t2(dat = dat_1, test_index = which(dat_filt$timepoint == 0), taxon_level = "genus", num_taxa = 6, grouping_var = "HIVStatus", unit_var = "Subject", paired = FALSE) ## ----------------------------------------------------------------------------- output <- run_gee_model(dat_1, unit_var = "Subject", fixed_cov = c("HIVStatus", "timepoint"), corstr = "ar1", plot_out = FALSE, plotsave_loc = ".", plot_terms = NULL) head(output) |> knitr::kable(caption = "GEE Outputs") ## ----out.width = "50%", fig.align = "center", echo = FALSE-------------------- tempfolder <- tempdir() # Trying out plotting output <- run_gee_model(dat_1, unit_var = "Subject", taxon_level = "genus", fixed_cov = c("HIVStatus", "timepoint"), corstr = "ar1", plot_out = TRUE, plotsave_loc = tempfolder, plot_terms = "timepoint", width = 6, height = 4, units = "in", scale = 0.7) list.files(tempfolder) |> head() ## ----------------------------------------------------------------------------- # If there are issues with matrix being positive definite # Revisit filtering parameters in filter_MAE output <- run_lmm_model(dat_1, unit_var = "Subject", taxon_level = "genus", fixed_cov = c("timepoint", "HIVStatus"), plot_out = FALSE, plotsave_loc = ".", plot_terms = NULL) head(output) |> knitr::kable(caption = "LMM Outputs") ## ----------------------------------------------------------------------------- sessionInfo()