## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message=FALSE----------------------------------------------------- library(mpactr) library(viridis) library(plotly) library(data.table) library(tidyverse) library(Hmisc) library(corrplot) library(ggdendro) library(ggtext) ## ----------------------------------------------------------------------------- data <- import_data(example_path("cultures_peak_table.csv"), example_path("cultures_metadata.csv"), format = "Progenesis" ) ## ----------------------------------------------------------------------------- data_filtered <- data |> filter_mispicked_ions(merge_peaks = TRUE, merge_method = "sum") |> filter_group(group_to_remove = "Solvent_Blank") |> filter_group(group_to_remove = "Media") |> filter_cv(cv_threshold = 0.2, cv_param = "median") ## ----------------------------------------------------------------------------- plot_qc_tree(data_filtered) ## ----------------------------------------------------------------------------- get_raw_data(data_filtered) %>% select(Compound, mz, rt) %>% head() ## ----------------------------------------------------------------------------- qc_summary(data_filtered) %>% head() ## ----------------------------------------------------------------------------- get_raw_data(data_filtered) %>% mutate(Compound = as.character(Compound)) %>% select(Compound, mz, rt) %>% left_join(qc_summary(data_filtered), by = join_by("Compound" == "compounds") ) %>% head() ## ----------------------------------------------------------------------------- get_raw_data(data_filtered) %>% mutate(Compound = as.character(Compound)) %>% select(Compound, mz, rt) %>% left_join(qc_summary(data_filtered), by = join_by("Compound" == "compounds") ) %>% ggplot() + aes(x = rt, y = mz, color = status) + geom_point() + viridis::scale_color_viridis(discrete = TRUE) + labs( x = "Retention time (min)", y = "m/z", color = "Ion Status" ) + theme_bw() + theme(legend.position = "top") ## ----------------------------------------------------------------------------- feature_plot <- get_raw_data(data_filtered) %>% mutate(Compound = as.character(Compound)) %>% select(Compound, mz, rt) %>% left_join(qc_summary(data_filtered), by = join_by("Compound" == "compounds") ) %>% ggplot() + aes( x = rt, y = mz, color = status, text = paste0("Compound: ", Compound) ) + geom_point() + viridis::scale_color_viridis(discrete = TRUE) + labs( x = "Retention time (min)", y = "m/z", color = "Ion Status" ) + theme_bw() + theme(legend.position = "top") ggplotly(feature_plot) ## ----------------------------------------------------------------------------- ft <- get_peak_table(data_filtered) ft[1:5, 1:7] ## ----------------------------------------------------------------------------- counts <- ft %>% select(Compound, all_of(get_meta_data(data_filtered)$Injection)) %>% column_to_rownames(var = "Compound") %>% select(where(~ sum(.x) != 0)) counts[1:5, 1:2] ## ----------------------------------------------------------------------------- counts_cor <- rcorr(as.matrix(counts), type = "spearman") ## ----------------------------------------------------------------------------- counts_cor$r[, 1] ## ----warning=FALSE------------------------------------------------------------ corrplot(counts_cor$r, type = "lower", method = "square", order = "hclust", col = COL2("BrBG", 10), tl.col = "black", tl.cex = .5 ) ## ----------------------------------------------------------------------------- meta <- get_meta_data(data_filtered) counts %>% rownames_to_column(var = "Compound") %>% pivot_longer( cols = starts_with("102623"), names_to = "Injection", values_to = "Intensity" ) %>% head() ## ----------------------------------------------------------------------------- counts %>% rownames_to_column(var = "Compound") %>% pivot_longer( cols = starts_with("102623"), names_to = "Injection", values_to = "Intensity" ) %>% left_join(meta, by = "Injection") %>% head() ## ----------------------------------------------------------------------------- counts %>% rownames_to_column(var = "Compound") %>% pivot_longer( cols = starts_with("102623"), names_to = "Injection", values_to = "Intensity" ) %>% left_join(meta, by = "Injection") %>% summarise( mean_intensity = mean(Intensity), .by = c(Compound, Sample_Code) ) %>% head() ## ----warning=FALSE------------------------------------------------------------ sample_counts <- counts %>% rownames_to_column(var = "Compound") %>% pivot_longer( cols = starts_with("102623"), names_to = "Injection", values_to = "Intensity" ) %>% left_join(meta, by = "Injection") %>% summarise( mean_intensity = mean(Intensity), .by = c(Compound, Sample_Code) ) %>% pivot_wider( names_from = Sample_Code, values_from = mean_intensity ) %>% column_to_rownames(var = "Compound") sample_counts[1:5, 1:5] ## ----------------------------------------------------------------------------- sample_counts_cor <- rcorr(as.matrix(sample_counts), type = "spearman") ## ----------------------------------------------------------------------------- corrplot(sample_counts_cor$r, type = "lower", method = "square", order = "alphabet", col = COL2("BrBG", 10), tl.col = "black" ) ## ----warning=FALSE------------------------------------------------------------ group_counts <- counts %>% rownames_to_column(var = "Compound") %>% pivot_longer( cols = starts_with("102623"), names_to = "Injection", values_to = "Intensity" ) %>% left_join(meta, by = "Injection") %>% summarise( mean_intensity = mean(Intensity), .by = c(Compound, Biological_Group) ) %>% pivot_wider( names_from = Biological_Group, values_from = mean_intensity ) %>% column_to_rownames(var = "Compound") ## ----------------------------------------------------------------------------- group_counts_cor <- rcorr(as.matrix(group_counts), type = "spearman") ## ----------------------------------------------------------------------------- corrplot(group_counts_cor$r, type = "lower", method = "square", order = "alphabet", col = COL2("BrBG", 10), tl.col = "black" ) ## ----------------------------------------------------------------------------- dist <- stats::dist(t(counts), method = "euclidian") cluster <- stats::hclust(dist, method = "complete") ## ----------------------------------------------------------------------------- dendro <- as.dendrogram(cluster) ## ----------------------------------------------------------------------------- den_data <- dendro_data(dendro, type = "rectangle") ## ----------------------------------------------------------------------------- ggplot(segment(den_data)) + geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + geom_text( data = den_data$labels, aes(x = x, y = y, label = label), size = 3, hjust = "outward" ) + coord_cartesian(ylim = c((min(segment(den_data)$y) + 10), (max(segment(den_data)$y)))) + coord_flip() + scale_y_reverse(expand = c(.5, 0)) + theme_dendro() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(fc = Coculture / ANG18) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(nonzero_compound = if_else(Coculture == 0 & ANG18 == 0, FALSE, TRUE)) %>% filter(nonzero_compound == TRUE) %>% mutate(fc = Coculture / ANG18) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% mutate(average = average + 0.001) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% mutate(average = average + 0.001) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(nonzero_compound = if_else(Coculture == 0.001 & ANG18 == 0.001, FALSE, TRUE)) %>% filter(nonzero_compound == TRUE) %>% head() ## ----------------------------------------------------------------------------- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% mutate(average = average + 0.001) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(nonzero_compound = if_else(Coculture == 0.001 & ANG18 == 0.001, FALSE, TRUE)) %>% filter(nonzero_compound == TRUE) %>% mutate(fc = Coculture / ANG18) %>% head() ## ----------------------------------------------------------------------------- foldchanges <- get_group_averages(data_filtered) %>% filter(Biological_Group == "Coculture" | Biological_Group == "ANG18") %>% select(Compound, Biological_Group, average) %>% mutate(average = average + 0.001) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(nonzero_compound = if_else(Coculture == 0.001 & ANG18 == 0.001, FALSE, TRUE)) %>% filter(nonzero_compound == TRUE) %>% mutate(fc = Coculture / ANG18, logfc = log2(fc)) head(foldchanges) ## ----------------------------------------------------------------------------- fc_plotting <- foldchanges %>% left_join(select(ft, Compound, mz, rt), by = "Compound") plot_ly(fc_plotting, x = ~logfc, y = ~rt, z = ~mz, marker = list(color = ~logfc, showscale = TRUE) ) %>% add_markers() %>% layout( scene = list( xaxis = list(title = "log2 Fold Change"), yaxis = list(title = "Retention Time (min)"), zaxis = list(title = "m/z") ), annotations = list( x = 1.13, y = 1.05, text = "log2 Fold Change", xref = "paper", yref = "paper", showarrow = FALSE ) ) ## ----------------------------------------------------------------------------- # Satterwaite calc_samplesize_ws <- function(sd1, n1, sd2, n2) { s1 <- sd1 / (n1^0.5) s2 <- sd2 / (n2^0.5) n <- (s1^2 / n1 + s2^2 / n2)^2 d1 <- s1^4 / ((n1^2) * (n1 - 1)) d2 <- s2^4 / ((n2^2) * (n2 - 1)) d1[which(!is.finite(d1))] <- 0 d2[which(!is.finite(d2))] <- 0 d <- d1 + d2 return(n / d) } my_comp <- c("Coculture", "ANG18") stats <- get_group_averages(data_filtered) %>% mutate( combRSD = sqrt(techRSD^2 + BiolRSD^2), combASD = combRSD * average, combASD = if_else(is.na(combASD), 0, combASD), BiolRSD = if_else(is.na(BiolRSD), 0, BiolRSD), techRSD = if_else(is.na(techRSD), 0, techRSD), neff = calc_samplesize_ws(techRSD, techn, BiolRSD, Bioln) + 1 ) %>% filter(Biological_Group %in% my_comp) head(stats) ## ----------------------------------------------------------------------------- denom <- stats %>% summarise(den = combASD^2 / (neff), .by = c("Compound", "Biological_Group")) %>% mutate(den = if_else(!is.finite(den), 0, den)) %>% summarise(denom = sqrt(sum(den)), .by = c("Compound")) t_test <- stats %>% select(Compound, Biological_Group, average) %>% pivot_wider(names_from = Biological_Group, values_from = average) %>% mutate(numerator = (Coculture - ANG18)) %>% # experimental - control left_join(denom, by = "Compound") %>% mutate(t = abs(numerator / denom)) head(t_test) ## ----------------------------------------------------------------------------- df <- stats %>% select(Compound, Biological_Group, neff) %>% mutate(neff = if_else(!is.finite(neff), 0, neff)) %>% pivot_wider(names_from = Biological_Group, values_from = neff) %>% mutate(deg = Coculture + ANG18 - 2) %>% select(Compound, deg) head(df) ## ----------------------------------------------------------------------------- t <- t_test %>% left_join(df, by = "Compound") %>% mutate( p = (1 - pt(t, deg)) * 2, logp = log10(p), neg_logp = -logp ) %>% select(Compound, t, deg, p, logp, neg_logp) head(t) ## ----------------------------------------------------------------------------- num_ions <- t %>% filter(p <= 1) %>% count() %>% pull() fc <- foldchanges %>% left_join(t, by = "Compound") %>% arrange(p) %>% mutate( qval = seq_len(length(p)), qval = p * num_ions / qval ) %>% arrange(desc(p)) min_qval <- 1 for (i in seq_len(nrow(fc))) { if (!is.finite(fc$qval[i])) { next } if (fc$qval[i] < min_qval) { min_qval <- fc$qval[i] } else { fc$qval[i] <- min_qval } } fc2 <- fc %>% mutate(neg_logq = -log10(qval)) ## ----------------------------------------------------------------------------- fc2 %>% ggplot() + aes(x = logfc, y = neg_logp) + geom_point() + labs(x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance") + theme_bw() ## ----------------------------------------------------------------------------- fc2 %>% ggplot() + aes(x = logfc, y = neg_logp) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + labs(x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance") + theme_bw() ## ----------------------------------------------------------------------------- fc2 %>% ggplot() + aes(x = logfc, y = neg_logp) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + labs(x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance") + geom_vline(xintercept = log2(1.5), linetype = "dashed") + geom_vline(xintercept = -log2(1.5), linetype = "dashed") + theme_bw() ## ----------------------------------------------------------------------------- fc2 %>% mutate( sig = case_when( p > 0.05 ~ "not_sig", p <= 0.05 & logfc > log2(1.5) ~ "Increased", p <= 0.05 & logfc < -log2(1.5) ~ "Decreased", TRUE ~ "Inconclusive" ), sig = factor(sig, levels = c("Increased", "Decreased", "Inconclusive", "not_sig"), labels = c("Increased", "Decreased", "Inconclusive", "Not significant") ) ) %>% select(Compound, ANG18, Coculture, fc, logfc, p, sig) %>% head() ## ----------------------------------------------------------------------------- fc2 %>% mutate( sig = case_when( p > 0.05 ~ "not_sig", p <= 0.05 & logfc > log2(1.5) ~ "Increased", p <= 0.05 & logfc < -log2(1.5) ~ "Decreased", TRUE ~ "Inconclusive" ), sig = factor(sig, levels = c("Increased", "Decreased", "Inconclusive", "not_sig"), labels = c("Increased", "Decreased", "Inconclusive", "Not significant") ) ) %>% ggplot() + aes(x = logfc, y = neg_logp, color = sig) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + geom_vline(xintercept = log2(1.5), linetype = "dashed") + geom_vline(xintercept = -log2(1.5), linetype = "dashed") + scale_color_manual(values = c("red", "blue", "grey", "black")) + labs( x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance" ) + theme_bw() ## ----------------------------------------------------------------------------- fc2 %>% mutate( sig = case_when( p > 0.05 ~ "not_sig", p <= 0.05 & logfc > log2(1.5) ~ "Increased", p <= 0.05 & logfc < -log2(1.5) ~ "Decreased", TRUE ~ "Inconclusive" ), sig = factor(sig, levels = c("Increased", "Decreased", "Inconclusive", "not_sig"), labels = c("Increased", "Decreased", "Inconclusive", "Not significant") ) ) %>% ggplot() + aes(x = logfc, y = neg_logp, color = sig) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + geom_vline(xintercept = log2(1.5), linetype = "dashed") + geom_vline(xintercept = -log2(1.5), linetype = "dashed") + scale_color_manual(values = c("red", "blue", "grey", "black")) + labs( x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance" ) + theme_bw() + theme( axis.title = element_markdown(size = 20), axis.text = element_text(size = 15) ) ## ----------------------------------------------------------------------------- volcano <- fc2 %>% mutate( sig = case_when( p > 0.05 ~ "not_sig", p <= 0.05 & logfc > log2(1.5) ~ "Increased", p <= 0.05 & logfc < -log2(1.5) ~ "Decreased", TRUE ~ "Inconclusive" ), sig = factor(sig, levels = c("Increased", "Decreased", "Inconclusive", "not_sig"), labels = c("Increased", "Decreased", "Inconclusive", "Not significant") ) ) %>% ggplot() + aes( x = logfc, y = neg_logp, color = sig, text = paste0("Compound: ", Compound) ) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + geom_vline(xintercept = log2(1.5), linetype = "dashed") + geom_vline(xintercept = -log2(1.5), linetype = "dashed") + scale_color_manual(values = c("red", "blue", "grey", "black")) + labs( x = "log2 Fold Change", y = "-Log~10~ *P*", color = "Differential Abundance" ) + theme_bw() + theme( axis.title = element_markdown(size = 20), axis.text = element_text(size = 15) ) ggplotly(volcano) ## ----------------------------------------------------------------------------- fc2 %>% mutate( sig = case_when( qval > 0.05 ~ "not_sig", qval <= 0.05 & logfc > log2(1.5) ~ "Increased", qval <= 0.05 & logfc < -log2(1.5) ~ "Decreased", TRUE ~ "Inconclusive" ), sig = factor(sig, levels = c("Increased", "Decreased", "Inconclusive", "not_sig"), labels = c("Increased", "Decreased", "Inconclusive", "Not significant") ) ) %>% ggplot() + aes(x = logfc, y = neg_logq, color = sig) + geom_point() + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + geom_vline(xintercept = log2(1.5), linetype = "dashed") + geom_vline(xintercept = -log2(1.5), linetype = "dashed") + scale_color_manual(values = c("red", "blue", "grey", "black")) + labs( x = "log2 Fold Change", y = "-Log~10~ q-value", color = "Differential Abundance" ) + theme_bw() + theme( axis.title = element_markdown(size = 20), axis.text = element_text(size = 15) )