## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, nobreak = TRUE) ## ----message=FALSE, warning=FALSE, echo = FALSE------------------------------- library(ProteinGymR) library(ComplexHeatmap) library(stringr) library(dplyr) library(ggplot2) library(ggExtra) ## ----ACE2 default heatmap, fig.wide = TRUE------------------------------------ ace2_dms <- plot_dms_heatmap( assay_name = "ACE2_HUMAN_Chan_2020", start_pos = 1, end_pos = 100) print(ace2_dms) ## ----SHOC2 heatmap, fig.wide = TRUE------------------------------------------- plot_dms_heatmap(assay_name = "SHOC2_HUMAN_Kwon_2022", start_pos = 10, end_pos = 60, cluster_rows = TRUE) ## ----fig.wide = TRUE---------------------------------------------------------- ace2_model <- plot_zeroshot_heatmap( assay_name = "ACE2_HUMAN_Chan_2020", model = "GEMME", start_pos = 1, end_pos = 100) print(ace2_model) ## ----fig.wide = TRUE---------------------------------------------------------- ComplexHeatmap::draw(ace2_dms %v% ace2_model) ## ----fig.wide = TRUE---------------------------------------------------------- plot_structure(assay_name = "ACE2_HUMAN_Chan_2020") ## ----fig.wide = TRUE---------------------------------------------------------- plot_structure(assay_name = "ACE2_HUMAN_Chan_2020", full_structure = TRUE) ## ----fig.wide = TRUE---------------------------------------------------------- plot_structure(assay_name = "C6KNH7_9INFA_Lee_2018", aggregate_fun = min) ## ----fig.wide = TRUE---------------------------------------------------------- dms_data <- dms_substitutions() df <- dms_data[["C6KNH7_9INFA_Lee_2018"]] ggplot(df, aes(DMS_score)) + geom_histogram(alpha = 0.8) + theme_minimal() + theme(text = element_text(size = 20)) ## ----fig.wide = TRUE---------------------------------------------------------- plot_structure(assay_name = "ACE2_HUMAN_Chan_2020", start_pos = 10, end_pos = 50, full_structure = FALSE) ## ----fig.wide = TRUE---------------------------------------------------------- plot_structure(assay_name = "ACE2_HUMAN_Chan_2020", color_scheme = "EVE") ## ----warning = FALSE, fig.wide = TRUE----------------------------------------- dms_corr_plot(uniprotId = "Q9NV35") ## ----------------------------------------------------------------------------- dms_corr_plot(uniprotId = "Q9NV35") ## ----------------------------------------------------------------------------- ## Load model scores model_data <- zeroshot_substitutions() ## Load DMS scores dms_data <- dms_substitutions() ## ----------------------------------------------------------------------------- ## Specify the chosen assay and model to explore. assay_name <- "A0A140D2T1_ZIKV_Sourisseau_2019" model <- "GEMME" ## Filter selected assay model_df <- as.data.frame(model_data[[assay_name]]) dms_df <- as.data.frame(dms_data[[assay_name]]) ## Filter out multiple aa sites model_df <- model_df |> filter(!str_detect(.data$mutant, ":")) dms_df <- dms_df |> filter(!str_detect(.data$mutant, ":")) ## Select chosen model model_df <- model_df |> dplyr::select( mutant, all_of(model) ) model_df <- model_df |> dplyr::select(all_of(model), mutant) ## ----fig.wide = TRUE---------------------------------------------------------- ## Wrangle the data dms_df <- dms_df |> dplyr::select(DMS_score, mutant) merged_table <- left_join( model_df, dms_df, by = c("mutant"), relationship = "many-to-many" ) |> na.omit() |> select(mutant, DMS_score, model = all_of(model)) ## Corr function pg_correlate <- function(merged_table) { cor_results <- cor.test( merged_table$DMS_score, merged_table$model, method=c("spearman"), exact = FALSE ) cor_results } cor_results <- pg_correlate(merged_table) ## Correlation density plot pg_density_plot <- merged_table |> ggplot( aes(x = .data$DMS_score, y = .data$model) ) + geom_bin2d(bins = 60) + geom_point(alpha = 0) + scale_fill_continuous(type = "viridis") + xlab("DMS score") + ylab(paste0(model, "\nzero shot score")) + theme_classic() + theme( axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 16, vjust = 2), axis.title.x = element_text(size = 16, vjust = 0), legend.title = element_text(size = 16), legend.text = element_text(size = 16) ) # Add marginal density plots pg_density_plot <- ggMarginal( pg_density_plot, type = "densigram", # Can also use "histogram" fill = "#B0C4DE", color = "black" # Change color as needed ) pg_density_plot print(paste0("r = ", format(round(cor_results$estimate, 2)), "; Pval = ", cor_results$p.value)) ## ----fig.wide = TRUE---------------------------------------------------------- ## Select assay and models to compare assay_name <- "A0A140D2T1_ZIKV_Sourisseau_2019" model1 <- "EVE_ensemble" model2 <- "S3F_MSA" ## Filter selected assay model_df <- model_data[[assay_name]] ## Filter out multiple aa sites model_df <- model_df |> filter(!grepl(":", .data$mutant)) ## Select chosen model model_df <- model_df |> dplyr::select( mutant, all_of(c(model1 = model1, model2 = model2)) ) ## Corr function pg_correlate <- function(model_df) { cor_results <- cor.test( model_df$model1, model_df$model2, method=c("spearman"), exact = FALSE ) cor_results } cor_results <- pg_correlate(model_df = model_df) ## Correlation density plot pg_density_plot <- model_df |> ggplot( aes(x = .data$model1, y = .data$model2) ) + geom_bin2d(bins = 60) + geom_point(alpha = 0) + scale_fill_continuous(type = "viridis") + xlab(paste0(model1, "\nzero shot score")) + ylab(paste0(model2, "\nzero shot score")) + theme_classic() + theme( axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 16, vjust = 2), axis.title.x = element_text(size = 16, vjust = 0), legend.title = element_text(size = 16), legend.text = element_text(size = 16) ) ## Add marginal density plots pg_density_plot <- ggMarginal( pg_density_plot, type = "densigram", # Can also use "histogram" fill = "#B0C4DE", color = "black" # Change color as needed ) pg_density_plot print(paste0("r = ", format(round(cor_results$estimate, 2)), "; Pval = ", cor_results$p.value)) ## ----------------------------------------------------------------------------- sessionInfo()