## ----setup, include=FALSE, results = "asis"----------------------------------- BiocStyle::markdown() knitr::opts_chunk$set(echo = TRUE, dev = "png") ## ----eval= FALSE-------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install("musicatk") ## ----eval = FALSE------------------------------------------------------------- # if (!requireNamespace("devtools", quietly = TRUE)) { # install.packages("devtools") # } # # library(devtools) # install_github("campbio/musicatk") ## ----eval = TRUE, message = FALSE--------------------------------------------- library(musicatk) ## ----extract_variants, message = FALSE---------------------------------------- # Extract variants from a MAF File lusc_maf <- system.file("extdata", "public_TCGA.LUSC.maf", package = "musicatk") lusc.variants <- extract_variants_from_maf_file(maf_file = lusc_maf) # Extract variants from an individual VCF file luad_vcf <- system.file("extdata", "public_LUAD_TCGA-97-7938.vcf", package = "musicatk" ) luad.variants <- extract_variants_from_vcf_file(vcf_file = luad_vcf) # Extract variants from multiple files and/or objects melanoma_vcfs <- list.files(system.file("extdata", package = "musicatk"), pattern = glob2rx("*SKCM*vcf"), full.names = TRUE ) variants <- extract_variants(c(lusc_maf, luad_vcf, melanoma_vcfs)) ## ----select_genome------------------------------------------------------------ g <- select_genome("hg38") ## ----create_musica------------------------------------------------------------ musica <- create_musica_from_variants(x = variants, genome = g) ## ----build_tables------------------------------------------------------------- build_standard_table(musica, g = g, table_name = "SBS96") ## ----combine_tables----------------------------------------------------------- data(dbs_musica) build_standard_table(dbs_musica, g, "SBS96", overwrite = TRUE) build_standard_table(dbs_musica, g, "DBS78", overwrite = TRUE) # Subset SBS table to DBS samples so they cam be combined count_tables <- tables(dbs_musica) overlap_samples <- which(colnames(count_tables$SBS96@count_table) %in% colnames(count_tables$DBS78@count_table)) count_tables$SBS96@count_table <- count_tables$SBS96@count_table[, overlap_samples] tables(dbs_musica) <- count_tables combine_count_tables( musica = dbs_musica, to_comb = c("SBS96", "DBS78"), name = "sbs_dbs", description = "An example combined table, combining SBS96 and DBS", overwrite = TRUE ) ## ----------------------------------------------------------------------------- annotate_transcript_strand(musica, "19", build_table = FALSE) build_custom_table( musica = musica, variant_annotation = "Transcript_Strand", name = "Transcript_Strand", description = "A table of transcript strand of variants", data_factor = c("T", "U"), overwrite = TRUE ) ## ----create_musica_from_counts------------------------------------------------ luad_count_table_path <- system.file("extdata", "luad_tcga_count_table.csv", package = "musicatk" ) luad_count_table <- as.matrix(read.csv(luad_count_table_path)) musica_from_counts <- create_musica_from_counts(luad_count_table, "SBS96") ## ----discover_sigs------------------------------------------------------------ discover_signatures( musica = musica, modality = "SBS96", num_signatures = 3, algorithm = "nmf", model_id = "ex_result", nstart = 10 ) ## ----result_accessors--------------------------------------------------------- # Extract the exposure matrix expos <- exposures(musica, "result", "SBS96", "ex_result") expos[1:3, 1:3] # Extract the signature matrix sigs <- signatures(musica, "result", "SBS96", "ex_result") sigs[1:3, 1:3] ## ----compare_k_vals----------------------------------------------------------- k_comparison <- compare_k_vals(musica, "SBS96", reps = 5, min_k = 2, max_k = 4, algorithm = "nmf" ) ## ----plot_sigs---------------------------------------------------------------- plot_signatures(musica, model_id = "ex_result", modality = "SBS96") ## ----name_sigs---------------------------------------------------------------- name_signatures(musica, "ex_result", c("Smoking", "APOBEC", "UV")) plot_signatures(musica, model_id = "ex_result", modality = "SBS96") ## ----exposures_raw------------------------------------------------------------ plot_exposures(musica, "ex_result", plot_type = "bar") ## ----exposures_prop----------------------------------------------------------- plot_exposures(musica, "ex_result", plot_type = "bar", proportional = TRUE) ## ----sample_counts------------------------------------------------------------ samples <- sample_names(musica) plot_sample_counts(musica, sample_names = samples[c(3, 4, 5)], modality = "SBS96") ## ----compare_cosmic----------------------------------------------------------- compare_cosmic_v2(musica, "ex_result", threshold = 0.75) ## ----predict_cosmic----------------------------------------------------------- # Load COSMIC V2 data data("cosmic_v2_sigs") # Predict pre-existing exposures using the "lda" method predict_exposure( musica = musica, modality = "SBS96", signature_res = cosmic_v2_sigs, model_id = "ex_pred_result", signatures_to_use = c(1, 4, 7, 13), algorithm = "lda" ) # Plot exposures plot_exposures(musica, "ex_pred_result", plot_type = "bar") ## ----subtype_map-------------------------------------------------------------- cosmic_v2_subtype_map("lung") ## ----predict_previous--------------------------------------------------------- model <- get_model(musica, "result", "SBS96", "ex_result") predict_exposure( musica = musica, modality = "SBS96", signature_res = model, algorithm = "lda", model_id = "pred_our_sigs" ) ## ----predict_compare---------------------------------------------------------- compare_results(musica, model_id = "ex_pred_result", other_model_id = "pred_our_sigs", threshold = 0.60 ) ## ----annotations-------------------------------------------------------------- annot <- read.table(system.file("extdata", "sample_annotations.txt", package = "musicatk"), sep = "\t", header = TRUE) samp_annot(musica, "Tumor_Subtypes") <- annot$Tumor_Subtypes ## ----sample_names------------------------------------------------------------- sample_names(musica) ## ----plot_exposures_by_subtype------------------------------------------------ plot_exposures(musica, "ex_result", plot_type = "bar", group_by = "annotation", annotation = "Tumor_Subtypes" ) ## ----plot_exposures_box_annot------------------------------------------------- plot_exposures(musica, "ex_result", plot_type = "box", group_by = "annotation", annotation = "Tumor_Subtypes") ## ----plot_exposures_box_sig--------------------------------------------------- plot_exposures(musica, "ex_result", plot_type = "box", group_by = "signature", color_by = "annotation", annotation = "Tumor_Subtypes" ) ## ----umap_create-------------------------------------------------------------- create_umap(musica, "ex_result") ## ----umap_plot---------------------------------------------------------------- plot_umap(musica, "ex_result") ## ----umap_plot_same_scale----------------------------------------------------- plot_umap(musica, "ex_result", same_scale = FALSE) ## ----umap_plot_annot---------------------------------------------------------- plot_umap(musica, "ex_result", color_by = "annotation", annotation = "Tumor_Subtypes", add_annotation_labels = TRUE ) ## ----plotly------------------------------------------------------------------- plot_signatures(musica, "ex_result", plotly = TRUE) plot_exposures(musica, "ex_result", plotly = TRUE) plot_umap(musica, "ex_result", plotly = TRUE) ## ----reproducible_prediction-------------------------------------------------- seed <- 1 withr::with_seed(seed, predict_exposure( musica = musica, modality = "SBS96", signature_res = model, algorithm = "lda", model_id = "reproducible_ex" )) ## ----session------------------------------------------------------------------ sessionInfo()