## ----include = FALSE---------------------------------------------------------- library(knitr) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----echo=TRUE, eval=FALSE---------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("ELViS") ## ----setup-------------------------------------------------------------------- library(ELViS) library(ggplot2) library(glue) library(dplyr) library(ComplexHeatmap) theme_set(theme_bw()) ## ----------------------------------------------------------------------------- analysis_dir = tempdir() dir.create(analysis_dir,showWarnings = FALSE) package_name = "ELViS" # load toy example meta data data(toy_example,package = package_name) # get lust of bam file paths ext_path = system.file("extdata",package = package_name) bam_files = list.files(ext_path,full.names = TRUE,pattern = "bam$") ## ----------------------------------------------------------------------------- os_name = Sys.info()["sysname"] if( os_name == "Windows" ){ N_cores <- 1L }else{ N_cores <- 2L } # the name of the reference viral sequence the reads were aligned to target_virus_name = "gi|333031|lcl|HPV16REF.1|" # temporary file directory tmpdir=tempdir() # generate read depth matrix system.time({ mtrx_samtools_reticulate__example = get_depth_matrix( bam_files = bam_files,coord_or_target_virus_name = target_virus_name ,mode = "samtools_basilisk" ,N_cores = N_cores ,min_mapq = 30 ,tmpdir=tmpdir ,condaenv = "env_samtools" ,condaenv_samtools_version="1.21" ) }) ## ----eval=FALSE, , eval=FALSE, include=FALSE---------------------------------- # # # Actual depth matrix of 120 samples had been generated using the follpwing code # # SKIP=1 # if(!SKIP){ # # # 120 bams # # 5.5 sec # system.time({ # mtrx_Rsamtools = # get_depth_matrix( # bam_files = bam_files,coord_or_target_virus_name = target_virus_name # ,mode = "Rsamtools" # ,N_cores = N_cores # ,max_depth = 1e5 # ,min_mapq = 30 # ,tmpdir="tmpdir" # ) # }) # # } # # # # 4.7 sec # system.time({ # mtrx_samtools_reticulate = # get_depth_matrix( # bam_files = bam_files,coord_or_target_virus_name = target_virus_name # ,mode = "samtools_basilisk" # ,N_cores = N_cores # ,min_mapq = 30 # ,tmpdir="tmpdir" # ,condaenv = "env_samtools" # ,condaenv_samtools_version="1.21" # ) # }) # # # # SKIP=1 # if(!SKIP){ # # #2.3 sec # system.time({ # mtrx_samtools = # get_depth_matrix( # bam_files = bam_files,coord_or_target_virus_name = target_virus_name # ,mode = "samtools_custom" # ,N_cores = N_cores # ,min_mapq = 30 # ,tmpdir="tmpdir" # ,modules="samtools/samtools_bcftools_1.20__bedtools_2.31.1" # ) # }) # } # # use_data(mtrx_samtools_reticulate) # ## ----fig.width=5,fig.height=3------------------------------------------------- # loading precalculated depth matrix data(mtrx_samtools_reticulate) # threshold th = 50 # histogram with adjustable thresholds for custom function depth_hist(mtrx_samtools_reticulate,th=th,smry_fun=max) depth_hist(mtrx_samtools_reticulate,th=th,smry_fun=quantile,prob=0.75) # filtered matrix base_resol_depth = filt_samples(mtrx_samtools_reticulate,th=th,smry_fun=max) print(base_resol_depth[1:4,1:4]) ## ----echo=TRUE, eval=FALSE---------------------------------------------------- # system.time({ # result = run_ELViS( # X = base_resol_depth # ,N_cores=N_cores # ,reduced_output=TRUE # ) # # }) # # ELViS_toy_run_result = result # use_data(ELViS_toy_run_result) # # # 4min for 120 samples using 10 threads ## ----fig.width=7,fig.height=5------------------------------------------------- # ELViS run result data(ELViS_toy_run_result) result = ELViS_toy_run_result # Directory where figures will be saved figure_dir = glue("{analysis_dir}/figures") dir.create(figure_dir) # give the gff3 file of the virus of your interest. Sequence name or chromosome name should match with that in the reference genome FASTA file. gff3_fn = system.file("extdata","HPV16REF_PaVE.gff",package = package_name) ## ----fig.width=7,fig.height=5------------------------------------------------- # Plotting raw depth profile gg_lst_x = plot_pileUp_multisample( result = result, X_raw = base_resol_depth, plot_target = "x", gff3 = gff3_fn, baseline=1, exclude_genes = c("E6*","E1^E4","E8^E2"), ) # Save to pdf file, set SKIP = FALSE if you want to save as pdf SKIP = TRUE if(!SKIP){ pdf(glue("{figure_dir}/Raw_Depth_CNV_call.pdf"),height=4,width=6) gg_lst_x dev.off() } # an example of raw read depth line plot print(gg_lst_x[[1]]) ## ----fig.width=7,fig.height=5------------------------------------------------- # set the longest segment as a new baseline new_baseline = get_new_baseline(result,mode="longest") # Plotting raw depth profile with new baseline gg_lst_x = plot_pileUp_multisample( result = result, X_raw = base_resol_depth, plot_target = "x", gff3 = gff3_fn, baseline=new_baseline, exclude_genes = c("E6*","E1^E4","E8^E2"), ) # Save to pdf file, set SKIP = FALSE if you want to save as pdf SKIP = TRUE if(!SKIP){ # Save to pdf file pdf("figures/Raw_Depth_new_baseline_CNV_call.pdf",height=4,width=6) gg_lst_x dev.off() } # an example of raw read depth line plot with new baseline gg_lst_x[[1]] ## ----fig.width=7,fig.height=5------------------------------------------------- # Plotting normalized depth profile gg_lst_y = plot_pileUp_multisample( result = result, X_raw = base_resol_depth, plot_target = "y", gff3 = gff3_fn, baseline=new_baseline, exclude_genes = c("E6*","E1^E4","E8^E2"), ) # Save to pdf file SKIP = TRUE if(!SKIP){ pdf("figures/Normalized_Depth_CNV_call.pdf",height=4,width=6) gg_lst_y dev.off() } # an example of normalized read depth line plot with new baseline gg_lst_y[[1]] ## ----fig.width=7,fig.height=5------------------------------------------------- # Plotting robust Z-score profile gg_lst_z = plot_pileUp_multisample( result = result, X_raw = base_resol_depth, plot_target = "z", gff3 = gff3_fn, baseline=new_baseline, exclude_genes = c("E6*","E1^E4","E8^E2") ) SKIP = TRUE if(!SKIP){ # Save to pdf file pdf("figures/Robust-Z-score_CNV_call.pdf",height=4,width=6) gg_lst_z dev.off() } # an example of Z-score line plot with new baseline gg_lst_z[[1]] ## ----eval=FALSE, include=FALSE,echo = FALSE----------------------------------- # set.seed(54374373); total_aligned_base__host_and_virus = # c( # sample( (4:6)*(10^8),80,replace = TRUE), # sample( (7:10)*(10^8),20,replace = TRUE), # sample( (1:3)*(10^8),20,replace = TRUE) # ) # # use_data(total_aligned_base__host_and_virus,overwrite = TRUE) ## ----fig.width=5,fig.height=4------------------------------------------------- data(total_aligned_base__host_and_virus) viral_load = (10^6)*(apply(base_resol_depth,2,\(x) sum(x)) )/total_aligned_base__host_and_virus # distribtuion of overall viral load viral_load %>%log10 %>% hist ## ----------------------------------------------------------------------------- exclude_genes = c("E6*","E1^E4","E8^E2") integ_ht_result = integrative_heatmap( X_raw = base_resol_depth, result = result, gff3_fn = gff3_fn, exclude_genes = exclude_genes, baseline=1, total_aligned_base__host_and_virus = total_aligned_base__host_and_virus ) # top annotation top_ant = HeatmapAnnotation( `Log2 Overall\nViral Load` = anno_points(log2(viral_load)), annotation_name_side = "left",annotation_name_rot=0) ## ----------------------------------------------------------------------------- gene_ref="E7" gene_cn = gene_cn_heatmaps( X_raw = base_resol_depth, result = result, gff3_fn = gff3_fn, baseline = new_baseline, gene_ref = gene_ref, exclude_genes = exclude_genes ) ## ----fig.width=7,fig.height=8------------------------------------------------- draw(top_ant %v% integ_ht_result$Heatmap %v% gene_cn$Heatmaps$intact_gene_cn %v% gene_cn$Heatmaps$rel_dosage) ## ----------------------------------------------------------------------------- sessionInfo()