# Code chunks of the ggseqalign vignette. Every `## ----label, options----`
# header opens one chunk; the code up to the next header belongs to that chunk.
# Later chunks reuse objects created by earlier ones (`q`, `s`, `pl`), so the
# chunks are meant to run top to bottom.

## ----showcase, fig.cap="Example of ggseqalign visualization. Showcase of the package's capability to highlight differences between 2000 bp long DNA sequences.", echo=FALSE, warning=FALSE, message=FALSE----
### This chunk dynamically creates fig:showcase but is hidden in the vignette.
# It is meant to give an initial impression of a real use case without
# presenting overwhelming code at the start. This chunk is recreated in
# 'ggplot-mod' in its entirety aside from the global figure output setting.
library(Biostrings)
library(ggseqalign)
library(ggplot2)

# Global figure output setting: applies to every chunk that follows.
knitr::opts_chunk$set(fig.dim = c(6, 4))

dna <- readDNAStringSet(
  system.file("extdata", "dm3_upstream2000.fa.gz", package = "Biostrings")
)

# Sequences 2-4 become the queries, sequence 5 the reference.
q <- dna[2:4]
s <- dna[5]

# Query 1: overwrite the letters at positions 5, 200 and 400 with A, G, C.
q[1] <- as(
  replaceLetterAt(q[[1]], c(5, 200, 400), "AGC"),
  "DNAStringSet"
)
# Query 2: keep only positions 300-1500 and 1800-2000.
q[2] <- as(
  c(substr(q[[2]], 300, 1500), substr(q[[2]], 1800, 2000)),
  "DNAStringSet"
)
# Query 3: put a run of 1000 "A" in at position 1500.
q[3] <- as(
  replaceAt(q[[3]], 1500, paste(rep("A", 1000), collapse = "")),
  "DNAStringSet"
)
names(q) <- c("mismatches", "deletions", "insertion")
names(s) <- "reference"

plot_sequence_alignment(alignment_table(q, s)) +
  theme(text = element_text(size = 15))

## ----installbioc, eval = FALSE------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install(version = "devel")
# BiocManager::valid() # checks for out of date packages
# BiocManager::install("ggseqalign")

## ----installgit, eval = FALSE-------------------------------------------------
# devtools::install_git("https://github.com/simeross/ggseqalign.git")

## ----minimal-example, fig.cap="Output of the minimal example code", warning=FALSE----
library(ggseqalign)
library(ggplot2)

query_strings <- c("boo", "fibububuzz", "bozz", "baofuzz")
subject_string <- "boofizz"

alignment <- alignment_table(query_strings, subject_string)

plot_sequence_alignment(alignment) +
  theme(text = element_text(size = 15))

## ----read-in-fasta, message= FALSE, warning=FALSE-----------------------------
library(ggseqalign)
library(Biostrings)
library(ggplot2)

query_sequences <- Biostrings::readDNAStringSet(
  system.file("extdata", "fastaEx.fa", package = "Biostrings")
)
# The subject is assembled from three literal fragments pasted together.
subject_sequence <- DNAStringSet(paste0(
  "AAACGATCGATCGTAGTCGACTGATGT",
  "AGTATATACGTCGTACGTAGCATCGTC",
  "AGTTACTGCATGCCGG"
))

alignment <- alignment_table(query_sequences, subject_sequence)

plot_sequence_alignment(alignment) +
  theme(text = element_text(size = 15))

## ----noisefig, fig.cap="Example of a case where ggseqalign fails. If there are too many differences, the mismatches overlap each other and become noisy.", echo=TRUE, warning=FALSE----
# load
dna <- Biostrings::readDNAStringSet(
  system.file("extdata", "dm3_upstream2000.fa.gz", package = "Biostrings")
)

# Positions 100-300 of two different sequences are aligned against each other.
q <- as(
  c(substr(dna[[1]], 100, 300)),
  "DNAStringSet"
)
s <- as(
  c(substr(dna[[2]], 100, 300)),
  "DNAStringSet"
)
names(q) <- "noisy alignment"
names(s) <- "reference"

plot_sequence_alignment(alignment_table(q, s)) +
  theme(text = element_text(size = 15))

## ----noisefignolab, fig.cap="Hiding mismatches. Hiding character mismatches reduces visual noise if alignments have many character mismatches and preserves structural information.", echo=TRUE, warning=FALSE----
# Reuses `q` and `s` from the 'noisefig' chunk.
plot_sequence_alignment(alignment_table(q, s), hide_mismatches = TRUE) +
  theme(text = element_text(size = 15))

## ----ggplot-mod, fig.cap="Styling with ggplot2. In this example, text size was increased, axis labels were added, x-axis text rotated and the color scheme changed.", warning=FALSE----
library(ggseqalign)
library(ggplot2)
library(Biostrings)

dna <- readDNAStringSet(
  system.file("extdata", "dm3_upstream2000.fa.gz", package = "Biostrings")
)

# Same query/reference construction as in the 'showcase' chunk.
q <- dna[2:4]
s <- dna[5]

q[1] <- as(
  replaceLetterAt(q[[1]], c(5, 200, 400), "AGC"),
  "DNAStringSet"
)
q[2] <- as(
  c(substr(q[[2]], 300, 1500), substr(q[[2]], 1800, 2000)),
  "DNAStringSet"
)
q[3] <- as(
  replaceAt(q[[3]], 1500, paste(rep("A", 1000), collapse = "")),
  "DNAStringSet"
)
names(q) <- c("mismatches", "deletions", "insertion")
names(s) <- "reference"

pl <- plot_sequence_alignment(alignment_table(q, s))

# `pl` is kept in a variable because the 'ggplot-layer-mod' chunk modifies it.
pl <- pl +
  ylab("Sequence variants") +
  xlab("Length in bp") +
  scale_color_viridis_d() +
  theme(
    text = element_text(size = 20),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title = element_text()
  )

pl

## ----ggplot-layer-mod, fig.cap="Modifying ggplot2 layers. In this example, deletion bars were adjusted to match background color and mismatch indicators were modified using plot layer modification", warning=FALSE----
# Define background color
bg <- "grey90"

# Change plot background
pl <- pl +
  theme(panel.background = element_rect(fill = bg, colour = bg))

# NOTE(review): the layer positions 2 and 8 are hard-coded; they presumably
# follow the layer order built by plot_sequence_alignment() - confirm after
# package updates.
# Match deletion to background
pl$layers[[2]]$aes_params$colour <- bg

# Increase mismatch indicator size and change shape
pl$layers[[8]]$aes_params$size <- 2
pl$layers[[8]]$aes_params$shape <- 4
pl$layers[[8]]$aes_params$colour <- "black"

pl

## ----minimal-example-mod, fig.cap="Modified alignment parameters.", warning=FALSE----
library(ggseqalign)
library(ggplot2)

query_strings <- c("boo", "fibububuzz", "bozz", "baofuzz")
subject_string <- "boofizz"

# Same input as 'minimal-example', with an extra alignment parameter.
# NOTE(review): `gapOpening` is presumably forwarded to the underlying
# aligner - confirm against the alignment_table() documentation.
alignment <- alignment_table(query_strings, subject_string, gapOpening = 20)

plot_sequence_alignment(alignment) +
  theme(text = element_text(size = 15))

## ----session------------------------------------------------------------------
sessionInfo()