## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----install_dependencies, eval = FALSE--------------------------------------- # install.packages(c( # "data.table", # "seqinr", # "openxlsx", # "dplyr" # )) # if (!require("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install(c( # "IRanges", # "GenomicRanges", # "GenomicAlignments", # "Rsamtools", # "Biostrings", # "GenomeInfoDb", # "BSgenome", # "rtracklayer" # )) ## ----install_PICB, eval = FALSE----------------------------------------------- # BiocManager::install("PICB") ## ----install_PICB_dev, eval = FALSE------------------------------------------- # devtools::install_github("HaaseLab/PICB") ## ----load_PICB---------------------------------------------------------------- library(PICB) ## ----load_BSgenome1, results="hide", message=FALSE---------------------------- # Example for Drosophila melanogaster (make sure to install the package first), replace with your own genome library(BSgenome.Dmelanogaster.UCSC.dm6) myGenome <- "BSgenome.Dmelanogaster.UCSC.dm6" ## ----load_myGenome2----------------------------------------------------------- myGenome <- GenomeInfoDb::Seqinfo( seqnames = c("chr2L", "chr2R", "chr3L", "chr3R", "chr4", "chrX", "chrY"), seqlengths = c(23513712, 25286936, 28110227, 32079331, 1348131, 23542271, 3667352) ) ## ----load_myGenome3----------------------------------------------------------- myGenome <- GenomeInfoDb::Seqinfo(genome = "dm6") ## ----load_myGenome4, eval = FALSE--------------------------------------------- # myGenome <- PICBloadfasta(FASTA.NAME = "/path/to/your/genome.fa") ## ----load_alignments---------------------------------------------------------- # demonstration bam_file is in the "inst/extdata/" folder of the PICB package bam_file <- system.file("extdata", "Fly_Ov1_chr2L_20To21mb_filtered.bam", package = "PICB") myAlignments <- PICBload( BAMFILE = bam_file, REFERENCE.GENOME = myGenome ) ## ----build_clusters----------------------------------------------------------- myClusters <- PICBbuild( IN.ALIGNMENTS = myAlignments, REFERENCE.GENOME = myGenome, LIBRARY.SIZE = 12799826 #usually not necessary )$clusters ## ----optimize_parameters------------------------------------------------------ parameterExploration <- PICBoptimize( IN.ALIGNMENTS = myAlignments, REFERENCE.GENOME = myGenome, MIN.UNIQUE.ALIGNMENTS.PER.WINDOW = c(1, 2, 3, 4, 5), LIBRARY.SIZE = 12799826, #usually not necessary VERBOSITY = 1 ) ## ----example_specify_parameter------------------------------------------------ x_column <- "MIN.UNIQUE.ALIGNMENTS.PER.WINDOW" # change parameter to optimize, if applicable ## ----example_plot_optimize_parameters, message=FALSE-------------------------- library(ggplot2) # Determine a scaling factor for the secondary axis scaling_factor <- max(parameterExploration$fraction.of.library.explained.by.clusters) / max(parameterExploration$fraction.of.genome.space.clusters) # plot graph ggplot(parameterExploration, aes(x = .data[[x_column]])) + geom_line(aes(y = fraction.of.library.explained.by.clusters * 100, color = "piRNAs Explained"), linewidth = 1) + geom_point(aes(y = fraction.of.library.explained.by.clusters * 100, color = "piRNAs Explained"), size = 3) + geom_line(aes(y = fraction.of.genome.space.clusters * scaling_factor * 100, color = "Genome Space"), linewidth = 1) + geom_point(aes(y = fraction.of.genome.space.clusters * scaling_factor * 100, color = "Genome Space"), size = 3) + scale_y_continuous(name = "piRNAs Explained by Clusters (%, piRNA sample)", sec.axis = sec_axis(~ . / scaling_factor, name = "Total piRNA cluster-length (Genome, %)")) + scale_x_reverse(name = paste0("Parameter chosen: ", x_column), breaks = parameterExploration[[x_column]], labels = parameterExploration[[x_column]]) + scale_color_manual(name = "Metrics", values = c("piRNAs Explained" = "#00a100", "Genome Space" = "black")) + theme_classic() + theme(axis.title.y.left = element_text(color = "#00a100"), axis.title.y.right = element_text(color = "black"), legend.position = "top") ## ----strand_specific_analysis------------------------------------------------- myClustersWithStrandAnalysis <- PICBstrandanalysis( IN.ALIGNMENTS = myAlignments, IN.RANGES = myClusters ) ## ----output_clusters---------------------------------------------------------- myClustersWithStrandAnalysis ## ----export_results----------------------------------------------------------- PICBexporttoexcel( IN.RANGES = myClustersWithStrandAnalysis, EXCEL.FILE.NAME = "myClusters_demonstration.xlsx" ) ## ----import_results----------------------------------------------------------- myClustersWithStrandAnalysis <- PICBimportfromexcel(EXCEL.FILE.NAME = "myClusters_demonstration.xlsx") ## ----SessionInfo-------------------------------------------------------------- sessionInfo()