## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, out.width = "80%", fig.align = "center", echo = TRUE, crop = NULL # suppress "The magick package is required to crop" issue ) library(BiocStyle) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("peakCombiner") ## ----eval=TRUE---------------------------------------------------------------- library("peakCombiner") library("ggplot2") ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_data_tibble") ## ----eval=TRUE---------------------------------------------------------------- data_prepared <- prepareInputRegions( data = syn_data_tibble, outputFormat = "tibble", showMessages = FALSE ) centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = NULL, genome = NA, trim_start = TRUE, outputFormat = "GenomicRanges", showMessages = FALSE ) data_center_expand <- centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = NULL, genome = NA, trim_start = TRUE, outputFormat = "tibble", showMessages = FALSE ) data_filtered <- filterRegions( data = data_center_expand, includeByChromosomeName = NULL, excludeByBlacklist = NULL, includeAboveScoreCutoff = NULL, includeTopNScoring = NULL, outputFormat = "tibble", showMessages = FALSE ) consensus_peak <- combineRegions( data = data_filtered, foundInSamples = 2, combinedCenter = "nearest", annotateWithInputNames = FALSE, combinedSampleName = "my_new_sample_name", outputFormat = "tibble", showMessages = FALSE ) consensus_final <- centerExpandRegions( data = consensus_peak, expandBy = 350, outputFormat = "tibble", showMessages = FALSE ) consensus_final ## ----eval=FALSE--------------------------------------------------------------- # rtracklayer::export.bed(consensus_final, paste0(here::here(), "/lists/consensus_regions.bed"), format = "bed") ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_sample_sheet", package = "peakCombiner") syn_sample_sheet ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_data_control01", package = "peakCombiner") syn_data_control01 ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_data_treatment01", package = "peakCombiner") syn_data_treatment01 ## ----eval=TRUE---------------------------------------------------------------- combined_input <- syn_data_control01 |> dplyr::mutate(sample_name = "control-rep1") |> rbind(syn_data_treatment01 |> dplyr::mutate(sample_name = "treatment-rep1")) combined_input |> dplyr::group_by(sample_name) |> dplyr::summarize(num_regions = dplyr::n()) ## ----eval=TRUE---------------------------------------------------------------- prepareInputRegions( data = combined_input, outputFormat = "tibble", startsAreBased = 1, showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- prepareInputRegions( data = combined_input, outputFormat = "tibble", startsAreBased = 0, showMessages = FALSE ) ## ----eval=FALSE--------------------------------------------------------------- # file_names ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_sample_sheet", package = "peakCombiner") sample_sheet <- syn_sample_sheet sample_sheet ## ----eval = FALSE------------------------------------------------------------- # prepareInputRegions( # data = sample_sheet, # startsAreBased = 0, # showMessages = FALSE # ) ## ----eval=TRUE---------------------------------------------------------------- utils::data(syn_control_rep1_narrowPeak) syn_control_rep1_narrowPeak ## ----eval=TRUE---------------------------------------------------------------- utils::data(syn_treatment_rep1_narrowPeak) syn_treatment_rep1_narrowPeak ## ----eval=TRUE---------------------------------------------------------------- control <- syn_control_rep1_narrowPeak |> dplyr::mutate(sample_name = "control-rep1") control ## ----eval=TRUE---------------------------------------------------------------- treatment <- syn_treatment_rep1_narrowPeak |> dplyr::mutate(sample_name = "treatment-rep1") treatment ## ----eval=TRUE---------------------------------------------------------------- combined_input <- control |> rbind(treatment) combined_input ## ----eval=TRUE---------------------------------------------------------------- combined_input |> dplyr::group_by(sample_name) |> dplyr::count(name = "number_of_entries") ## ----eval=TRUE---------------------------------------------------------------- prepareInputRegions( data = combined_input, outputFormat = "tibble", startsAreBased = 1, showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_data_granges") syn_data_granges ## ----eval=TRUE---------------------------------------------------------------- GenomicRanges_data <- GenomicRanges::makeGRangesFromDataFrame( syn_data_granges, keep.extra.columns = TRUE ) GenomicRanges_data ## ----eval=TRUE---------------------------------------------------------------- prepareInputRegions( data = GenomicRanges_data, outputFormat = "GenomicRanges", showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- utils::data("syn_data_bed") syn_data_bed |> dplyr::arrange(sample_name) ## ----eval=TRUE---------------------------------------------------------------- syn_data_bed |> dplyr::group_by(sample_name) |> dplyr::summarize(num_regions = dplyr::n()) ## ----eval=TRUE---------------------------------------------------------------- prepareInputRegions( data = syn_data_bed, outputFormat = "data.frame", startsAreBased = 0, showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = NULL, outputFormat = "tibble", showMessage = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = NULL, outputFormat = "tibble", showMessages = TRUE ) ## ----eval=TRUE---------------------------------------------------------------- centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = c(500), outputFormat = "tibble", showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- centerExpandRegions( data = data_prepared, centerBy = "center_column", expandBy = c(100, 1000), outputFormat = "tibble", showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- backlist <- tibble::tibble(chrom = c("chr1"), start = c(100), end = c(1000)) backlist ## ----eval=TRUE---------------------------------------------------------------- data_filtered <- filterRegions( data = data_center_expand, excludeByBlacklist = backlist, outputFormat = "tibble", showMessages = FALSE ) data_filtered ## ----eval=TRUE---------------------------------------------------------------- filterRegions( data = data_center_expand, includeByChromosomeName = c("chr1", "chr2", "chr4"), excludeByBlacklist = backlist, includeAboveScoreCutoff = 2.5, includeTopNScoring = 6, outputFormat = "tibble", showMessages = TRUE ) ## ----eval=TRUE---------------------------------------------------------------- input_chrom <- data_center_expand |> dplyr::select(chrom) |> unique() input_chrom ## ----eval=TRUE---------------------------------------------------------------- include_chrom <- input_chrom |> dplyr::filter(grepl("^chr[0-9]$|^chr[1-2][0-9]$|^chr[XYM]", chrom)) |> dplyr::pull(chrom) |> unique() |> sort() include_chrom ## ----eval=TRUE---------------------------------------------------------------- data_filtered_chr <- filterRegions( data = data_center_expand, includeByChromosomeName = include_chrom, excludeByBlacklist = NULL, includeAboveScoreCutoff = NULL, includeTopNScoring = NULL, outputFormat = "tibble", showMessages = FALSE ) data_filtered_chr ## ----eval=TRUE---------------------------------------------------------------- data_center_expand |> dplyr::group_by(sample_name) |> dplyr::summarize(num_regions = dplyr::n()) ## ----eval=TRUE---------------------------------------------------------------- data_filtered_chr |> dplyr::group_by(chrom) |> dplyr::summarize(num_regions = dplyr::n()) ## ----download_blacklist_bed, eval=FALSE--------------------------------------- # # Download the blacklist BED file from ENCODE and save it as "blacklist_human.bed" # download.file( # url = "https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.bed.gz", # destfile = "blacklist_human.bed.gz", # mode = "wb" # ) # # # # Import the BED file # blacklist_granges <- readr::read_tsv("blacklist_human.bed.gz", col_names = c("chrom", "start", "end")) # blacklist_granges ## ----apply_downloaded_blacklist_bed, eval=FALSE------------------------------- # filterRegions( # data = data_center_expand, # excludeByBlacklist = blacklist_granges, # showMessages = FALSE) ## ----------------------------------------------------------------------------- # Load library (AnnotationHub would need to be added to DESCRIPTION under Suggests if used in the vignette) library(AnnotationHub) # Get annotation hub handle (will download index if called for the first time) ah <- AnnotationHub::AnnotationHub() # List ENCODE related data providers unique(grep("ENCODE", ah$dataprovider, value = TRUE)) # Query for "blacklist" dm <- AnnotationHub::query(ah, "blacklist") dm # get specific object (will be downloaded if called for the first time) blacklist_ah <- dm[["AH107343"]] # get info for a specific object AnnotationHub::recordStatus(ah, "AH107343") AnnotationHub::getInfoOnIds(ah, "AH107343") mcols(dm)["AH107343",] mcols(dm)["AH107343","description"] ## ----view_black_list_annotationhub, eval=FALSE-------------------------------- # blacklist_ah ## ----apply_annotationhub_blacklist, eval=FALSE-------------------------------- # filterRegions( # data = data_center_expand, # excludeByBlacklist = blacklist_ah, # showMessages = FALSE) ## ----eval=TRUE---------------------------------------------------------------- data_center_expand |> ggplot2::ggplot(ggplot2::aes(x = score)) + ggplot2::geom_histogram(binwidth = 10) + ggplot2::xlim(0,NA) ## ----eval=TRUE---------------------------------------------------------------- data_filtered_cutoff <- filterRegions( data = data_center_expand, includeAboveScoreCutoff = 35, outputFormat = "tibble", showMessages = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- dim(data_center_expand) dim(data_filtered_cutoff) ## ----eval=TRUE---------------------------------------------------------------- range(data_center_expand |> dplyr::pull(score)) range(data_filtered_cutoff |> dplyr::pull(score)) ## ----eval=TRUE---------------------------------------------------------------- data_filtered_cutoff |> ggplot2::ggplot(ggplot2::aes(x = score)) + ggplot2::geom_histogram(binwidth = 10) + ggplot2::xlim(0, NA) ## ----eval=TRUE---------------------------------------------------------------- data_center_expand |> dplyr::group_by(sample_name) |> dplyr::summarize(num_regions = dplyr::n()) filterRegions( data = data_center_expand, includeTopNScoring = 8, outputFormat = "tibble", showMessages = FALSE ) |> dplyr::group_by(sample_name) |> dplyr::summarize(num_regions = dplyr::n()) ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, foundInSamples = 2, combinedCenter = "nearest", annotateWithInputNames = FALSE, combinedSampleName = "my_new_sample_name", outputFormat = "tibble", showMessage = FALSE ) ## ----eval=TRUE---------------------------------------------------------------- consensus_peak_list <- combineRegions( data = data_filtered, foundInSamples = 2, combinedCenter = "nearest", annotateWithInputNames = TRUE, combinedSampleName = "consensus", outputFormat = "tibble", showMessages = TRUE ) consensus_peak_list ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, foundInSamples = 2, outputFormat = "tibble", showMessages = FALSE, combinedSampleName = "foundInSamples_2_example" ) ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, foundInSamples = 1, outputFormat = "tibble", showMessages = FALSE, combinedSampleName = "foundInSamples_1_example" ) ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, combinedCenter = "strongest", outputFormat = "tibble", showMessages = FALSE ) |> dplyr::select("center", "score") ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, combinedCenter = "middle", outputFormat = "tibble", showMessages = FALSE ) |> dplyr::select("center", "score") ## ----eval=TRUE---------------------------------------------------------------- combineRegions( data = data_filtered, combinedCenter = "nearest", outputFormat = "tibble", showMessages = FALSE ) |> dplyr::select("center", "score") ## ----session, eval=TRUE------------------------------------------------------- sessionInfo()