Genomic ranges of problematic genomic regions that should be avoided when working with genomic data. For human, mouse, and selected model organisms.
TL;DR - For human hg38 genome assembly, Anshul recommends ENCFF356LFX exclusion list regions.
BED files of exclusion regions are available on the ENCODE project website (Amemiya, Kundaje, and Boyle 2019). Human (hg19, hg38) and mouse (mm9, mm10) exclusion regions are available. However, exclusion lists generated by multiple labs often create uncertainty about which one to use. The purpose of this package is to provide a unified place for informed retrieval of exclusion regions.
Naming convention: <genome assembly>.<lab>.<original file name>, e.g.,
hg19.Birney.wgEncodeDacMapabilityConsensusExcludable.
See make-data.R for how to create the excluderanges GRanges objects.
excluderanges
if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
}
# Install the development version of Bioconductor (need 3.14 and above)
# BiocManager::install(version = "devel")
# Check that you have a valid Bioconductor installation
# BiocManager::valid()
# Install the package
BiocManager::install("excluderanges", version = "devel")
# BiocManager::install("mdozmorov/excluderanges")
Get an overview of what’s available
suppressMessages(library(AnnotationHub))
ah <- AnnotationHub()
#> snapshotDate(): 2021-10-18
query_data <- query(ah, "excluderanges")
query_data
#> AnnotationHub with 42 records
#> # snapshotDate(): 2021-10-18
#> # $dataprovider: UCSC, ENCODE, mitra.stanford.edu/kundaje/akundaje/release/b...
#> # $species: Homo sapiens, Mus musculus, Drosophila melanogaster, Caenorhabdi...
#> # $rdataclass: GRanges
#> # additional mcols(): taxonomyid, genome, description,
#> #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
#> #   rdatapath, sourceurl, sourcetype 
#> # retrieve records with, e.g., 'object[["AH95908"]]' 
#> 
#>             title                                                    
#>   AH95908 | ce10.Kundaje.ce10-Excludable.rds                         
#>   AH95909 | dm3.Kundaje.dm3-Excludable.rds                           
#>   AH95910 | hg19.Bernstein.Mint_Excludable_hg19.rds                  
#>   AH95911 | hg19.Birney.wgEncodeDacMapabilityConsensusExcludable.rds 
#>   AH95912 | hg19.Crawford.wgEncodeDukeMapabilityRegionsExcludable.rds
#>   ...       ...                                                      
#>   AH95945 | mm10.UCSC.telomere.rds                                   
#>   AH95946 | mm9.UCSC.centromere.rds                                  
#>   AH95947 | mm9.UCSC.contig.rds                                      
#>   AH95948 | mm9.UCSC.fragment.rds                                    
#>   AH95949 | mm10.UCSC.scaffold.rds
hg38 excluderanges coordinates recommended by Anshul
# Check titles
# as.data.frame(mcols(query_data[1:10])["title"]) 
excludeGR.hg38.Kundaje.1 <- query_data[["AH95917"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
excludeGR.hg38.Kundaje.1
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: S4Vectors
#> 
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> GRanges object with 910 ranges and 0 metadata columns:
#>         seqnames            ranges strand
#>            <Rle>         <IRanges>  <Rle>
#>     [1]     chr1     628903-635104      *
#>     [2]     chr1   5850087-5850571      *
#>     [3]     chr1   8909610-8910014      *
#>     [4]     chr1   9574580-9574997      *
#>     [5]     chr1 32043823-32044203      *
#>     ...      ...               ...    ...
#>   [906]     chrY 11290797-11334278      *
#>   [907]     chrY 11493053-11592850      *
#>   [908]     chrY 11671014-11671046      *
#>   [909]     chrY 11721528-11749472      *
#>   [910]     chrY 56694632-56889743      *
#>   -------
#>   seqinfo: 24 sequences from hg38 genome
Save the data in a BED file, if needed.
rtracklayer::export(excludeGR.hg38.Kundaje.1, "hg38.Kundaje.GRCh38_unified_Excludable.bed", format = "bed")
We can load other excludable regions for the hg38 genome assembly and compare them.
query_data <- query(ah, c("excluderanges", "hg38", "Exclusion regions"))
query_data
#> AnnotationHub with 6 records
#> # snapshotDate(): 2021-10-18
#> # $dataprovider: ENCODE
#> # $species: Homo sapiens
#> # $rdataclass: GRanges
#> # additional mcols(): taxonomyid, genome, description,
#> #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
#> #   rdatapath, sourceurl, sourcetype 
#> # retrieve records with, e.g., 'object[["AH95915"]]' 
#> 
#>             title                                                       
#>   AH95915 | hg38.Bernstein.Mint_Excludable_GRCh38.rds                   
#>   AH95916 | hg38.Kundaje.GRCh38.Excludable.rds                          
#>   AH95917 | hg38.Kundaje.GRCh38_unified_Excludable.rds                  
#>   AH95918 | hg38.Reddy.wgEncodeDacMapabilityConsensusExcludable.hg38.rds
#>   AH95919 | hg38.Wold.hg38mitoExcludable.rds                            
#>   AH95920 | hg38.Yeo.eCLIP_Excludableregions.hg38liftover.bed.fixed.rds
excludeGR.hg38.Bernstein <- query_data[["AH95915"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
excludeGR.hg38.Kundaje.2 <- query_data[["AH95916"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
excludeGR.hg38.Reddy     <- query_data[["AH95918"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
excludeGR.hg38.Wold      <- query_data[["AH95919"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
excludeGR.hg38.Yeo       <- query_data[["AH95920"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
Compare the number of excludable regions.
library(ggplot2)
# One row per hg38 exclusion set: the number of regions it contains (Count)
# and the label used for it on the plot (Source)
mtx_to_plot <- data.frame(
  Count = unlist(lapply(
    list(
      excludeGR.hg38.Bernstein,
      excludeGR.hg38.Kundaje.1,
      excludeGR.hg38.Kundaje.2,
      excludeGR.hg38.Reddy,
      excludeGR.hg38.Wold,
      excludeGR.hg38.Yeo
    ),
    length
  )),
  Source = c(
    "Bernstein.Mint_Excludable_GRCh38",
    "Kundaje.GRCh38_unified_Excludable",
    "Kundaje.GRCh38.Excludable",
    "Reddy.wgEncodeDacMapabilityConsensusExcludable",
    "Wold.hg38mitoExcludable",
    "Yeo.eCLIP_Excludableregions.hg38liftover.bed"
  )
)
# Turn Source into a factor whose levels follow increasing Count, so the
# bars are drawn in that order
mtx_to_plot$Source <- with(
  mtx_to_plot,
  factor(Source, levels = Source[order(Count)])
)
ggplot(mtx_to_plot, aes(x = Source, y = Count, fill = Source)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme_bw() + theme(legend.position = "none")
# ggsave("man/figures/excluderanges_hg38_count.png", width = 5.5, height = 2)
Compare the width of excludable regions. A log2 scale is used because the width distributions have heavy right tails.
library(ggridges)
# Long-format table with one row per region: its width and the label of the
# exclusion set it belongs to
mtx_to_plot <- local({
  # Label -> GRanges object; the order fixes the row order of the result
  region_sets <- list(
    "Bernstein.Mint_Excludable_GRCh38" = excludeGR.hg38.Bernstein,
    "Kundaje.GRCh38_unified_Excludable" = excludeGR.hg38.Kundaje.1,
    "Kundaje.GRCh38.Excludable" = excludeGR.hg38.Kundaje.2,
    "Reddy.wgEncodeDacMapabilityConsensusExcludable" = excludeGR.hg38.Reddy,
    "Wold.hg38mitoExcludable" = excludeGR.hg38.Wold,
    "Yeo.eCLIP_Excludableregions.hg38liftover.bed" = excludeGR.hg38.Yeo
  )
  data.frame(
    Width = unlist(lapply(region_sets, width), use.names = FALSE),
    # Repeat each label once per region of the corresponding set
    Source = rep(
      names(region_sets),
      times = unlist(lapply(region_sets, length), use.names = FALSE)
    )
  )
})
# Ridge plot of log2(width), one ridge per exclusion set
ggplot(mtx_to_plot, aes(x = log2(Width), y = Source, fill = Source)) +
  geom_density_ridges() +
  theme_bw() +
  theme(legend.position = "none")
#> Picking joint bandwidth of 0.372
# ggsave("man/figures/excluderanges_hg38_width.png", width = 5.5, height = 2)
We can investigate the total width of each set of excludable ranges.
# One row per hg38 exclusion set: summed width of all its regions
# (TotalWidth) and the label used for it on the plot (Source)
mtx_to_plot <- data.frame(
  TotalWidth = unlist(lapply(
    list(
      excludeGR.hg38.Bernstein,
      excludeGR.hg38.Kundaje.1,
      excludeGR.hg38.Kundaje.2,
      excludeGR.hg38.Reddy,
      excludeGR.hg38.Wold,
      excludeGR.hg38.Yeo
    ),
    function(gr) sum(width(gr))
  )),
  Source = c(
    "Bernstein.Mint_Excludable_GRCh38",
    "Kundaje.GRCh38_unified_Excludable",
    "Kundaje.GRCh38.Excludable",
    "Reddy.wgEncodeDacMapabilityConsensusExcludable",
    "Wold.hg38mitoExcludable",
    "Yeo.eCLIP_Excludableregions.hg38liftover"
  )
)
ggplot(mtx_to_plot, aes(x = TotalWidth, y = Source, fill = Source)) + 
  geom_bar(stat="identity") + scale_x_log10() + scale_y_discrete(label=abbreviate) +
  xlab("log10 total width")
# ggsave("man/figures/excluderanges_hg38_sumwidth.png", width = 6.5, height = 2)
We can compare Jaccard overlap between those sets of excludable regions.
library(pheatmap)
library(stringr)
# Jaccard overlap between two sets of genomic ranges.
#
# Arguments:
#   gr_a, gr_b  Range objects accepted by GenomicRanges::intersect() and
#               GenomicRanges::union(); strand is ignored in both operations.
#
# Returns a one-row DataFrame with columns:
#   intersection     summed width of the intersected ranges
#   union            summed width of the unioned ranges
#   jaccard          intersection / union
#   n_intersections  number of ranges in the intersection
jaccard <- function(gr_a, gr_b) {
  shared <- GenomicRanges::intersect(gr_a, gr_b, ignore.strand = TRUE)
  shared_width <- sum(width(shared))
  combined_width <- sum(
    width(GenomicRanges::union(gr_a, gr_b, ignore.strand = TRUE))
  )
  # Columns are named explicitly so the locals need not shadow base functions
  DataFrame(
    intersection = shared_width,
    union = combined_width,
    jaccard = shared_width / combined_width,
    n_intersections = length(shared)
  )
}
# All hg38 exclusion sets to compare; the labels in all_excludeGR_name are
# listed in the same order as the objects in all_excludeGR_list
all_excludeGR_list <- list(
  excludeGR.hg38.Bernstein,
  excludeGR.hg38.Kundaje.1,
  excludeGR.hg38.Kundaje.2,
  excludeGR.hg38.Reddy,
  excludeGR.hg38.Wold,
  excludeGR.hg38.Yeo
)
all_excludeGR_name <- c(
  "Bernstein.Mint_Excludable_GRCh38",
  "Kundaje.GRCh38_unified_Excludable",
  "Kundaje.GRCh38.Excludable",
  "Reddy.wgEncodeDacMapabilityConsensusExcludable",
  "Wold.hg38mitoExcludable",
  "Yeo.eCLIP_Excludableregions.hg38liftover"
)
# Square matrix of pairwise Jaccard indices, initialised to zero.
# The diagonal is left at 0 rather than 1, as in the original code
# (presumably so that self-overlap does not dominate the heatmap).
mtx_to_plot <- matrix(
  data = 0,
  nrow = length(all_excludeGR_list),
  ncol = length(all_excludeGR_list)
)
# Fill it in. seq_along()/seq_len() are used instead of 1:length() so that
# an empty list results in no iterations rather than indices 1 and 0.
for (i in seq_along(all_excludeGR_list)) {
  # Compute only the pairs with j < i; the matrix is symmetric, so each
  # value is mirrored into the other half
  for (j in seq_len(i - 1)) {
    mtx_to_plot[i, j] <- mtx_to_plot[j, i] <-
      jaccard(all_excludeGR_list[[i]], all_excludeGR_list[[j]])[["jaccard"]]
  }
}
# Trim row/colnames to at most 25 characters to keep the heatmap readable
rownames(mtx_to_plot) <- colnames(mtx_to_plot) <-
  str_trunc(all_excludeGR_name, width = 25)
# Save the plot
# png("man/figures/excluderanges_hg38_jaccard.png", width = 1000, height = 900, res = 200)
pheatmap(data.matrix(mtx_to_plot))
# dev.off()
Note that some excludable ranges objects contain six columns, implying there may be some interesting metadata. Let’s explore one.
mcols(excludeGR.hg38.Reddy)
#> DataFrame with 401 rows and 2 columns
#>                       name     score
#>                <character> <numeric>
#> 1   High_Mappability_isl..      1000
#> 2         Satellite_repeat      1000
#> 3                 BSR/Beta      1000
#> 4   Low_mappability_island      1000
#> 5                 (CATTC)n      1000
#> ...                    ...       ...
#> 397                   TAR1      1000
#> 398       Satellite_repeat      1000
#> 399               (CATTC)n      1000
#> 400               (CATTC)n      1000
#> 401                   TAR1      1000
# Count how often each value of the "name" metadata column occurs; the
# result has a factor column Type and a count column Number
mtx_to_plot <- as.data.frame(
  table(Type = mcols(excludeGR.hg38.Reddy)[["name"]]),
  responseName = "Number"
)
# Sort rows by increasing count, then fix the factor levels in that order so
# the plot keeps the sorted order
mtx_to_plot <- mtx_to_plot[order(mtx_to_plot$Number), ]
mtx_to_plot$Type <- factor(
  mtx_to_plot$Type,
  levels = as.character(mtx_to_plot$Type)
)
ggplot(mtx_to_plot, aes(x = Number, y = Type, fill = Type)) +
  geom_bar(stat="identity") +
  theme_bw() + theme(legend.position = "none")
# ggsave("man/figures/excluderanges_hg38_Reddy_metadata.png", width = 5, height = 2.5)
One may decide to combine the excludable ranges from all labs, although from previous
results we may decide to follow Anshul’s advice about the ENCFF356LFX exclusion list regions
and use the excludeGR.hg38.Kundaje.1 object.
# Concatenate the ranges of all six hg38 exclusion sets and collapse the
# combined set with reduce()
excludeGR.hg38.all <- reduce(
  c(
    excludeGR.hg38.Bernstein,
    excludeGR.hg38.Kundaje.1,
    excludeGR.hg38.Kundaje.2,
    excludeGR.hg38.Reddy,
    excludeGR.hg38.Wold,
    excludeGR.hg38.Yeo
  )
)
#> Warning in valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE): GRanges object contains 1 out-of-bound range located on sequence
#>   chr4_GL000008v2_random. Note that ranges located on a sequence whose
#>   length is unknown (NA) or on a circular sequence are not considered
#>   out-of-bound (use seqlengths() and isCircular() to get the lengths and
#>   circularity flags of the underlying sequences). You can use trim() to
#>   trim these ranges. See ?`trim,GenomicRanges-method` for more
#>   information.
# Keep only standard chromosomes
excludeGR.hg38.all <- keepStandardChromosomes(
  excludeGR.hg38.all,
  pruning.mode = "coarse"
)
# Number of combined regions
print(length(excludeGR.hg38.all))
#> [1] 13239
# Distribution of the combined region widths
summary(width(excludeGR.hg38.all))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>       5    1778    2306    8153    2859 5407757
Besides the ENCODE-produced excludable regions, we may want to exclude centromeres, telomeres, and other gap locations. The “Gap Locations” track for Homo sapiens is available for the GRCh37/hg19 genome assembly as a UCSC ‘gap’ table. It can be retrieved from AnnotationHub, but lacks the metadata columns needed to decide the type of gaps.
# Search AnnotationHub for the hg19 'gap' track. The query must actually run:
# `ahData` is used by the retrieval that follows and is not defined anywhere
# else in this document.
ahData <- query(ah, c("gap", "Homo sapiens", "hg19"))
# Optionally inspect the matching record:
# ahData[ahData$title == "Gap"]
gaps <- ahData[["AH6444"]]
The UCSC ‘gap’ table provides better granularity about the types of gaps available. E.g., for human, hg19, we have the following types and the number of gaps.
Those objects are provided as individual GRanges.
Naming convention: <genome assembly>.UCSC.<gap type>, e.g.,
hg19.UCSC.gap_centromere.
We can similarly load any gap type object.
query_data <- query(ah, c("excluderanges", "UCSC", "Homo Sapiens", "hg19"))
query_data
#> AnnotationHub with 7 records
#> # snapshotDate(): 2021-10-18
#> # $dataprovider: UCSC
#> # $species: Homo sapiens
#> # $rdataclass: GRanges
#> # additional mcols(): taxonomyid, genome, description,
#> #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
#> #   rdatapath, sourceurl, sourcetype 
#> # retrieve records with, e.g., 'object[["AH95927"]]' 
#> 
#>             title                        
#>   AH95927 | hg19.UCSC.centromere.rds     
#>   AH95928 | hg19.UCSC.clone.rds          
#>   AH95929 | hg19.UCSC.contig.rds         
#>   AH95930 | hg19.UCSC.heterochromatin.rds
#>   AH95931 | hg19.UCSC.scaffold.rds       
#>   AH95932 | hg19.UCSC.short_arm.rds      
#>   AH95933 | hg19.UCSC.telomere.rds
gapsGR_hg19_centromere <- query_data[["AH95927"]]
#> downloading 1 resources
#> retrieving 1 resource
#> loading from cache
gapsGR_hg19_centromere
#> GRanges object with 24 ranges and 6 metadata columns:
#>       seqnames              ranges strand |       bin        ix           n
#>          <Rle>           <IRanges>  <Rle> | <numeric> <numeric> <character>
#>     2     chr1 121535434-124535434      * |        23      1270           N
#>   184    chr21   11288129-14288129      * |        10        22           N
#>   199    chr22   13000000-16000000      * |        10         3           N
#>   206    chr19   24681782-27681782      * |         1       410           N
#>   224     chrY   10104553-13104553      * |        10       105           N
#>   ...      ...                 ...    ... .       ...       ...         ...
#>   439     chr6   58830166-61830166      * |        16       628           N
#>   453     chr5   46405641-49405641      * |        14       452           N
#>   460     chr4   49660117-52660117      * |         1       447           N
#>   476     chr3   90504854-93504854      * |         2       784           N
#>   481     chr2   92326171-95326171      * |        20       770           N
#>            size        type      bridge
#>       <numeric> <character> <character>
#>     2     3e+06  centromere          no
#>   184     3e+06  centromere          no
#>   199     3e+06  centromere          no
#>   206     3e+06  centromere          no
#>   224     3e+06  centromere          no
#>   ...       ...         ...         ...
#>   439     3e+06  centromere          no
#>   453     3e+06  centromere          no
#>   460     3e+06  centromere          no
#>   476     3e+06  centromere          no
#>   481     3e+06  centromere          no
#>   -------
#>   seqinfo: 24 sequences from hg19 genome
Note that the UCSC ‘gap’ table for the hg38 human genome assembly does not contain genomic coordinates for the “centromere” gap type. These can be obtained from the rCGH package as follows:
# Build a GRanges object of hg38 centromere coordinates from the `hg38` table.
suppressPackageStartupMessages(library(rCGH))
suppressPackageStartupMessages(library(GenomicRanges))
# `hg38` is not defined in this document; presumably it is a data.frame made
# available by loading rCGH -- TODO confirm. The code below reads its columns
# chrom, centromerStart, centromerEnd and length.
# Adjust chromosome names: recode 23/24 as X/Y, then add the "chr" prefix
hg38$chrom[hg38$chrom == 23] <- "X"
hg38$chrom[hg38$chrom == 24] <- "Y"
hg38$chrom <- paste0("chr", hg38$chrom)
# Make GRanges object (the column names are spelled "centromerStart" /
# "centromerEnd" in the table, without the trailing "e")
hg38.UCSC.centromere <- makeGRangesFromDataFrame(hg38, seqnames.field = "chrom", start.field = "centromerStart", end.field = "centromerEnd")
# Assign seqinfo data
# NOTE(review): the lengths are assigned as an unnamed vector, which assumes
# the rows of hg38 are in the same order as the seqlevels of the new object --
# confirm
seqlengths(hg38.UCSC.centromere) <- hg38$length
genome(hg38.UCSC.centromere)     <- "hg38"
# Resulting object
hg38.UCSC.centromere
#> GRanges object with 24 ranges and 0 metadata columns:
#>        seqnames              ranges strand
#>           <Rle>           <IRanges>  <Rle>
#>    [1]     chr1 121535434-124535434      *
#>    [2]     chr2   92326171-95326171      *
#>    [3]     chr3   90504854-93504854      *
#>    [4]     chr4   49660117-52660117      *
#>    [5]     chr5   46405641-49405641      *
#>    ...      ...                 ...    ...
#>   [20]    chr20   26369569-29369569      *
#>   [21]    chr21   11288129-14288129      *
#>   [22]    chr22   13000000-16000000      *
#>   [23]     chrX   58632012-61632012      *
#>   [24]     chrY   10104553-13104553      *
#>   -------
#>   seqinfo: 24 sequences from hg38 genome
The rCGH package also contains data for the hg19 and
hg18 genomes. The hg19 centromere data is equivalent to the hg19.UCSC.centromere
object provided in our excluderanges package.
| Object | Number.of.regions | Assembly | Lab | Number.of.columns | Source | 
|---|---|---|---|---|---|
| ce10.Kundaje.ce10-Excludable.rds | 122 | ce10 | Anshul Kundaje, Stanford | 3 | http://mitra.stanford.edu/kundaje/akundaje/release/Excludables/ce10-C.elegans | 
| dm3.Kundaje.dm3-Excludable.rds | 492 | dm3 | Anshul Kundaje, Stanford | 3 | http://mitra.stanford.edu/kundaje/akundaje/release/Excludables/dm3-D.melanogaster/ | 
| hg19.Bernstein.Mint_Excludable_hg19.rds | 9035 | hg19 | Bradley Bernstein, Broad | 6 | https://www.encodeproject.org/files/ENCFF200UUD/ | 
| hg19.Birney.wgEncodeDacMapabilityConsensusExcludable.rds | 411 | hg19 | Ewan Birney, EBI | 6 | https://www.encodeproject.org/files/ENCFF001TDO/ | 
| hg19.Crawford.wgEncodeDukeMapabilityRegionsExcludable.rds | 1649 | hg19 | Gregory Crawford, Duke | 6 | https://www.encodeproject.org/files/ENCFF001THR/ | 
| hg19.Wold.hg19mitoExcludable.rds | 295 | hg19 | Barbara Wold, Caltech | 3 | https://www.encodeproject.org/files/ENCFF055QTV/ | 
| hg19.Yeo.eCLIP_Excludableregions.hg19.rds | 57 | hg19 | Gene Yeo, UCSD | 6 | https://www.encodeproject.org/files/ENCFF039QTN/ | 
| hg38.Bernstein.Mint_Excludable_GRCh38.rds | 12052 | hg38 | Bradley Bernstein, Broad | 6 | https://www.encodeproject.org/files/ENCFF023CZC/ | 
| hg38.Kundaje.GRCh38.Excludable.rds | 38 | hg38 | Anshul Kundaje, Stanford | 3 | https://www.encodeproject.org/files/ENCFF419RSJ/ | 
| hg38.Kundaje.GRCh38_unified_Excludable.rds | 910 | hg38 | Anshul Kundaje, Stanford | 3 | https://www.encodeproject.org/files/ENCFF356LFX/ | 
| hg38.Reddy.wgEncodeDacMapabilityConsensusExcludable.hg38.rds | 401 | hg38 | Tim Reddy, Duke | 6 | https://www.encodeproject.org/files/ENCFF220FIN/ | 
| hg38.Wold.hg38mitoExcludable.rds | 299 | hg38 | Barbara Wold, Caltech | 3 | https://www.encodeproject.org/files/ENCFF940NTE/ | 
| hg38.Yeo.eCLIP_Excludableregions.hg38liftover.bed.fixed.rds | 56 | hg38 | Gene Yeo, UCSD | 6 | https://www.encodeproject.org/files/ENCFF269URO/ | 
| mm10.Hardison.Excludable.full.rds | 7865 | mm10 | Ross Hardison, PennState | 3 | https://www.encodeproject.org/files/ENCFF790DJT/ | 
| mm10.Hardison.psuExcludable.mm10.rds | 5552 | mm10 | Ross Hardison, PennState | 3 | https://www.encodeproject.org/files/ENCFF226BDM/ | 
| mm10.Kundaje.anshul.Excludable.mm10.rds | 3010 | mm10 | Anshul Kundaje, Stanford | 3 | https://www.encodeproject.org/files/ENCFF999QPV/ | 
| mm10.Kundaje.mm10.Excludable.rds | 164 | mm10 | Anshul Kundaje, Stanford | 3 | https://www.encodeproject.org/files/ENCFF547MET/ | 
| mm10.Wold.mm10mitoExcludable.rds | 123 | mm10 | Barbara Wold, Caltech | 3 | https://www.encodeproject.org/files/ENCFF759PJK/ | 
| mm9.Wold.mm9mitoExcludable.rds | 123 | mm9 | Barbara Wold, Caltech | 3 | https://www.encodeproject.org/files/ENCFF299EZH/ | 
Download all data from the Google Drive folder
R session information.
#> R version 4.1.1 (2021-08-10)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 20.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.14-bioc/R/lib/libRblas.so
#> LAPACK: /home/biocbuild/bbs-3.14-bioc/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] rCGH_1.23.0          stringr_1.4.0        pheatmap_1.0.12     
#>  [4] ggridges_0.5.3       ggplot2_3.3.5        GenomicRanges_1.45.0
#>  [7] GenomeInfoDb_1.29.10 IRanges_2.27.2       S4Vectors_0.31.5    
#> [10] AnnotationHub_3.1.6  BiocFileCache_2.1.1  dbplyr_2.1.1        
#> [13] BiocGenerics_0.39.2  BiocStyle_2.21.4    
#> 
#> loaded via a namespace (and not attached):
#>   [1] colorspace_2.0-2                        
#>   [2] rjson_0.2.20                            
#>   [3] ellipsis_0.3.2                          
#>   [4] mclust_5.4.7                            
#>   [5] DNAcopy_1.67.0                          
#>   [6] XVector_0.33.0                          
#>   [7] farver_2.1.0                            
#>   [8] affyio_1.63.2                           
#>   [9] bit64_4.0.5                             
#>  [10] interactiveDisplayBase_1.31.2           
#>  [11] AnnotationDbi_1.55.2                    
#>  [12] fansi_0.5.0                             
#>  [13] xml2_1.3.2                              
#>  [14] splines_4.1.1                           
#>  [15] cachem_1.0.6                            
#>  [16] knitr_1.36                              
#>  [17] jsonlite_1.7.2                          
#>  [18] Rsamtools_2.9.1                         
#>  [19] cluster_2.1.2                           
#>  [20] png_0.1-7                               
#>  [21] shiny_1.7.1                             
#>  [22] TxDb.Hsapiens.UCSC.hg18.knownGene_3.2.2 
#>  [23] BiocManager_1.30.16                     
#>  [24] compiler_4.1.1                          
#>  [25] httr_1.4.2                              
#>  [26] assertthat_0.2.1                        
#>  [27] Matrix_1.3-4                            
#>  [28] fastmap_1.1.0                           
#>  [29] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 
#>  [30] limma_3.49.4                            
#>  [31] later_1.3.0                             
#>  [32] htmltools_0.5.2                         
#>  [33] prettyunits_1.1.1                       
#>  [34] tools_4.1.1                             
#>  [35] gtable_0.3.0                            
#>  [36] glue_1.4.2                              
#>  [37] GenomeInfoDbData_1.2.7                  
#>  [38] affy_1.71.0                             
#>  [39] dplyr_1.0.7                             
#>  [40] rappdirs_0.3.3                          
#>  [41] Rcpp_1.0.7                              
#>  [42] TxDb.Hsapiens.UCSC.hg38.knownGene_3.14.0
#>  [43] Biobase_2.53.0                          
#>  [44] jquerylib_0.1.4                         
#>  [45] vctrs_0.3.8                             
#>  [46] Biostrings_2.61.2                       
#>  [47] multtest_2.49.0                         
#>  [48] aCGH_1.71.0                             
#>  [49] preprocessCore_1.55.2                   
#>  [50] rtracklayer_1.53.1                      
#>  [51] xfun_0.27                               
#>  [52] mime_0.12                               
#>  [53] lifecycle_1.0.1                         
#>  [54] restfulr_0.0.13                         
#>  [55] XML_3.99-0.8                            
#>  [56] org.Hs.eg.db_3.14.0                     
#>  [57] MASS_7.3-54                             
#>  [58] zlibbioc_1.39.0                         
#>  [59] scales_1.1.1                            
#>  [60] hms_1.1.1                               
#>  [61] promises_1.2.0.1                        
#>  [62] MatrixGenerics_1.5.4                    
#>  [63] parallel_4.1.1                          
#>  [64] SummarizedExperiment_1.23.5             
#>  [65] RColorBrewer_1.1-2                      
#>  [66] yaml_2.2.1                              
#>  [67] curl_4.3.2                              
#>  [68] memoise_2.0.0                           
#>  [69] sass_0.4.0                              
#>  [70] biomaRt_2.49.7                          
#>  [71] stringi_1.7.5                           
#>  [72] RSQLite_2.2.8                           
#>  [73] BiocVersion_3.14.0                      
#>  [74] highr_0.9                               
#>  [75] BiocIO_1.3.0                            
#>  [76] GenomicFeatures_1.45.2                  
#>  [77] filelock_1.0.2                          
#>  [78] BiocParallel_1.27.17                    
#>  [79] rlang_0.4.12                            
#>  [80] pkgconfig_2.0.3                         
#>  [81] bitops_1.0-7                            
#>  [82] matrixStats_0.61.0                      
#>  [83] evaluate_0.14                           
#>  [84] lattice_0.20-45                         
#>  [85] purrr_0.3.4                             
#>  [86] GenomicAlignments_1.29.0                
#>  [87] labeling_0.4.2                          
#>  [88] bit_4.0.4                               
#>  [89] tidyselect_1.1.1                        
#>  [90] plyr_1.8.6                              
#>  [91] magrittr_2.0.1                          
#>  [92] bookdown_0.24                           
#>  [93] R6_2.5.1                                
#>  [94] magick_2.7.3                            
#>  [95] generics_0.1.0                          
#>  [96] DelayedArray_0.19.4                     
#>  [97] DBI_1.1.1                               
#>  [98] pillar_1.6.4                            
#>  [99] withr_2.4.2                             
#> [100] survival_3.2-13                         
#> [101] KEGGREST_1.33.0                         
#> [102] RCurl_1.98-1.5                          
#> [103] tibble_3.1.5                            
#> [104] crayon_1.4.1                            
#> [105] utf8_1.2.2                              
#> [106] rmarkdown_2.11                          
#> [107] progress_1.2.2                          
#> [108] grid_4.1.1                              
#> [109] blob_1.2.2                              
#> [110] digest_0.6.28                           
#> [111] xtable_1.8-4                            
#> [112] httpuv_1.6.3                            
#> [113] munsell_0.5.0                           
#> [114] bslib_0.3.1
Amemiya, Haley M, Anshul Kundaje, and Alan P Boyle. 2019. “The Encode Blacklist: Identification of Problematic Regions of the Genome.” Sci Rep 9 (1): 9354. https://doi.org/10.1038/s41598-019-45839-z.