## ----docSetup, bootstrap.show.code = FALSE, dev = device, bootstrap.show.message=FALSE----
## knitrBoostrap and device chunk options
load_install('knitr')
opts_chunk$set(bootstrap.show.code = FALSE, dev = device)
if(!outputIsHTML) opts_chunk$set(bootstrap.show.code = FALSE, dev = device, echo = FALSE)

## ----setup, bootstrap.show.message=FALSE-----------------------------------
#### Libraries needed

## Bioconductor
load_install('bumphunter')
load_install('derfinder')
load_install('derfinderPlot')
load_install('GenomeInfoDb')
load_install('GenomicRanges')
load_install('ggbio')

## Transcription database to use by default
if(is.null(txdb)) {
    load_install('TxDb.Hsapiens.UCSC.hg19.knownGene')
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
}

## CRAN
load_install('ggplot2')
if(!is.null(theme)) theme_set(theme)
load_install('grid')
load_install('gridExtra')
load_install('knitr')
load_install('RColorBrewer')
load_install('mgcv')
load_install('whisker')
load_install('DT')
load_install('devtools')

## Working behind the scenes
# load_install('knitcitations')
# load_install('rmarkdown')
## Optionally
# load_install('knitrBootstrap')

#### Code setup

## For ggplot
tmp <- regions
names(tmp) <- seq_len(length(tmp))
regions.df <- as.data.frame(tmp)
regions.df$width <- width(tmp)
rm(tmp)

## Special subsets: need at least 3 points for a density plot
keepChr <- table(regions.df$seqnames) > 2
regions.df.plot <- subset(regions.df, seqnames %in% names(keepChr[keepChr]))

if(hasSignificant) {
    ## Keep only those sig
    regions.df.sig <- regions.df[significantVar, ]
    keepChr <- table(regions.df.sig$seqnames) > 2
    regions.df.sig <- subset(regions.df.sig, seqnames %in% names(keepChr[keepChr]))
}

## Find which chrs are present in the data set
chrs <- levels(seqnames(regions))

## areaVar initialize
areaVar <- NULL

## ----pvaluePlots, echo=FALSE, results='asis', eval=hasPvalueVars, echo=hasPvalueVars----
#  for(i in seq_len(length(pvalueVars))) {
#      densityVarName <- names(pvalueVars[i])
#      densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName)
#     cat(knit_child(text = whisker.render(templatePvalueDensityInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n')
#  }

## ----regLen, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant----
#  xrange <- range(log10(regions.df.plot$width)) * c(0.95, 1.05)
#  p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) +
#      geom_line(stat='density') + labs(title='Density of region lengths') +
#      xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  p2b <- ggplot(regions.df.sig, aes(x=log10(width), colour=seqnames)) +
#      geom_line(stat='density') +
#      labs(title='Density of region lengths (significant only)') +
#      xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  grid.arrange(p2a, p2b)

## ----regLen2, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant----
p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) +
    geom_line(stat='density') + labs(title='Density of region lengths') +
    xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) +
    theme(legend.title=element_blank())
p2a

## ----density-area, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant----
#  xrange <- range(regions.df.plot[, 'area']) * c(0.95, 1.05)
#  p3aarea <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'area']), ], aes(x=area, colour=seqnames)) +
#      geom_line(stat='density') + labs(title='Density of Area') +
#      xlab('Area') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  p3barea <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'area']), ], aes(x=area, colour=seqnames)) +
#      geom_line(stat='density') +
#      labs(title='Density of Area (significant only)') +
#      xlab('Area') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  grid.arrange(p3aarea, p3barea)

## ----density-solo-area, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant----
p3aarea <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'area']), ], aes(x=area, colour=seqnames)) +
    geom_line(stat='density') + labs(title='Density of Area') +
    xlab('Area') + scale_colour_discrete(limits=chrs) +
    theme(legend.title=element_blank())
p3aarea

## ----density-value, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant----
#  xrange <- range(regions.df.plot[, 'value']) * c(0.95, 1.05)
#  p3avalue <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'value']), ], aes(x=value, colour=seqnames)) +
#      geom_line(stat='density') + labs(title='Density of Value') +
#      xlab('Value') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  p3bvalue <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'value']), ], aes(x=value, colour=seqnames)) +
#      geom_line(stat='density') +
#      labs(title='Density of Value (significant only)') +
#      xlab('Value') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  grid.arrange(p3avalue, p3bvalue)

## ----density-solo-value, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant----
p3avalue <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'value']), ], aes(x=value, colour=seqnames)) +
    geom_line(stat='density') + labs(title='Density of Value') +
    xlab('Value') + scale_colour_discrete(limits=chrs) +
    theme(legend.title=element_blank())
p3avalue

## ----density-clusterL, fig.width=14, fig.height=14, eval=hasSignificant, echo=hasSignificant----
#  xrange <- range(regions.df.plot[, 'clusterL']) * c(0.95, 1.05)
#  p3aclusterL <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) +
#      geom_line(stat='density') + labs(title='Density of Cluster Length') +
#      xlab('Cluster Length') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  p3bclusterL <- ggplot(regions.df.sig[is.finite(regions.df.sig[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) +
#      geom_line(stat='density') +
#      labs(title='Density of Cluster Length (significant only)') +
#      xlab('Cluster Length') + scale_colour_discrete(limits=chrs) +
#      xlim(xrange) + theme(legend.title=element_blank())
#  grid.arrange(p3aclusterL, p3bclusterL)

## ----density-solo-clusterL, fig.width=10, fig.height=10, eval=!hasSignificant, echo=!hasSignificant----
p3aclusterL <- ggplot(regions.df.plot[is.finite(regions.df.plot[, 'clusterL']), ], aes(x=clusterL, colour=seqnames)) +
    geom_line(stat='density') + labs(title='Density of Cluster Length') +
    xlab('Cluster Length') + scale_colour_discrete(limits=chrs) +
    theme(legend.title=element_blank())
p3aclusterL

## ----densityPlots, echo=FALSE, results='asis', eval=hasDensityVars, echo=hasDensityVars----
for(i in seq_len(length(densityVars))) {
    densityVarName <- names(densityVars[i])
    densityVarName <- ifelse(is.null(densityVarName), densityVars[i], densityVarName)
   cat(knit_child(text = whisker.render(templateDensityInUse, list(varName = densityVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n')
}

## ----genomeOverview1, message=FALSE, fig.width=7, fig.height=9, dpi=300, eval=hasSignificant, echo=hasSignificant----
#  ## Choose what variable to show on the top
#  tmp <- regions
#  tmp$significant <- factor(significantVar, levels = c('TRUE', 'FALSE'))
#  if(!'area' %in% colnames(mcols(tmp))) {
#      if(hasDensityVars) {
#          tmp$area <- mcols(tmp)[[densityVars[1]]]
#          areaVar <- densityVars[1]
#          areaVar <- ifelse(is.null(names(areaVar)), densityVars[1], names(areaVar))
#      } else {
#          tmp$area <- 0
#          areaVar <- NULL
#      }
#  } else {
#      areaVar <- 'area'
#  }
#  plotOverview(regions=tmp, type='pval', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12))
#  rm(tmp)

## ----manhattanPlots, echo=FALSE, results='asis', eval=hasPvalueVars, echo=hasPvalueVars----
#  for(i in seq_len(length(pvalueVars))) {
#      densityVarName <- names(pvalueVars[i])
#      densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName)
#     cat(knit_child(text = whisker.render(templateManhattanInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n')
#  }

## ----genomeOverview2, message=FALSE, fig.width=7, fig.height=9, dpi=300----
## Annotate regions with bumphunter
if(is.null(annotation)) {
    genes <- annotateTranscripts(txdb = txdb)
    annotation <- matchGenes(x = regions, subject = genes)
}


## Make the plot
plotOverview(regions=regions, annotation=annotation, type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12))

## ----annoReg, results='asis'-----------------------------------------------
annoReg <- table(annotation$region, useNA='always')
annoReg.df <- data.frame(Region=names(annoReg), Count=as.vector(annoReg))
if(outputIsHTML) {
    kable(annoReg.df, format = 'markdown', align=rep('c', 3))
} else {
    kable(annoReg.df)
}

## ----genomeOverview3, message=FALSE, fig.width=7, fig.height=9, dpi=300, eval=hasSignificant, echo=hasSignificant----
#  plotOverview(regions=regions[significantVar, ], annotation=annotation[significantVar, ], type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12))

## ----countTable, results='asis'--------------------------------------------
## Construct genomic state object
genomicState <- makeGenomicState(txdb = txdb, chrs = chrs, verbose = FALSE)

## Annotate regions by genomic state
annotatedRegions <- annotateRegions(regions, genomicState$fullGenome, verbose = FALSE)

## Genomic states table
info <- do.call(rbind, lapply(annotatedRegions$countTable, function(x) { data.frame(table(x)) }))
colnames(info) <- c('Number of Overlapping States', 'Frequency')
info$State <- gsub('\\..*', '', rownames(info))
rownames(info) <- NULL
if(outputIsHTML) {
    kable(info, format = 'markdown', align=rep('c', 4))
} else {
    kable(info)
}

## ----vennDiagram, fig.width=7, fig.height=7--------------------------------
## Venn diagram for all regions
venn <- vennRegions(annotatedRegions, counts.col = 'blue', 
    main = 'Regions overlapping genomic states')

## ----vennDiagramSignificant, eval = hasSignificant, echo = hasSignificant, fig.width=7, fig.height=7----
#  ## Venn diagram for all regions
#  vennSig <- vennRegions(annotatedRegions, counts.col = 'blue',
#      main = 'Significant regions overlapping genomic states',
#      subsetIndex = significantVar)

## ----bestRegionInfo, results='asis'----------------------------------------
## Add annotation information
regions.df <- cbind(regions.df, annotation)

## Rank by p-value (first pvalue variable supplied)
if(hasPvalueVars){
    topRegions <- head(regions.df[order(regions.df[, pvalueVars[1]], 
        decreasing = FALSE), ], nBestRegions)
    topRegions <- cbind(data.frame('pvalueRank' = seq_len(nrow(topRegions))), 
        topRegions)
} else {
    topRegions <- head(regions.df, nBestRegions)
}

## Clean up -Inf, Inf if present
## More details at https://github.com/ramnathv/rCharts/issues/259
replaceInf <- function(df, colsubset=seq_len(ncol(df))) {
    for(i in colsubset) {
        inf.idx <- !is.finite(df[, i])
        if(any(inf.idx)) {
            inf.sign <- sign(df[inf.idx, i])
            df[inf.idx, i] <- inf.sign * 1e100
        }
    }
    return(df)
}
topRegions <- replaceInf(topRegions, which(sapply(topRegions, function(x) {
    class(x) %in% c('numeric', 'integer')})))

## Make the table
greptext <- 'value$|area$|mean|log2FoldChange'
greppval <- 'pvalues$|qvalues$|fwer$'
if(hasPvalueVars) {
    greppval <- paste0(paste(pvalueVars, collapse = '$|'), '$|', greppval)
}
if(hasDensityVars) {
    greptext <- paste0(paste(densityVars, collapse = '$|'), '$|', greptext)
}

for(i in which(grepl(greppval, colnames(topRegions)))) topRegions[, i] <- format(topRegions[, i], scientific = TRUE)

if(outputIsHTML) {
    datatable(topRegions, options = list(pagingType='full_numbers', pageLength=10, scrollX='100%'), rownames = FALSE) %>% formatRound(which(grepl(greptext, colnames(topRegions))), digits)
} else {
    ## Only print the top part if your output is a PDF file
    df_top <- head(topRegions, 20)
    for(i in which(grepl(greptext, colnames(topRegions)))) df_top[, i] <- round(df_top[, i], digits)
    kable(df_top)
}

## ----child = customCode, eval = hasCustomCode------------------------------
#  NA

## ----thecall, echo=FALSE---------------------------------------------------
theCall

## ----reproducibility1, echo=FALSE------------------------------------------
## Date the report was generated
Sys.time()

## ----reproducibility2, echo=FALSE------------------------------------------
## Processing time in seconds
totalTime <- diff(c(startTime, Sys.time()))
round(totalTime, digits=3)

## ----reproducibility3, echo=FALSE-------------------------------------------------------------------------------------
## Session info
options(width = 120)
session_info()

## ----bibliography, results='asis', echo=FALSE, warning = FALSE--------------------------------------------------------
## Print bibliography
bibliography()