### R code from vignette source 'A12_Bioconductor.Rnw'

###################################################
### code chunk number 1: S4-view (eval = FALSE)
###################################################
## selectMethod(countOverlaps, c("GRanges", "GRanges"))


###################################################
### code chunk number 2: GRanges
###################################################
library(GenomicRanges)
getClass("GRanges")


###################################################
### code chunk number 3: countOverlaps
###################################################
showMethods("countOverlaps")


###################################################
### code chunk number 4: countOverlapsWorks
###################################################
gr0 <- GRanges("chr1", IRanges(start=c(10, 20), width = 5), "+")
gr1 <- GRanges("chr1", IRanges(start=12, end=18), "+")
countOverlaps(gr0, gr1)


###################################################
### code chunk number 5: S4
###################################################
library(Biostrings)
showMethods(complement)


###################################################
### code chunk number 6: S4-help (eval = FALSE)
###################################################
## class ? DNAStringSet
## method ? "complement,DNAStringSet"


###################################################
### code chunk number 7: gc-genome
###################################################
library(BSgenome.Dmelanogaster.UCSC.dm3)
Dmelanogaster
library(SequenceAnalysisData)
data(ex)
ex[1]
nm <- "chr3L"
chr <- Dmelanogaster[[nm]]
v <- Views(chr, start=start(ex[[1]]), end=end(ex[[1]]))


###################################################
### code chunk number 8: gcFunction-definition
###################################################
gcFunction <- 
    function(x)
{
    alf <- alphabetFrequency(x, as.prob=TRUE)
    rowSums(alf[,c("G", "C")])
}


###################################################
### code chunk number 9: gcFunction-genome (eval = FALSE)
###################################################
## subjectGC <- gcFunction(v)


###################################################
### code chunk number 10: fastq-format
###################################################
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fls <- dir(file.path(bigdata, "fastq"), full=TRUE)
cat(noquote(readLines(fls[[1]], 4)), sep="\n")


###################################################
### code chunk number 11: ascii
###################################################
cat(rawToChar(as.raw(32+1:47)),
    rawToChar(as.raw(32+48:94)), sep="\n")


###################################################
### code chunk number 12: fastq-discovery
###################################################
library(ShortRead)
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
dir(bigdata)
fls <- dir(file.path(bigdata, "fastq"), full=TRUE)


###################################################
### code chunk number 13: fastq-input-gc
###################################################
invisible(gc())


###################################################
### code chunk number 14: fastq-input
###################################################
fq <- readFastq(fls[1])


###################################################
### code chunk number 15: gcC
###################################################
alf0 <- alphabetFrequency(sread(fq), as.prob=TRUE, collapse=TRUE)
sum(alf0[c("G", "C")])


###################################################
### code chunk number 16: gc-reads
###################################################
gc <- gcFunction(sread(fq))
hist(gc)


###################################################
### code chunk number 17: abc
###################################################
abc <- alphabetByCycle(sread(fq))
matplot(t(abc[c("A", "C", "G", "T"),]), type="l")


###################################################
### code chunk number 18: abc-mclapply (eval = FALSE)
###################################################
## library(parallel)
## gc0 <- mclapply(fls, function(fl) {
##   fq <- readFastq(fl)
##   gc <- gcFunction(sread(fq))
##   table(cut(gc, seq(0, 1, .05)))
## })
## ## simplify list of length 2 to 2-D array
## gc <- simplify2array(gc0)
## matplot(gc, type="s")


###################################################
### code chunk number 19: quality
###################################################
head(quality(fq))
qual <- as(quality(fq), "matrix")
dim(qual)
plot(colMeans(qual), type="b")


###################################################
### code chunk number 20: setup
###################################################
library(GenomicFeatures)
plotRanges <- function(x, xlim = x, main = deparse(substitute(x)),
                       col = "black", sep = 0.5, ...)
{
    height <- 1
    if (is(xlim, "Ranges"))
        xlim <- c(min(start(xlim)), max(end(xlim)))
    bins <- disjointBins(IRanges(start(x), end(x) + 1))
    plot.new()
    par(mai=c(0.6, 0.2, 0.6, 0.2))
    plot.window(xlim, c(0, max(bins)*(height + sep)))
    ybottom <- bins * (sep + height) - height
    rect(start(x)-0.5, ybottom, end(x)+0.5, ybottom + height, col = col, ...)
    title(main, cex.main=2.8, font.main=1)
    axis(1)
}


###################################################
### code chunk number 21: GRanges-genes
###################################################
genes <- GRanges(seqnames=c("3R", "X"),
                 ranges=IRanges(
                     start=c(19967117, 18962306),
                     end=c(19973212, 18962925)),
                 strand=c("+", "-"),
                 seqlengths=c(`3R`=27905053L, `X`=22422827L))


###################################################
### code chunk number 22: GRanges-display
###################################################
genes


###################################################
### code chunk number 23: GRanges-help (eval = FALSE)
###################################################
## ?GRanges


###################################################
### code chunk number 24: GRanges-vignettes (eval = FALSE)
###################################################
## vignette(package="GenomicRanges")


###################################################
### code chunk number 25: GRanges-basics
###################################################
genes[2]
strand(genes)
width(genes)
length(genes)
names(genes) <- c("FBgn0039155", "FBgn0085359")
genes  # now with names


###################################################
### code chunk number 26: ranges-ir
###################################################
ir <- IRanges(start=c(7, 9, 12, 14, 22:24),
              end=c(15, 11, 12, 18, 26, 27, 28))


###################################################
### code chunk number 27: ranges-ir-plot
###################################################
png("ranges-ir-plot.png", width=800, height=160)
plotRanges(ir, xlim=c(5, 35), main="Original")
dev.off()
png("ranges-shift-ir-plot.png", width=800, height=160)
plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift")
dev.off()
png("ranges-reduce-ir-plot.png", width=800, height=160)
plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce")
dev.off()


###################################################
### code chunk number 28: ranges-shift-ir
###################################################
shift(ir, 5)


###################################################
### code chunk number 29: ranges-shift-ir-plot
###################################################
png("ranges-shift-ir-plot.png", width=800, height=160)
plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift")
dev.off()


###################################################
### code chunk number 30: ranges-reduce-ir
###################################################
reduce(ir)


###################################################
### code chunk number 31: ranges-reduce-ir-plot
###################################################
png("ranges-reduce-ir-plot.png", width=800, height=160)
plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce")
dev.off()


###################################################
### code chunk number 32: coverage
###################################################
coverage(ir)


###################################################
### code chunk number 33: GRanges-mcols
###################################################
mcols(genes) <- DataFrame(EntrezId=c("42865", "2768869"),
                          Symbol=c("kal-1", "CG34330"))


###################################################
### code chunk number 34: GRanges-metadata
###################################################
metadata(genes) <-
    list(CreatedBy="A. User", Date=date())


###################################################
### code chunk number 35: GRangesList-eg-setup
###################################################
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene # alias
fbgn <- exonsBy(txdb, "gene")["FBgn0039155"]
seqlevels(fbgn) <- "chr3R"


###################################################
### code chunk number 36: GRangesList-eg
###################################################
fbgn


###################################################
### code chunk number 37: txdb
###################################################
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene # alias
ex0 <- exonsBy(txdb, "gene")
head(table(elementLengths(ex0)))
ids <- c("FBgn0002183", "FBgn0003360", "FBgn0025111", "FBgn0036449")
ex <- reduce(ex0[ids])


###################################################
### code chunk number 38: SAM
###################################################
fl <- system.file("extdata", "ex1.sam", package="Rsamtools")
strsplit(readLines(fl, 1), "\t")[[1]]


###################################################
### code chunk number 39: readGAlignments
###################################################
alnFile <- system.file("extdata", "ex1.bam", package="Rsamtools")
aln <- readGAlignments(alnFile)
head(aln, 3)


###################################################
### code chunk number 40: GAlignments-accessors
###################################################
table(strand(aln))
table(width(aln))
head(sort(table(cigar(aln)), decreasing=TRUE))


###################################################
### code chunk number 41: bam-ex-fls
###################################################
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fls <- dir(file.path(bigdata, "bam"), ".bam$", full=TRUE) #$
names(fls) <- sub("_.*", "", basename(fls))


###################################################
### code chunk number 42: bam-ex-input
###################################################
## input
aln <- readGAlignments(fls[1])
xtabs(~seqnames + strand, as.data.frame(aln))


###################################################
### code chunk number 43: bam-ex-roi
###################################################
data(ex)             # from an earlier exercise


###################################################
### code chunk number 44: bam-ex-strand
###################################################
strand(aln) <- "*"   # protocol not strand-aware


###################################################
### code chunk number 45: bam-ex-hits
###################################################
hits <- countOverlaps(aln, ex)
table(hits)


###################################################
### code chunk number 46: bam-ex-cnt
###################################################
cnt <- countOverlaps(ex, aln[hits==1])


###################################################
### code chunk number 47: bam-count-fun
###################################################
counter <- 
    function(filePath, range)
{
    aln <- readGAlignments(filePath)
    strand(aln) <- "*"
    hits <- countOverlaps(aln, range)
    countOverlaps(range, aln[hits==1])
}


###################################################
### code chunk number 48: bam-count-all
###################################################
counts <- sapply(fls, counter, ex)


###################################################
### code chunk number 49: bam-count-mclapply (eval = FALSE)
###################################################
## if (require(parallel))
##     simplify2array(mclapply(fls, counter, ex))