###################################################
### chunk number 1: getsam
###################################################
library(ind1KG)
if (!exists("n240"))
  data(n240)


###################################################
### chunk number 2: lksc
###################################################
names(n240[[1]])


###################################################
### chunk number 3: lkccc
###################################################
sapply(n240[[1]], class)


###################################################
### chunk number 4: lkddd
###################################################
lapply(n240[[1]], "[", 1:5)


###################################################
### chunk number 5: lkgf
###################################################
library(GenomicFeatures)
library(GenomicFeatures.Hsapiens.UCSC.hg18)
library(IRanges)
genes = geneHuman()
genes[1:2,]


###################################################
### chunk number 6: lktr
###################################################
tx6 <- transcripts(genes, proximal=300)
chr6a <- ranges(tx6)[["chr6"]][1:50]
chr6a


###################################################
### chunk number 7: getrs eval=FALSE
###################################################
## library(Rsamtools)
## p1 <- ScanBamParam(which=RangesList(`6`=chr6a))
## fl = "/mnt/data/stvjc/1000GENOMES/NA19240.chrom6.SLX.maq.SRP000032.2009_07.bam"
## unix.time(cnt <- countBam(fl, param=p1))
## sum(cnt$records) # 2103439


###################################################
### chunk number 8: dosc eval=FALSE
###################################################
## res <- scanBam(fl, param=p1)
## length(res)
## names(res[[1]])


###################################################
### chunk number 9: lknd
###################################################
data(pup240_disc)
pup240_disc[1:5,]


###################################################
### chunk number 10: om
###################################################
pup240_disc = pup240_disc[ pup240_disc$ref != "*", ]
pup240_disc = pup240_disc[ pup240_disc$ref %in% c("A", "C", "T", "G"), ]
table(pup240_disc$indiv)


###################################################
### chunk number 11: lknd
###################################################
data(c6snp)
sum( !( pup240_disc$loc  %in% c6snp$chrPosFrom ) )


###################################################
### chunk number 12: lkh
###################################################
nov = pup240_disc[ !( pup240_disc$loc  %in% c6snp$chrPosFrom ), ]
table(nov$indiv)


###################################################
### chunk number 13: lkd
###################################################
data(yri240_6)
names(yri240_6)


###################################################
### chunk number 14: lkhhh
###################################################
snps = as(yri240_6[[1]], "character")
hets = which(snps == "A/B") 
rshet = colnames(snps)[hets]
smet = yri240_6[[2]]
smethet = smet[hets,]
smethet[1:5,]


###################################################
### chunk number 15: lkpi
###################################################
data(pup240_500k)
pup240_500k[1:2,]


###################################################
### chunk number 16: lkdu
###################################################
pup240_500ku = pup240_500k[ !duplicated(pup240_500k[,1]),]


###################################################
### chunk number 17: hp
###################################################
hpup = pup240_500ku[ pup240_500ku[,1] %in% smethet[,"Position"], ]


###################################################
### chunk number 18: lkhom
###################################################
hom = hpup[ hpup[,2] == hpup[,3], ]
hom
smethet[ smethet[,"Position"] %in% hom[,1], ]


###################################################
### chunk number 19: getd
###################################################
data(gw6c6.snp240)
gw6c6.snp240[1:4,]


###################################################
### chunk number 20: lkloc
###################################################
hloc6 = gw6c6.snp240[ gw6c6.snp240$call240 == 2, "physical_pos" ]


###################################################
### chunk number 21: lkll
###################################################
inds = which(pup240_500k[,1] %in% hloc6)
table(pup240_500k[inds, 3])


###################################################
### chunk number 22: lko
###################################################
oloc6 = gw6c6.snp240[ gw6c6.snp240$call240 %in% c(1,3), "physical_pos" ]
oinds = which(pup240_500k[,1] %in% oloc6)
table(pup240_500k[oinds, 3])


###################################################
### chunk number 23: dobr eval=FALSE
###################################################
## library(IRanges)
## nloc = nov$loc
## nrd = RangedData( IRanges(nloc, nloc) )
## names(nrd) = "chr6"
## library(rtracklayer)
## br = browserSession("UCSC")
## br[["novo"]] = nrd
## v1 = browserView(br, GenomicRanges(1, 1e7, "chr6"))


###################################################
### chunk number 24: lrnam eval=FALSE
###################################################
## tn = trackNames(br)
## grep("Genes", names(tn), value=TRUE) # many different gene sets
## tn["UCSC Genes"]   # resolve indirection


###################################################
### chunk number 25: doal eval=FALSE
###################################################
## rsg = track(br, "refGene")
## rsgdf = as.data.frame(rsg)


###################################################
### chunk number 26: lkrs
###################################################
data(rsgdf)
names(rsgdf)
rsgdf[1:3,1:7]


###################################################
### chunk number 27: lkres
###################################################
library(org.Hs.eg.db)
rsgn = as.character(rsgdf$name)
eid = mget(rsgn, revmap(org.Hs.egREFSEQ), ifnotfound=NA)
eid = na.omit(unlist(eid))
sym = mget(eid, org.Hs.egSYMBOL, ifnotfound=NA)
unlist(sym)[1:10]


###################################################
### chunk number 28: zzz
###################################################
nloc = nov$loc  # this one is evaluated
nranges = IRanges(nloc, nloc)
granges = IRanges(rsgdf$start, rsgdf$end) # no guarantee of annotation
length(nranges)
length(granges)
sum(nranges %in% granges)
match(nranges,granges)[1:200]