## ---- echo = FALSE-------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----eval=FALSE----------------------------------------------------------
## system.file("extdata","aneuploidy.bam",package="MADSEQ")
## system.file("extdata","aneuploidy.vcf.gz",package="MADSEQ")

## ------------------------------------------------------------------------
## load the package
suppressMessages(library("MADSEQ"))

## get path to the location of example data
aneuploidy_bam = system.file("extdata","aneuploidy.bam",package="MADSEQ")
normal_bam = system.file("extdata","normal.bam",package="MADSEQ")
target = system.file("extdata","target.bed",package="MADSEQ")

## Note: for your own data, just specify the path to the location 
## of your file using character.

## prepare coverage and GC content for each targeted region
# aneuploidy sample
aneuploidy_cov = prepareCoverageGC(target_bed=target, 
                                    bam=aneuploidy_bam, 
                                    "hg19")

# normal sample
normal_cov = prepareCoverageGC(target_bed=target, 
                                bam=normal_bam, 
                                "hg19")

## view the first two rows of prepared coverage data (A GRanges Object)
aneuploidy_cov[1:2]

normal_cov[1:2]

## ------------------------------------------------------------------------
## normalize coverage data
## set plot=FALSE here because similar plot will show in the following example
normalizeCoverage(aneuploidy_cov,writeToFile=TRUE, destination=".",plot=FALSE)

## ------------------------------------------------------------------------
## normalize coverage data
aneuploidy_normed = normalizeCoverage(aneuploidy_cov,writeToFile=FALSE,
                                      plot=FALSE)

## a GRangesList object will be produced by the function, look at it by
names(aneuploidy_normed)
aneuploidy_normed[["aneuploidy_cov"]]

## ------------------------------------------------------------------------
## normalize coverage data
normalizeCoverage(aneuploidy_cov, normal_cov,
                  writeToFile =TRUE, destination = ".", plot=FALSE)

## ----fig.height=6, fig.width=6,fig.align='center'------------------------
## normalize coverage data
normed_without_control = normalizeCoverage(aneuploidy_cov, normal_cov, 
                                           writeToFile=FALSE, plot=TRUE)

## a GRangesList object will be produced by the function
length(normed_without_control)
names(normed_without_control)

## subsetting
normed_without_control[["aneuploidy_cov"]]
normed_without_control[["normal_cov"]]

## ------------------------------------------------------------------------
## normalize coverage data, normal_cov is the control sample
normalizeCoverage(aneuploidy_cov, control=normal_cov,
                  writeToFile=TRUE, destination = ".",plot=FALSE)

## ------------------------------------------------------------------------
normed_with_control = normalizeCoverage(aneuploidy_cov, control=normal_cov, 
                                        writeToFile =FALSE, plot=FALSE)

## a GRangesList object will be produced by the function
length(normed_without_control)
names(normed_with_control)

## ------------------------------------------------------------------------
## specify the path to vcf.gz file
aneuploidy_vcf = system.file("extdata","aneuploidy.vcf.gz",package="MADSEQ")

## target bed file specified before

## If you choose to write the output to file (recommended)
prepareHetero(aneuploidy_vcf, target, genome="hg19", 
              writeToFile=TRUE, destination=".")

## If you don't want to write output to file
aneuploidy_hetero = prepareHetero(aneuploidy_vcf, target,
                                  genome="hg19", writeToFile=FALSE)


## ----fig.height=4, fig.width=6,fig.align='center'------------------------
## specify the path to processed files
aneuploidy_hetero = "./aneuploidy.vcf.gz_filtered_heterozygous.txt"
aneuploidy_normed_cov = "./aneuploidy_cov_normed_depth.txt"

## run the model
aneuploidy_chr18 = runMadSeq(hetero=aneuploidy_hetero, 
                             coverage=aneuploidy_normed_cov, 
                             target_chr="chr18",
                             nChain=1, nStep=1000, thinSteps=1,
                             adapt=100,burnin=200)

## An MadSeq object will be returned
aneuploidy_chr18

## ----eval=FALSE----------------------------------------------------------
## ## subset normalized coverage for aneuploidy sample from the GRangesList
## ## returned by normalizeCoverage function
## aneuploidy_normed_cov = normed_with_control[["aneuploidy_cov"]]
## 
## ## run the model
## aneuploidy_chr18 = runMadSeq(hetero=aneuploidy_hetero,
##                              coverage=aneuploidy_normed_cov,
##                              target_chr="chr18")
## 
## ## An MadSeq object will be returned
## aneuploidy_chr18

## ----echo=FALSE,warning=FALSE--------------------------------------------
BIC = c("[0,2]","(2,6]","(6,10]",">10")
evidence = c("Not worth more than a bare mention","Positive",
             "Strong","Very Strong")
table = data.frame(BIC,evidence)
library(knitr)
kable(table,col.names =c("deltaBIC","Evidence against higher BIC") ,align="c")

## ---- fig.height=4, fig.width=6,fig.align='center'-----------------------
## plot the posterior distribution for all the parameters in selected model
plotMadSeq(aneuploidy_chr18)

## ---- fig.height=4, fig.width=4,fig.align='center'-----------------------
## plot the histogram for the estimated fraction of aneuploidy
plotFraction(aneuploidy_chr18, prob=0.95)

## ---- fig.height=4, fig.width=5,fig.align='center'-----------------------
## plot the distribution of AAF as estimated by the model
plotMixture(aneuploidy_chr18)

## ----echo=FALSE,warning=FALSE--------------------------------------------
parameters = c("f","m","mu[1]","mu[2]","mu[3] (LOH model)",
               "mu[3] (meiotic trisomy model)","mu[4]","kappa","p[1]","p[2]",
               "p[3]","p[4]","p[5]","m_cov","p_cov","r_cov")
explains = c("Fraction of mosaic aneuploidy",
             "The midpoint of the alternative allele frequency (AAF) for all heterozygous sites",
             "Mean AAF of mixture 1: the AAFs of this mixture shifted from midpoint to some higher values", 
             "Mean AAF of mixture 2: the AAFs of this mixture shifted from midpoint to some lower values",
             "Mean AAF of mixture 3: In LOH model, mu[3] indicates normal sites without loss of heterozygosity",
             "Mean AAF of mixture 3: In meiotic model, the AAFs of this mixture shifted from 0 to some higher value",
             "Mean AAF of mixture 4: the AAFs of this mixture shifted from 1 to some lower value (only in meiotic model)",
             "Indicate variance of the AAF mixtures: larger kappa means smaller variance",
             "Weight of mixture 1: indicate the proportion of heterozygous sites in the mixture 1",
             "Weight of mixture 2: indicate the proportion of heterozygous sites in the mixture 2",
             "Weight of mixture 3: indicate the proportion of heterozygous sites in the mixture 3 (only in LOH and meiotic model)",
             "Weight of mixture 4: indicate the proportion of heterozygous sites in the mixture 4 (only in meiotic model)",
             "Weight of outlier component: the AAF of 1% sites might not well behaved, so these sites are treated as noise.",
             "Mean coverage of all the sites from the chromosome, estimated from a negative binomial distribution",
             "Prob of the negative binomial distribution for the coverage",
             "Another parameter (r) for the negative binomial disbribution of the coverage, small r means large variance")
table = data.frame(parameters,explains)
kable(table,col.names =c("parameters","description") ,align="c")