\name{cma.scores}
\alias{cma.scores}
\title{Cancer Mutation Prevalence Analysis Scores}
\description{
  Computes Gene-specific Scores for Cancer Mutation Prevalence Analysis.
}
\usage{
cma.scores(cma.data,
	   passenger.rates = t(data.frame(0.55*rep(1.0e-6,25))),
           number.genes,
           compute.poisson.BF=FALSE,
           compute.binomial.posterior=FALSE,
           allow.separate.rates = TRUE,
           filter.above=0, 
           filter.below=0, 
           filter.threshold=0,
	   filter.mutations=0,
           aa=1e-10, 
           bb=1e-10,
           priorH0=1-300/13020, 
           prior.a0=100,
           prior.a1=5,
           prior.fold=10)}
\arguments{
  \item{cma.data}{Data frame with mutation information broken down by
    gene, phase and mutation type. See \code{WoodMutationsBreast} for an example. }
  \item{passenger.rates}{Data frame of passenger mutation rates per
    nucleotide, by type, or "context". Columns denote types and must be
    in the same order as the first 25 columns in cma.data objects. If two
    rows are present, they must have row names "Discovery" and
    "Validation"}
  \item{number.genes}{The total number of genes analyzed, including
    those in which no mutations were found.}
  \item{compute.poisson.BF}{If TRUE, computes Bayes Factors (BF) using a
    Poisson model for mutation counts and gamma priors on rates.}
  \item{compute.binomial.posterior}{If TRUE, computes the posterior
    probability that a gene's mutation rates are above the
    specified passenger rates using a binomial model.}
  \item{allow.separate.rates}{If TRUE, allows the use of separate rates for
    discovery and validation screens.}
  \item{filter.threshold}{This and the following three inputs control
    filtering of genes, allowing genes to be excluded from analysis by size
    and number of mutations. Different criteria can be set above and
    below this threshold. The threshold is a gene size in base pairs.}
  \item{filter.above}{Minimum number of mutations per
    Mb, applied to genes of size greater than \code{filter.threshold}.}
  \item{filter.below}{Minimum number of mutations per
    Mb, applied to genes of size lower than \code{filter.threshold}.}
  \item{filter.mutations}{Only consider genes
    whose total number of mutations is greater than or equal to
    \code{filter.mutations}.}   
  \item{aa}{Hyperparameter of beta prior used in compute.binomial.posterior.}
  \item{bb}{Hyperparameter of beta prior used in compute.binomial.posterior}
  \item{priorH0}{Prior probability of the null hypothesis, used to
    convert the BF in compute.poisson.BF to a posterior probability}
  \item{prior.a0}{Shape hyperparameter of gamma prior on passenger rates used in compute.poisson.BF}
  \item{prior.a1}{Shape hyperparameter of gamma prior on non-passenger rates used in compute.poisson.BF}
  \item{prior.fold}{Hyperparameter of gamma prior on non-passenger
    rates used in compute.poisson.BF. The mean of the gamma is set so that
    the ratio of the mean to the passenger rate is the specified
    \code{prior.fold} in each type.}
}

\details{  
  The scores computed by this function are relevant for two-stage
  experiments like the one in the Sjoeblom article. In this design, genes
  are sequenced in a first "discovery" sample. Genes in which mutations
  are found are also sequenced in a subsequent "validation" screen.
  The goal of this tool is to facilitate reanalysis of the Sjoeblom
  dataset. Application to other projects requires a detailed
  understanding of the Sjoeblom project. 
}
\value{
  A data frame giving gene-by-gene values for each score. The columns in
  this data frame are:
  \item{CaMP}{The CaMP score of Sjoeblom and colleagues.}
  \item{neglogPg}{The negative log10 of Pg, where Pg represents the probability
  that a gene has its exact observed mutation profile under the null,
  i.e. assuming the given passenger rates.}
  \item{logLRT}{The log10 of the likelihood ratio test (LRT).}
  \item{logitBinomialPosteriorDriver}{Logit of the posterior
    probability that a gene's mutation rates are above the
    specified passenger rates using a binomial model.}
  \item{PoissonlogBF}{The log10 of the Bayes Factor (BF) using a
    Poisson-Gamma model.}
  \item{PoissonPosterior}{The posterior probability that a given
  gene is a driver, using a Poisson-Gamma model.}
  \item{Poissonlmlik0}{Marginal likelihood under the null hypothesis in
    the Poisson-Gamma model}
  \item{Poissonlmlik1}{Marginal likelihood under the alternative  hypothesis in
    the Poisson-Gamma model}
}
\references{
  Parmigiani G, Lin J, Boca S, Sjoeblom T, Kinzler KW,
  Velculescu VE, Vogelstein B. Statistical methods for the analysis of
  cancer genome sequencing data. 
  \url{http://www.bepress.com/jhubiostat/paper126/}

  Parsons DW, Jones S, Zhang X, Lin JCH, Leary RJ, Angenendt P, Mankoo P,
  Carter H, Siu I, et al. 
  An Integrated Genomic Analysis of Human Glioblastoma Multiforme. 
  \emph{Science.} DOI: 10.1126/science.1164382

  Sjoeblom T, Jones S, Wood LD, Parsons DW, Lin J, Barber T,
  Mandelker D, Leary R, Ptak J, Silliman N, et al.  The
  consensus coding sequences of breast and colorectal cancers.
  \emph{Science.} DOI: 10.1126/science.1133427

  Wood LD, Parsons DW, Jones S, Lin J, Sjoeblom T, Leary RJ, Shen D,
  Boca SM, Barber T, Ptak J, et al. The Genomic Landscapes of Human
  Breast and Colorectal Cancer. \emph{Science.} DOI: 10.1126/science.1145720
  }
\author{Giovanni Parmigiani, Simina M. Boca}
\seealso{\code{MutationsBrain}, 
\code{GeneSizes08},
\code{do.gene.set.analysis}}
\examples{
\dontrun{ data(Parsons)
ScoresBrain <- cma.scores(cma.data=MutationsBrain,
                          number.genes=nrow(GeneSizes08))
}
}
\keyword{htest}