## ----eval=TRUE, message=FALSE-------------------------------------------------
library(SCOPE)
library(WGSmapp)
library(BSgenome.Hsapiens.UCSC.hg38)
bamfolder <- system.file("extdata", package = "WGSmapp")
bamFile <- list.files(bamfolder, pattern = '*.dedup.bam$')
bamdir <- file.path(bamfolder, bamFile)
sampname_raw <- sapply(strsplit(bamFile, ".", fixed = TRUE), "[", 1)
bambedObj <- get_bam_bed(bamdir = bamdir, sampname = sampname_raw, 
                        hgref = "hg38")
ref_raw <- bambedObj$ref

## ----eval=TRUE, message=FALSE-------------------------------------------------
mapp <- get_mapp(ref_raw, hgref = "hg38")
head(mapp)
gc <- get_gc(ref_raw, hgref = "hg38")
values(ref_raw) <- cbind(values(ref_raw), DataFrame(gc, mapp))
ref_raw

## ----eval=FALSE---------------------------------------------------------------
#  library(BSgenome.Mmusculus.UCSC.mm10)
#  mapp <- get_mapp(ref_raw, hgref = "mm10")
#  gc <- get_gc(ref_raw, hgref = "mm10")

## ----eval=TRUE----------------------------------------------------------------
# Getting raw read depth
coverageObj <- get_coverage_scDNA(bambedObj, mapqthres = 40, 
                                seq = 'paired-end', hgref = "hg38")
Y_raw <- coverageObj$Y

## ----eval=TRUE----------------------------------------------------------------
QCmetric_raw <- get_samp_QC(bambedObj)
qcObj <- perform_qc(Y_raw = Y_raw, 
    sampname_raw = sampname_raw, ref_raw = ref_raw, 
    QCmetric_raw = QCmetric_raw)
Y <- qcObj$Y
sampname <- qcObj$sampname
ref <- qcObj$ref
QCmetric <- qcObj$QCmetric

## ----eval=TRUE, message=FALSE-------------------------------------------------
# get gini coefficient for each cell
Gini <- get_gini(Y_sim)

## ----eval=TRUE, message=TRUE--------------------------------------------------
# first-pass CODEX2 run with no latent factors
normObj.sim <- normalize_codex2_ns_noK(Y_qc = Y_sim,
                                        gc_qc = ref_sim$gc,
                                        norm_index = which(Gini<=0.12))

# Ploidy initialization
ploidy.sim <- initialize_ploidy(Y = Y_sim, Yhat = normObj.sim$Yhat, ref = ref_sim)

# If using high performance clusters, parallel computing is 
# easy and improves computational efficiency. Simply use 
# normalize_scope_foreach() instead of normalize_scope(). 
# All parameters are identical. 
normObj.scope.sim <- normalize_scope_foreach(Y_qc = Y_sim, gc_qc = ref_sim$gc,
    K = 1, ploidyInt = ploidy.sim,
    norm_index = which(Gini<=0.12), T = 1:5,
    beta0 = normObj.sim$beta.hat, nCores = 2)
# normObj.scope.sim <- normalize_scope(Y_qc = Y_sim, gc_qc = ref_sim$gc,
#     K = 1, ploidyInt = ploidy.sim,
#     norm_index = which(Gini<=0.12), T = 1:5,
#     beta0 = beta.hat.noK.sim)
Yhat.sim <- normObj.scope.sim$Yhat[[which.max(normObj.scope.sim$BIC)]]
fGC.hat.sim <- normObj.scope.sim$fGC.hat[[which.max(normObj.scope.sim$BIC)]]

## ----eval=FALSE---------------------------------------------------------------
#  plot_EM_fit(Y_qc = Y_sim, gc_qc = ref_sim$gc, norm_index = which(Gini<=0.12),
#              T = 1:5,
#              ploidyInt = ploidy.sim, beta0 = normObj.sim$beta.hat,
#              filename = "plot_EM_fit_demo.pdf")

## ----eval=FALSE---------------------------------------------------------------
#  # Group-wise ploidy initialization
#  clones <- c("normal", "tumor1", "normal", "tumor1", "tumor1")
#  ploidy.sim.group <- initialize_ploidy_group(Y = Y_sim, Yhat = Yhat.noK.sim,
#                                  ref = ref_sim, groups = clones)
#  ploidy.sim.group
#  
#  # Group-wise normalization
#  normObj.scope.sim.group <- normalize_scope_group(Y_qc = Y_sim,
#                                      gc_qc = ref_sim$gc,
#                                      K = 1, ploidyInt = ploidy.sim.group,
#                                      norm_index = which(clones=="normal"),
#                                      groups = clones,
#                                      T = 1:5,
#                                      beta0 = beta.hat.noK.sim)
#  Yhat.sim.group <- normObj.scope.sim.group$Yhat[[which.max(
#                                      normObj.scope.sim.group$BIC)]]
#  fGC.hat.sim.group <- normObj.scope.sim.group$fGC.hat[[which.max(
#                                      normObj.scope.sim.group$BIC)]]

## ----eval=TRUE, message=FALSE-------------------------------------------------
chrs <- unique(as.character(seqnames(ref_sim)))
segment_cs <- vector('list',length = length(chrs))
names(segment_cs) <- chrs
for (chri in chrs) {
    message('\n', chri, '\n')
    segment_cs[[chri]] <- segment_CBScs(Y = Y_sim,
                                    Yhat = Yhat.sim,
                                    sampname = colnames(Y_sim),
                                    ref = ref_sim,
                                    chr = chri,
                                    mode = "integer", max.ns = 1)
}
iCN_sim <- do.call(rbind, lapply(segment_cs, function(z){z[["iCN"]]}))

## ----eval=FALSE---------------------------------------------------------------
#  plot_iCN(iCNmat = iCN_sim, ref = ref_sim, Gini = Gini,
#          filename = "plot_iCN_demo")

## -----------------------------------------------------------------------------
sessionInfo()