## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# To install this package, start R (version "4.2") and enter:
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("ccImpute")

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(scRNAseq)
library(scater)
library(ccImpute)
library(SingleCellExperiment)
library(stats)
library(mclust)

## ----message=FALSE, warning=FALSE---------------------------------------------
sce <- UsoskinBrainData()
X <- cpm(sce)
labels <- colData(sce)$"Level 1"

#Filter bad cells
filt <- !grepl("Empty well", labels) &
        !grepl("NF outlier", labels) &
        !grepl("TH outlier", labels) &
        !grepl("NoN outlier", labels) &
        !grepl("NoN", labels) &
        !grepl("Central, unsolved", labels) &
        !grepl(">1 cell", labels) &
        !grepl("Medium", labels)
        
labels <-labels[filt]
X <- as.matrix(X[,filt])

#Remove genes that are not expressed in any cells:
X <- X[rowSums(X)>0,]

#Recreate the SingleCellExperiment and add log-transformed data:
ann <- data.frame(cell_id = labels)
sce <- SingleCellExperiment(assays = list(normcounts = as.matrix(X)), 
                            colData = ann)
logcounts(sce) <- log(normcounts(sce) + 1)

## -----------------------------------------------------------------------------
# Set seed for reproducibility purposes.
set.seed(0) 
# Compute PCA reduction of the dataset
reducedDims(sce) <- list(PCA=prcomp(t(logcounts(sce)))$x)

# Get an actual number of cell types
k <- length(unique(colData(sce)$cell_id))

# Cluster the PCA reduced dataset and store the assignments
set.seed(0) 
assgmts <- kmeans(reducedDim(sce, "PCA"), centers = k, iter.max = 1e+09,
                    nstart = 1000)$cluster

# Use ARI to compare the k-means assignments to label assignments
adjustedRandIndex(assgmts, colData(sce)$cell_id)

## -----------------------------------------------------------------------------
library(BiocParallel)
BPPARAM = MulticoreParam(2)
sce <- ccImpute(sce, BPPARAM = BPPARAM)

## -----------------------------------------------------------------------------
# Recompute PCA reduction of the dataset
reducedDim(sce, "PCA_imputed") <- prcomp(t(assay(sce, "imputed")))$x

# Cluster the PCA reduced dataset and store the assignments
assgmts <- kmeans(reducedDim(sce, "PCA_imputed"), centers = k, 
                    iter.max = 1e+09, nstart = 1000)$cluster

# Use ARI to compare the k-means assignments to label assignments
adjustedRandIndex(assgmts, colData(sce)$cell_id)

## ----eval=FALSE---------------------------------------------------------------
# ccImpute(sce, dist, nCeil = 2000, svdMaxRatio = 0.08,
#             maxSets = 8, k, consMin=0.75, kmNStart, kmMax=1000,
#             fastSolver = TRUE, BPPARAM=bpparam(), verbose = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# cores <- 2
# BPPARAM = MulticoreParam(cores)
# 
# w <- rowVars_fast(logcounts(sce), cores)
# corMat <- getCorM("spearman", logcounts(sce), w, cores)
# 
# v <- doSVD(corMat, svdMaxRatio=.08, nCeil=2000, nCores=cores)
# 
# consMtx <- runKM(logX, v, maxSets = 8, k, consMin=0.75, kmNStart, kmMax=1000,
#                     BPPARAM=bpparam())
# 
# dropIds <- findDropouts(logX, consMtx)
# 
# iLogX <- computeDropouts(consMtx, logX, dropIds,
#                             fastSolver=TRUE, nCores=cores)
# assay(sce, "imputed") <- iLogX

## ----eval=FALSE---------------------------------------------------------------
# corMat <- getCorM("pearson", logcounts(sce), nCores=2)

## ----eval=FALSE---------------------------------------------------------------
# BPPARAM = MulticoreParam(4)
# sce <- ccImpute(sce, BPPARAM = BPPARAM)

## ----eval=FALSE---------------------------------------------------------------
# library(RhpcBLASctl)
# blas_set_num_threads(4)

## ----reproduce3, echo=FALSE-------------------------------------------------------------------------------------------
## Session info
library("sessioninfo")
options(width = 120)
session_info()