## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(scellpam)

## -----------------------------------------------------------------------------
# Initially, state of debug is FALSE. Turn it on with
ScellpamSetDebug(TRUE,debparpam=TRUE,debjmat=TRUE)
# We have also turned on debugging of the parallel PAM algorithm and of the binary matrix creation/manipulation
# but if this annoys you their default value is FALSE for both cases even when the scellpam debug is set to true, so just do
# ScellpamSetDebug(TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  CsvToJMat("countsfile.csv","countsfile.bin")

## ----eval=FALSE---------------------------------------------------------------
#  CsvToJMat("countsfile.csv","countsfile.bin",mtype="sparse",csep=",",
#            ctype="raw",valuetype="float",transpose=FALSE,comment="")

## ----eval=FALSE---------------------------------------------------------------
#  CsvToJMat("countsfile.csv","countsfile.bin",ctype="log1n",transpose=TRUE,
#              comment="Obtained from countsfile.csv")

## ----eval=FALSE---------------------------------------------------------------
#  JMatToCsv("countsfile.bin","countsback.csv",csep=",",withquotes=FALSE)

## ----eval=FALSE---------------------------------------------------------------
#  q <- readRDS("yourdata.rds")
#  dgCMatToJMat(q@assays$RNA@counts,"countsfile.bin",transpose=TRUE,
#               comment="Obtained from yourdata.rds")

## ----eval=FALSE---------------------------------------------------------------
#  dgCMatToJMat(q@raw.data,"countsfile.bin",transpose=TRUE,
#               comment="Obtained from other data")

## ----eval=FALSE---------------------------------------------------------------
#  Gr=GetSeuratGroups(q)

## ----eval=FALSE---------------------------------------------------------------
#  suppressPackageStartupMessages({
#     library(splatter)
#     library(scater)
#  })
#  set.seed(1)
#  Splattersce <- mockSCE()
#  SceToJMat(Splattersce@assays@data@listData$counts,"mockSCE.bin",
#             mtype="full",ctype="log1n",transpose=TRUE,
#             comment="Generated by splatter with mockSCE() and normalized
#                      to log2(counts+1).")

## ----eval=FALSE---------------------------------------------------------------
#  suppressPackageStartupMessages({
#     library(DuoClustering2018)
#  })
#  KuTCCsce <- sce_filteredExpr10_KumarTCC()
#  SceToJMat(KuTCCsce@assays$data@listData$counts,"KuTCC.bin",
#            mtype="full",ctype="log1n",transpose=TRUE,
#            comment="Generated by DuoCLustering with Kumar data and
#                  normalized to log2(counts+1).")

## -----------------------------------------------------------------------------
tmpfile=paste0(tempdir(),"/Trapnell.bin")
CsvToJMat("extdata/conquer_GSE52529_Trapnell_sample.csv",tmpfile,
          transpose=TRUE,comment="Experiment conquer GSE52529-GPL16791")
JMatInfo(tmpfile)

## -----------------------------------------------------------------------------
tmpfile=paste0(tempdir(),"/Trapnell.bin")
tmptxtfile=paste0(tempdir(),"/TrapnellInfo.txt")
JMatInfo(tmpfile,tmptxtfile)

## -----------------------------------------------------------------------------
v<-as.vector(read.table("extdata/Trapnell_remain_genes.csv")[[1]])
tmpfile1=paste0(tempdir(),"/Trapnell.bin")
tmpfiltfile1=paste0(tempdir(),"/Trapnell_filtered.bin")
FilterJMatByName(tmpfile1,v,tmpfiltfile1,namesat="cols")

## ----eval=FALSE---------------------------------------------------------------
#  tmpfile1=paste0(tempdir(),"/Trapnell.bin")
#  tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
#  CalcAndWriteDissimilarityMatrix(tmpfile1,tmppeafile1,distype="Pearson",nthreads=0,
#                  comment="Pearson dissimilarities for cell in experiment conquer GSE52529-GPL16791")

## -----------------------------------------------------------------------------
tmpfile1=paste0(tempdir(),"/Trapnell.bin")
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
CalcAndWriteDissimilarityMatrix(tmpfile1,tmppeafile1,nthreads=-1,
                                comment="Pearson dissimilarities for cell
                                         in experiment conquer
                                         GSE52529-GPL16791")

## -----------------------------------------------------------------------------
JMatInfo(tmppeafile1)

## -----------------------------------------------------------------------------
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
L=ApplyPAM(tmppeafile1,k=10,init_method="BUILD",
           max_iter=1000,nthreads=-1)

## -----------------------------------------------------------------------------
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
Lbuild=ApplyPAM(tmppeafile1,k=10,init_method="BUILD",max_iter=0)
Llab1=ApplyPAM(tmppeafile1,k=10,init_method="LAB",max_iter=0)
Llab2=ApplyPAM(tmppeafile1,k=10,init_method="LAB",max_iter=0)

## -----------------------------------------------------------------------------
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
Llab2Final=ApplyPAM(tmppeafile1,k=10,init_method="PREV",
                    initial_med=Llab2$med,nthreads=-1)

## -----------------------------------------------------------------------------
# Which are the indexes of the points chosen as medoids?
L$med
#
# In which class has point 147 been classified?
L$clasif[147]
#
# And which is the index (row in the dissimilarity matrix) of the medoid closest to point 147?
L$med[L$clasif[147]]

## -----------------------------------------------------------------------------
min(L$clasif)
max(L$clasif)

## -----------------------------------------------------------------------------
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
F <- ClassifAsDataFrame(L,tmppeafile1)

## -----------------------------------------------------------------------------
# Extract column 1 (cell name) of those rows for which distance to
# closest medoid (column 3) is 0
F[which(F[,3]==0),1]

## ----eval=FALSE---------------------------------------------------------------
#  M=BuildAbundanceMatrix(L$clasif,Gr)

## -----------------------------------------------------------------------------
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
S=CalculateSilhouette(Llab2$clasif,tmppeafile1,nthreads=-1)

## -----------------------------------------------------------------------------
Sclus <- NumSilToClusterSil(Llab2$clasif,S)
library(cluster)
plot(Sclus)

## -----------------------------------------------------------------------------
tmpfile1=paste0(tempdir(),"/Trapnell.bin")
tmpfiltfile1=paste0(tempdir(),"/TrapnellFilt.bin")
tmppeafile1=paste0(tempdir(),"/TrapnellPearson.bin")
tmppeafiltfile1=paste0(tempdir(),"/TrapnellPearsonFilt.bin")
Lfilt=FilterBySilhouetteQuantile(S,Llab2,tmpfile1,tmpfiltfile1,
                                 tmppeafile1,tmppeafiltfile1,0.2)

## -----------------------------------------------------------------------------
tmppeafiltfile1=paste0(tempdir(),"/TrapnellPearsonFilt.bin")
Ffilt=ClassifAsDataFrame(Lfilt,tmppeafiltfile1)

## -----------------------------------------------------------------------------
tmpfiltfile1=paste0(tempdir(),"/TrapnellFilt.bin")
tmpfiltcsvfile1=paste0(tempdir(),"/TrapnellFilt.csv")
JMatToCsv(tmpfiltfile1,tmpfiltcsvfile1)

## -----------------------------------------------------------------------------
tmppeafiltfile1=paste0(tempdir(),"/TrapnellPearsonFilt.bin")
Lfinal=ApplyPAM(tmppeafiltfile1,k=length(Lfilt$med),init_method="PREV",initial_med=Lfilt$med,nthreads=-1)