RCSL
is an R toolkit for single-cell clustering and trajectory analysis using single-cell RNA-seq data.
RCSL
can be installed directly from GitHub with ‘devtools’.
library(devtools)
devtools::install_github("QinglinMei/RCSL")
Now we can load RCSL
. We also load the SingleCellExperiment
, ggplot2
and igraph
package.
library(RCSL)
library(SingleCellExperiment)
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, append,
#> as.data.frame, basename, cbind, colnames, dirname, do.call,
#> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#> pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#> tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#>
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#>
#> I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
library(ggplot2)
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following object is masked from 'package:GenomicRanges':
#>
#> union
#> The following object is masked from 'package:IRanges':
#>
#> union
#> The following object is masked from 'package:S4Vectors':
#>
#> union
#> The following objects are masked from 'package:BiocGenerics':
#>
#> normalize, path, union
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
library(umap)
We illustrate the usage of RCSL on a human preimplantation embryos and embryonic stem cells(Yan et al., (2013)). The yan data is distributed together with the RCSL package, with 90 cells and 20,214 genes:
head(ann)
#> cell_type1
#> Oocyte..1.RPKM. zygote
#> Oocyte..2.RPKM. zygote
#> Oocyte..3.RPKM. zygote
#> Zygote..1.RPKM. zygote
#> Zygote..2.RPKM. zygote
#> Zygote..3.RPKM. zygote
yan[1:3, 1:3]
#> Oocyte..1.RPKM. Oocyte..2.RPKM. Oocyte..3.RPKM.
#> C9orf152 0.0 0.0 0.0
#> RPS11 1219.9 1021.1 931.6
#> ELMO2 7.0 12.2 9.3
origData <- yan
label <- ann$cell_type1
In practice, we find it always beneficial to pre-process single-cell RNA-seq datasets, including: 1. Log transformation. 2. Gene filter
data <- log2(as.matrix(origData) + 1)
gfData <- GenesFilter(data)
resSimS <- SimS(gfData)
#> Calculate the Spearman correlation
#> Calculate the Nerighbor Representation
#> Find neighbors by KNN(Euclidean)
Estimated_C <- EstClusters(resSimS$drData,resSimS$S)
#> ======== Calculate maximal strongly connected components ========
#> ======== Calculate maximal strongly connected components ========
#> ======== Calculate maximal strongly connected components ========
resBDSM <- BDSM(resSimS$S, Estimated_C)
#> ======== Calculate maximal strongly connected components ========
ARI_RCSL <- igraph::compare(resBDSM$y, label, method = "adjusted.rand")
DataName <- "Yan"
res_TrajecAnalysis <- TrajectoryAnalysis(gfData, resSimS$drData, resSimS$S,
clustRes = resBDSM$y, TrueLabel = label,
startPoint = 1, dataName = DataName)
res_TrajecAnalysis$MSTPlot
res_TrajecAnalysis$PseudoTimePlot
res_TrajecAnalysis$TrajectoryPlot