## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(message = FALSE,warning = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(stats) library(MASS) library(bootkmeans) library(abind) library(fclust) library(lmtest) library(ggplot2) library(patchwork) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # library(devtools) # install_github("https://github.com/ghashti-j/bootkmeans") # library(bootkmeans) ## ----------------------------------------------------------------------------- set.seed(1) x <- as.matrix(iris[, -5]) fit <- boot.kmeans( data = x, groups = 3, iterations = 500, itermax = 1000, nstart = 5, verbose = FALSE, maxsamp = 1000 ) ## ----------------------------------------------------------------------------- cat("bootkmeans centres:\n") fit$centers cat("true cluster centers") aggregate(. ~ Species, data = iris, FUN = mean) ## ----------------------------------------------------------------------------- fit$U[1:10, ] ## ----------------------------------------------------------------------------- fit$U[51:60,] ## ----------------------------------------------------------------------------- cat("\n p-value for BG-test: \n") fit$p.value ## ----fig.align='center', fig.width=8,fig.height=8----------------------------- bootk.hardsoftvis(x, fit, plotallvars = TRUE) ## ----------------------------------------------------------------------------- res <- compare.clusters( data = x, groups = 3, nstart = 10, what = "all" ) compare.tables(res, true.labs = iris$Species) ## ----------------------------------------------------------------------------- set.seed(1) numClusters <- 10 numObsPerCluster <- 30 radius <- 6 dim <- 2 centreNumObs <- numObsPerCluster angles <- seq(0, 2 * pi, length.out = numClusters) outerMean <- t(sapply(1:(numClusters - 1), function(i) c(radius * cos(angles[i]), radius * sin(angles[i])))) centreMean <- c(0, 0) allMeans <- rbind(outerMean, centreMean) covMat <- diag(1, dim) data <- do.call(rbind, c(lapply(1:(numClusters - 1), function(i) MASS::mvrnorm(numObsPerCluster, outerMean[i, ], covMat)), list(MASS::mvrnorm(centreNumObs, centreMean, covMat)) )) # within cluster density calculation densities <- sapply(1:numClusters, function(i) mvtnorm::dmvnorm(data, mean = allMeans[i, ], sigma = covMat)) totDensity <- rowSums(densities) # overall densities U <- densities / totDensity # probabilistic (fuzzy) cluster assignments hardU <- apply(U, 1, which.max) # assigning hard cluster labels ## ----fig.align='center', fig.width=8,fig.height=8----------------------------- plotDF <- data.frame(X = data[,1], Y = data[,2], U = 1- apply(U, 1, max), Z = factor(hardU)) ggplot(plotDF, aes(x = X, y = Y, col = Z, size = U)) + geom_point() + theme_classic() + coord_equal() + labs(x = expression(X[1]), y = expression(X[2]), col = "Hard Cluster", size = "Uncertainty") + theme(panel.border = element_rect(NA, "black", 1)) ## ----fig.align='center', fig.width=8,fig.height=8----------------------------- set.seed(1) compareFUZZ <- compare.clusters(data, groups = 10) BKMres <- compareFUZZ$bkm BKMplotDF <- data.frame(X = data[,1], Y = data[,2], U = 1 - apply(BKMres$U, 1, max), Z = factor(BKMres$clusters)) FKMres <- compareFUZZ$fkm FKMplotDF <- data.frame(X = data[,1], Y = data[,2], U = 1 - apply(FKMres$U, 1, max), Z = factor(FKMres$clus[,1])) cpal <- scales::hue_pal()(10) mplots <- function(df, title) { ggplot(df, aes(x = X, y = Y, col = Z, size = U)) + geom_point(alpha = 0.85) + coord_equal() + scale_color_manual(values = cpal) + scale_size_continuous(limits = c(0,1), range = c(1,5)) + labs(x = expression(X[1]), y = expression(X[2]), col = "Cluster", size = "Uncertainty", title = title) + theme_classic() + theme(panel.border = element_rect(fill = NA, color = "black"), legend = "bottom") } p1 <- mplots(BKMplotDF, "bootkmeans") p2 <- mplots(FKMplotDF, "fuzzy cmeans") (p1 + p2) + plot_layout(guides = "collect") & theme(legend.position = "bottom", legend.box = "vertical", legend.box.just = "center") ## ----------------------------------------------------------------------------- cat("bootkmeans clustering accuracy: \n") sum(diag(Thresher::matchLabels(table(BKMres$clusters, hardU))))/nrow(data) cat("\nFuzzy cmeans clustering accuracy: \n") sum(diag(Thresher::matchLabels(table(FKMres$clus[,1], hardU))))/nrow(data) ## ----------------------------------------------------------------------------- cat("bootkmeans FARI: \n") fari(U, BKMres$U) cat("\nFuzzy cmeans FARI: \n") fari(U, FKMres$U)