## Purled code of the ClassifyR introduction vignette.
## Restored to one statement per line: in the collapsed original, each
## "## ----" chunk-header comment ran to the end of its physical line and
## commented out all code that followed it, and statements split across
## physical lines (e.g. "DDresults <- / calcCVperformance(...)") were
## syntax errors. Chunk headers are kept as section markers.

## ---- echo = FALSE, results = "asis" ----
options(width = 130)

## ---- echo = FALSE ----
# Embed the workflow diagram in the rendered vignette.
htmltools::img(src = knitr::image_uri("ClassifyRprocedure.png"),
               style = 'margin-left: auto;margin-right: auto')

## ---- message = FALSE ----
library(ClassifyR)

## ---- message = FALSE ----
data(asthma)  # Contains measurements and classes variables.
measurements[1:5, 1:5]
head(classes)

## ------------------------------------------------------------------------
# Basic cross-validated SVM classification with feature selection.
result <- crossValidate(measurements, classes, classifier = "SVM",
                        nFeatures = 20, nFolds = 5, nRepeats = 2,
                        nCores = 1)
performancePlot(result)

## ------------------------------------------------------------------------
# Multi-view classification: annotate features with a dataset of origin
# via mcols, then merge the views before training.
measurementsDF <- DataFrame(measurements)
mcols(measurementsDF) <- data.frame(
  dataset = rep(c("dataset_1", "dataset_2"), times = c(10, 1990)),
  feature = colnames(measurementsDF)
)
result <- crossValidate(measurementsDF, classes, classifier = "SVM",
                        nFolds = 5, nRepeats = 2,
                        multiViewMethod = "merge")
performancePlot(result)

## ---- eval = FALSE ----
# # Assigns first 10 variables to dataset_1, and the rest to dataset_2
# measurementsList <- list(
#   (measurements |> as.data.frame())[1:10],
#   (measurements |> as.data.frame())[11:2000]
# )
# names(measurementsList) <- c("dataset_1", "dataset_2")
#
# result <- crossValidate(measurementsList, classes, classifier = "SVM",
#                         nFolds = 5, nRepeats = 2,
#                         multiViewMethod = "merge")
#
# performancePlot(result)

## ---- eval = FALSE ----
# CVparams <- CrossValParams(parallelParams = SnowParam(16, RNGseed = 123))
# CVparams

## ------------------------------------------------------------------------
# Show the default modelling parameters.
ModellingParams()

## ---- tidy = FALSE ----
# Differential-means (DM) classification with permutation testing.
crossValParams <- CrossValParams(permutations = 5)
DMresults <- runTests(measurements, classes, crossValParams, verbose = 1)
DMresults

## ---- fig.height = 8, fig.width = 8, results = "hold", message = FALSE ----
# Inspect how often each feature was selected, then plot the most-chosen one.
selectionPercentages <- distribution(DMresults, plot = FALSE)
head(selectionPercentages)
sortedPercentages <- sort(selectionPercentages, decreasing = TRUE)
head(sortedPercentages)
mostChosen <- names(sortedPercentages)[1]
bestGenePlot <- plotFeatureClasses(measurements, classes, mostChosen,
                                   dotBinWidth = 0.1,
                                   xAxisLabel = "Normalised Expression")

## ------------------------------------------------------------------------
DMresults <- calcCVperformance(DMresults, "Balanced Error")
DMresults
performance(DMresults)

## ---- tidy = FALSE ----
# Differential-distribution (DD) classification: Kullback-Leibler feature
# ranking with a naive Bayes kernel classifier (no separate predict stage).
modellingParamsDD <- ModellingParams(
  selectParams = SelectParams(KullbackLeiblerRanking),
  trainParams = TrainParams(naiveBayesKernel),
  predictParams = NULL
)
DDresults <- runTests(measurements, classes, crossValParams,
                      modellingParamsDD, verbose = 1)
DDresults

## ---- fig.width = 10, fig.height = 7 ----
# Compare per-sample error rates of the DM and DD classifiers as a heatmap.
library(grid)
DMresults <- calcCVperformance(DMresults, "Sample Error")
DDresults <- calcCVperformance(DDresults, "Sample Error")
resultsList <- list(Abundance = DMresults, Distribution = DDresults)
errorPlot <- samplesMetricMap(resultsList, metric = "Sample Error",
                              xAxisLabel = "Sample",
                              showXtickLabels = FALSE, plot = FALSE)
grid.newpage()
grid.draw(errorPlot)

## ------------------------------------------------------------------------
performancePlot(list(DMresults, DDresults))

## ------------------------------------------------------------------------
# Stability of feature rankings across cross-validation iterations.
rankOverlaps <- rankingPlot(list(DDresults), topRanked = 1:100,
                            xLabelPositions = c(1, seq(10, 100, 10)),
                            plot = FALSE)
rankOverlaps

## ---- fig.height = 5, fig.width = 6 ----
ROCcurves <- ROCplot(list(DMresults, DDresults),
                     fontSizes = c(24, 12, 12, 12, 12))

## ------------------------------------------------------------------------
# SVM with cost-parameter tuning over a small grid.
tuneList <- list(cost = c(0.01, 0.1, 1, 10))
SVMparams <- ModellingParams(
  trainParams = TrainParams(SVMtrainInterface, kernel = "linear",
                            tuneParams = tuneList),
  predictParams = PredictParams(SVMpredictInterface)
)
SVMresults <- runTests(measurements, classes, crossValParams, SVMparams)

## ------------------------------------------------------------------------
length(tunedParameters(SVMresults))
tunedParameters(SVMresults)[1:5]