## ----setup, include = FALSE--------------------------------------------------- run_everything = FALSE knitr::opts_chunk$set( eval = nzchar(Sys.getenv("run_vignettes")), collapse = TRUE, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # # Install the metaforest package. This needs to be done only once. # install.packages("metaforest") # # Then, load the metaforest package # library(metaforest) # # Assign the fukkink_lont data, which is included in # # the metaforest package, to an object called "data" # data <- fukkink_lont # # Because MetaForest uses the random number generator (for bootstrapping), # # we set a random seed so analyses can be replicated exactly. # set.seed(62) ## ----echo = FALSE, message=FALSE---------------------------------------------- # library(metaforest) # library(caret) # data <- fukkink_lont # set.seed(62) ## ----eval = FALSE------------------------------------------------------------- # # Run model with many trees to check convergence # check_conv <- MetaForest(yi~., # data = data, # study = "id_exp", # whichweights = "random", # num.trees = 20000) # # Plot convergence trajectory # plot(check_conv) ## ----echo = FALSE------------------------------------------------------------- # check_conv <- readRDS("C:/Git_Repositories/S4_meta-analysis/check_conv.RData") # plot(check_conv) ## ----eval=FALSE--------------------------------------------------------------- # # Model with 5000 trees for replication # mf_rep <- MetaForest(yi~., # data = data, # study = "id_exp", # whichweights = "random", # num.trees = 5000) # # Run recursive preselection, store results in object 'preselect' # preselected <- preselect(mf_rep, # replications = 100, # algorithm = "recursive") # # Plot the results # plot(preselected) # # Retain only moderators with positive variable importance in more than # # 50% of replications # retain_mods <- preselect_vars(preselected, cutoff = .5) ## ----echo = FALSE------------------------------------------------------------- # preselected <- readRDS("C:/Git_Repositories/S4_meta-analysis/preselected.RData") # retain_mods <- preselect_vars(preselected, cutoff = .5) ## ----eval = FALSE------------------------------------------------------------- # # Load the caret library # library(caret) # # Set up 10-fold grouped (=clustered) CV # grouped_cv <- trainControl(method = "cv", # index = groupKFold(data$id_exp, k = 10)) # # # Set up a tuning grid for the three tuning parameters of MetaForest # tuning_grid <- expand.grid(whichweights = c("random", "fixed", "unif"), # mtry = 2:6, # min.node.size = 2:6) # # # X should contain only retained moderators, clustering variable, and vi # X <- data[, c("id_exp", "vi", retain_mods)] # # # Train the model # mf_cv <- train(y = data$yi, # x = X, # study = "id_exp", # Name of the clustering variable # method = ModelInfo_mf(), # trControl = grouped_cv, # tuneGrid = tuning_grid, # num.trees = 5000) # # Examine optimal tuning parameters # mf_cv$results[which.min(mf_cv$results$RMSE), ] ## ----echo = FALSE, warning=FALSE---------------------------------------------- # mf_cv <- readRDS("C:/Git_Repositories/S4_meta-analysis/mf_cv.RData") # mf_cv$results[which.min(mf_cv$results$RMSE), ] # # Extract R^2_{cv} for the optimal tuning parameters # r2_cv <- mf_cv$results$Rsquared[which.min(mf_cv$results$RMSE)] ## ----------------------------------------------------------------------------- # # For convenience, extract final model # final <- mf_cv$finalModel # # Extract R^2_{oob} from the final model # r2_oob <- final$forest$r.squared # # Plot convergence # plot(final) ## ----------------------------------------------------------------------------- # # Plot variable importance # VarImpPlot(final) # # Sort the variable names by importance, so that the # # partial dependence plots will be ranked by importance # ordered_vars <- names(final$forest$variable.importance)[ # order(final$forest$variable.importance, decreasing = TRUE)] # # Plot partial dependence # PartialDependence(final, vars = ordered_vars, # rawdata = TRUE, pi = .95)