| Title: | Interpret Tree Ensembles | 
| Version: | 1.4 | 
| Date: | 2024-04-22 | 
| Imports: | RRF, arules, gbm, xtable, xgboost, data.table, methods | 
| Description: | For tree ensembles such as random forests, regularized random forests and gradient boosted trees, this package provides functions for: extracting, measuring and pruning rules; selecting a compact rule set; summarizing rules into a learner; calculating frequent variable interactions; formatting rules in latex code. Reference: Interpreting tree ensembles with inTrees (Houtao Deng, 2019, <doi:10.1007/s41060-018-0144-8>). | 
| Maintainer: | Houtao Deng <softwaredeng@gmail.com> | 
| BugReports: | https://github.com/softwaredeng/inTrees/issues | 
| License: | GPL (≥ 3) | 
| Packaged: | 2024-04-23 05:26:26 UTC; houtaodeng | 
| NeedsCompilation: | no | 
| Repository: | CRAN | 
| Date/Publication: | 2024-04-23 06:10:03 UTC | 
| Author: | Houtao Deng [aut, cre], Xin Guan [aut], Vadim Khotilovich [aut] | 
Transform gbm object to a list of trees
Description
Transform gbm object to a list of trees that can be used for rule condition extraction
Usage
GBM2List(gbm1,X)
Arguments
gbm1 | 
 gbm object  | 
X | 
 predictor variable matrix  | 
Value
a list of trees in an inTrees-required format
See Also
Examples
    library(gbm)
    data(iris)
    X <- iris[,1:(ncol(iris)-1)]
    target <- iris[,"Species"] 
    gbmFit <- gbm(Species~ ., data=iris, n.tree = 400,
                    interaction.depth = 10,distribution="multinomial")
    treeList <- GBM2List(gbmFit,X)
    ruleExec = extractRules(treeList,X)
    ruleExec <- unique(ruleExec)
    #ruleExec <- ruleExec[1:min(2000,length(ruleExec)),,drop=FALSE]
    ruleMetric <- getRuleMetric(ruleExec,X,target)
    ruleMetric <- pruneRule(ruleMetric,X,target)
    ruleMetric <- unique(ruleMetric)
    learner <- buildLearner(ruleMetric,X,target)
    pred <- applyLearner(learner,X)
    readableLearner <- presentRules(learner,colnames(X)) # more readable format
    err <- 1-sum(pred==target)/length(pred);
internal function
Description
internal function
Usage
Num2Level(rfList, splitV)
Arguments
rfList | 
|
splitV | 
Value
data frame with numeric variables converted to categorical variables.
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (rfList, splitV) 
{
    for (i in 1:rfList$ntree) {
        rfList$list[[i]] <- data.frame(rfList$list[[i]])
        rfList$list[[i]][, "prediction"] <- data.frame(dicretizeVector(rfList$list[[i]][, 
            "prediction"], splitV))
        colnames(rfList$list[[i]]) <- c("left daughter", "right daughter", 
            "split var", "split point", "status", "prediction")
    }
    return(rfList)
  }
Transform a random forest object to a list of trees
Description
Transform a random forest object to a list of trees
Usage
RF2List(rf)
Arguments
rf | 
 random forest object  | 
Value
a list of trees
See Also
Examples
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"] 
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF 
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
Transform an xgboost object to a list of trees
Description
Transform an xgboost object to a list of trees
Usage
XGB2List(xgb, X)
Arguments
xgb | 
 xgboost object  | 
X | 
 predictor variable matrix  | 
Value
a list of trees in an inTrees-required format
See Also
Examples
	library(data.table)
	library(xgboost)
	# test data set 1: iris
	X <- within(iris,rm("Species")); Y <- iris[,"Species"]
	X <- within(iris,rm("Species")); Y <- iris[,"Species"]
	model_mat <- model.matrix(~. -1, data=X)
	xgb <- xgboost(model_mat, label = as.numeric(Y) - 1, nrounds = 20, 
		objective = "multi:softprob", num_class = 3 )
	tree_list <- XGB2List(xgb,model_mat)
apply a simplified tree ensemble learner (STEL) to data
Description
apply STEL to data and get predictions
Usage
applyLearner(learner, X)
Arguments
learner | 
 a matrix with rules ordered by priority  | 
X | 
 predictor variable matrix  | 
Value
predictions for the data
See Also
build a simplified tree ensemble learner (STEL)
Description
Build a simplified tree ensemble learner (STEL). Currently works only for classification problems.
Usage
buildLearner(ruleMetric, X, target, minFreq = 0.01)
Arguments
ruleMetric | 
 a matrix including the conditions, predictions, and and metrics  | 
X | 
 predictor variable matrix  | 
target | 
 target variable  | 
minFreq | 
 minimum frequency of a rule condition in order to be included in STEL.  | 
Value
a matrix including the conditions, prediction, and metrics, ordered by priority.
Author(s)
Houtao Deng
References
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
Examples
data(iris)
library(RRF)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"] 
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF 
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X)
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
ruleMetric <- pruneRule(ruleMetric,X,target) # prune each rule
#ruleMetric <- selectRuleRRF(ruleMetric,X,target) # rule selection
learner <- buildLearner(ruleMetric,X,target)
pred <- applyLearner(learner,X)
read <- presentRules(learner,colnames(X)) # more readable format
# format the rule and metrics as a table in latex code
library(xtable)
print(xtable(read), include.rownames=FALSE)
print(xtable(ruleMetric[1:2,]), include.rownames=FALSE)
compute rule information
Description
compute rule information
Usage
computeRuleInfor(instIx, pred, target)
Arguments
instIx | 
 
 indices of the intances  | 
pred | 
 prediction from a rule  | 
target | 
 target values for the instances  | 
Value
return error and frequency
Examples
	# this is an internal function.
Simulate data
Description
Simulate data
Usage
dataSimulate(flag = 1, nCol = 20, nRow = 1000)
Arguments
flag | 
 1 (default): team optimization; 2: non-linear; 3: linear.  | 
nCol | 
 the number of columns in the data set. must >= 2.  | 
nRow | 
 the number of rows in the data set.  | 
Value
predictor variable matrix and target variable
Examples
res <- dataSimulate(flag=1)
X <- res$X; 
target <- res$target
discretize a variable
Description
discretize a variable
Usage
dicretizeVector(v, K = 3)
Arguments
v | 
 
 vector  | 
K | 
 discretize into up to K levels with equal frequency  | 
Value
discretized levels for v
Examples
 data(iris)
 dicretizeVector(iris[,1],3)
Extract rules from a list of trees
Description
Extract rule conditions from a list of trees. Use functions RF2List/GBM2List to transform RF/GBM objects to list of trees.
Usage
extractRules(treeList, X, ntree = 100, maxdepth = 6, random = FALSE, digits = NULL)
Arguments
treeList | 
 tree list  | 
X | 
 predictor variable matrix  | 
ntree | 
 conditions are extracted from the first ntree trees  | 
maxdepth | 
 conditions are extracted from the top maxdepth levels from each tree  | 
random | 
 the max depth for each tree is an integer randomly chosen between 1 and maxdepth  | 
digits | 
 digits for rounding  | 
Value
a set of rule conditions
Examples
    library(RRF)
    data(iris)
    X <- iris[,1:(ncol(iris)-1)]
    target <- iris[,"Species"] 
    rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF 
    treeList <- RF2List(rf)
    ruleExec <- extractRules(treeList,X,digits=4) # transform to R-executable rules
    ruleExec <- unique(ruleExec)
internal
Description
internal
Usage
formatGBM(gbmList, splitBin,X)
Arguments
gbmList | 
|
splitBin | 
|
X | 
 predictor variable matrix  | 
Value
No return value
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (gbmList, splitBin,X) 
{
    for (j in 1:length(gbmList$list)) {
        a <- gbmList$list[[j]]
        rownames(a) <- 1:nrow(a)
        a$status <- a$SplitVar
        a <- a[, c("LeftNode", "RightNode", "MissingNode", "SplitVar", 
            "SplitCodePred", "status")]
        a[which(a[, "SplitVar"] >= 0), c("SplitVar", "LeftNode", 
            "RightNode", "MissingNode")] <- a[which(a[, "SplitVar"] >= 
            0), c("SplitVar", "LeftNode", "RightNode", "MissingNode")] + 
            1
        ix <- a$MissingNode[which(a$MissingNode > 0)]
        if (length(ix) > 0) 
            a$status[ix] <- 10
        a <- a[, c("LeftNode", "RightNode", "SplitVar", "SplitCodePred", 
            "status")]
        cat <- which(sapply(X, is.factor) & !sapply(X, is.ordered))
        ix <- which(a[, "SplitVar"] %in% cat)
        for (i in ix) a[i, "SplitCodePred"] <- splitBin[a[i, 
            "SplitCodePred"] + 1]
        colnames(a) <- c("left daughter", "right daughter", "split var", 
            "split point", "status")
        gbmList$list[[j]] <- a
    }
    return(gbmList)
  }
calculate frequent variable interactions
Description
calculate frequent variable interactions
Usage
getFreqPattern(ruleMetric, minsup = 0.01, minconf = 0.5, minlen = 1, maxlen = 4)
Arguments
ruleMetric | 
 a matrix including conditions, predictions, and the metrics  | 
minsup | 
 minimum support of conditions in a tree ensemble  | 
minconf | 
 minimum confidence of the rules  | 
minlen | 
 minimum length of the conditions  | 
maxlen | 
 max length of the conditions  | 
Value
a matrix including frequent variable interations (in a form of conditions), predictions, length, support, and confidence.
Examples
library(RRF)
library(arules)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"] 
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF 
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleMetric <- getRuleMetric(ruleExec,X,target) 
freqPattern <- getFreqPattern(ruleMetric)
freqPatternMetric <- getRuleMetric(freqPattern,X,target)
Assign outcomes to a conditions, and measure the rules
Description
Assign outcomes to a conditions, and measure the rules
Usage
getRuleMetric(ruleExec, X, target)
Arguments
ruleExec | 
 a set of rule conditions  | 
X | 
 predictor variable matrix  | 
target | 
 target variable  | 
Value
a matrix including the condictions, predictions, and metrics
References
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
Examples
library(RRF)
data(iris)
X <- iris[,1:(ncol(iris)-1)]
target <- iris[,"Species"] 
rf <- RRF(X,as.factor(target),ntree=100) # build an ordinary RF 
treeList <- RF2List(rf)
ruleExec <- extractRules(treeList,X) # transform to R-executable rules
ruleExec <- unique(ruleExec)
ruleMetric <- getRuleMetric(ruleExec,X,target) # measure rules
get type of each variable
Description
get type of each variable: numeric or categorical
Usage
getTypeX(X)
Arguments
X | 
Value
A vector indicating each variable's type: numeric: 1; categorical: 2
internal
Description
internal
Usage
lookupRule(rules, strList)
Arguments
rules | 
|
strList | 
Value
rules that matched to strList
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (rules, strList) 
{
    ix <- grep(strList[1], rules[, "condition"])
    if (length(strList) >= 2) {
        for (i in 2:length(strList)) {
            ix2 <- grep(strList[i], rules[, "condition"])
            ix <- intersect(ix, ix2)
        }
    }
    if (length(ix) >= 1) 
        return(rules[ix, , drop = FALSE])
    if (length(ix) == 0) 
        return(NULL)
  }
internal
Description
internal
Usage
measureRule(ruleExec, X, target, pred = NULL, regMethod = "mean")
Arguments
ruleExec | 
|
X | 
|
target | 
|
pred | 
|
regMethod | 
Value
data frame including rule's length, frequency, error, rule condition and prediction
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (ruleExec, X, target, pred = NULL) 
{
    len <- length(unlist(strsplit(ruleExec, split = " & ")))
    origRule <- ruleExec
    ruleExec <- paste("which(", ruleExec, ")")
    ixMatch <- eval(parse(text = ruleExec))
    if (length(ixMatch) == 0) {
        v <- c("-1", "-1", "-1", "", "")
        names(v) <- c("len", "freq", "err", "condition", "pred")
        return(v)
    }
    ys <- target[ixMatch]
    freq <- round(length(ys)/nrow(X), digits = 3)
    if (is.numeric(target)) {
        ysMost <- mean(ys)
        err <- sum((ysMost - ys)^2)/length(ys)
    }
    else {
        if (length(pred) > 0) {
            ysMost = pred
        }
        else {
            ysMost <- names(which.max(table(ys)))
        }
        conf <- round(table(ys)[ysMost]/sum(table(ys)), digits = 3)
        err <- 1 - conf
    }
    rule <- origRule
    v <- c(len, freq, err, rule, ysMost)
    names(v) <- c("len", "freq", "err", "condition", "pred")
    return(v)
  }
Present a learner using column names instead of X[i,]
Description
Present a learner using column names instead of X[i,]
Usage
presentRules(rules, colN, digits)
Arguments
rules | 
 a set of rules  | 
colN | 
 a vector including the column names  | 
digits | 
 digits for rounding  | 
Value
a matrix including the conditions (with column names), etc.
See Also
Examples
 # See function "buildLearner"
Prune irrevant variable-value pair from a rule condition
Description
Prune irrevant variable-value pair from a rule condition
Usage
pruneRule(rules, X, target, maxDecay = 0.05, typeDecay = 2)
Arguments
rules | 
 A metrix including the rules and metrics  | 
X | 
 predictor variable matrix  | 
target | 
 target variable vector  | 
maxDecay | 
 threshold of decay  | 
typeDecay | 
 1: relative error; 2: error; default :2  | 
Value
A matrix including the rules each being pruned, and metrics
Author(s)
Houtao Deng
References
Houtao Deng, Interpreting Tree Ensembles with inTrees, technical report, 2014
See Also
Examples
# see function "buildLearner"
internal
Description
internal
Usage
pruneSingleRule(rule, X, target, maxDecay, typeDecay)
Arguments
rule | 
|
X | 
|
target | 
|
maxDecay | 
|
typeDecay | 
Value
a pruned rule and its metrics.
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (rule, X, target, maxDecay, typeDecay) 
{
    newRuleMetric <- measureRule(rule["condition"], X, target)
    errOrig <- as.numeric(newRuleMetric["err"])
    ruleV <- unlist(strsplit(rule["condition"], split = " & "))
    pred <- rule["pred"]
    if (length(ruleV) == 1) 
        return(newRuleMetric)
    for (i in length(ruleV):1) {
        restRule <- ruleV[-i]
        restRule <- paste(restRule, collapse = " & ")
        metricTmp <- measureRule(restRule, X, target, pred)
        errNew <- as.numeric(metricTmp["err"])
        if (typeDecay == 1) {
            decay <- (errNew - errOrig)/max(errOrig, 1e-06)
        }
        else {
            decay <- (errNew - errOrig)
        }
        if (decay <= maxDecay) {
            ruleV <- ruleV[-i]
            newRuleMetric <- metricTmp
            if (length(ruleV) <= 1) 
                break
        }
    }
    return(newRuleMetric)
  }
internal function
Description
internal function
Usage
rule2Table(ruleExec, X, target)
Arguments
ruleExec | 
|
X | 
|
target | 
Value
a matrix of indicators matching each rule condition and each row of data
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (ruleExec, X, target) 
{
    I <- rep(0, nrow(X))
    ruleExec <- paste("which(", ruleExec, ")")
    ixMatch <- eval(parse(text = ruleExec))
    if (length(ixMatch) > 0) 
        I[ixMatch] <- 1
    names(I) = NULL
    return(I)
  }
internal
Description
internal
Usage
ruleList2Exec(X, allRulesList)
Arguments
X | 
|
allRulesList | 
Value
data frame containing rule conditions
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (X, allRulesList) 
{
    typeX = getTypeX(X)
    ruleExec <- unique(t(sapply(allRulesList, singleRuleList2Exec, 
        typeX = typeX)))
    ruleExec <- t(ruleExec)
    colnames(ruleExec) <- "condition"
    return(ruleExec)
  }
select a set of relevant and non-redundant rules
Description
select a set of relevant and non-redundant rules using regularized random forests
Usage
selectRuleRRF(ruleMetric, X, target)
Arguments
ruleMetric | 
 a matrix including the rules and metrics  | 
X | 
 predictor variable matrix  | 
target | 
 response variable  | 
Value
a matrix including a set of relevant and non-redundant rules, and their metrics
Author(s)
Houtao Deng
See Also
Examples
 # See function "buildLearner:
internal
Description
internal
Usage
singleRuleList2Exec(ruleList, typeX)
Arguments
ruleList | 
|
typeX | 
Value
data frame containing rule conditions
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (ruleList, typeX) 
{
    ruleExec <- ""
    vars <- ls(ruleList)
    vars <- vars[order(as.numeric(vars))]
    for (i in 1:length(vars)) {
        if (typeX[as.numeric(vars[i])] == 2) {
            values <- paste("c(", paste(paste("'", ruleList[[vars[i]]], 
                "'", sep = ""), collapse = ","), ")", sep = "")
            tmp = paste("X[,", vars[i], "] %in% ", values, sep = "")
        }
        else {
            tmp = ruleList[[vars[i]]]
        }
        if (i == 1) 
            ruleExec <- paste(ruleExec, tmp, sep = "")
        if (i > 1) 
            ruleExec <- paste(ruleExec, " & ", tmp, sep = "")
    }
    return(c(ruleExec))
  }
internal
Description
internal
Usage
sortRule(M, decreasing = TRUE)
Arguments
M | 
|
decreasing | 
Value
sorted rule conditions
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (M, decreasing = TRUE) 
{
    qIx = order((1 - as.numeric(ruleMetric[, "err"])), as.numeric(ruleMetric[, 
        "freq"]), -as.numeric(ruleMetric[, "len"]), decreasing = decreasing)
    return(M[qIx, ])
  }
internal function
Description
internal function
Usage
treeVisit(tree, rowIx, count, ruleSet, rule, levelX, length, max_length, digits)
Arguments
tree | 
|
rowIx | 
|
count | 
|
ruleSet | 
|
rule | 
|
levelX | 
|
length | 
|
max_length | 
|
digits | 
Value
a list containing rules and the count
internal
Description
Predictions from a rule set
Usage
voteAllRules(ruleMetric, X, type = "r", method = "median")
Arguments
ruleMetric | 
 rules and metrics  | 
X | 
 predictor variable matrix  | 
type | 
 regression or classification  | 
method | 
 for regression, use median or average  | 
Value
predictions from the rule set
Examples
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.
## The function is currently defined as
function (ruleMetric, X, type = "r", method = "median") 
{
    xVoteList = vector("list", nrow(X))
    predY <- rep("", nrow(X))
    for (i in 1:nrow(ruleMetric)) {
        ixMatch <- eval(parse(text = paste("which(", ruleMetric[i, 
            "condition"], ")")))
        if (length(ixMatch) == 0) 
            next
        for (ii in ixMatch) {
            xVoteList[[ii]] = c(xVoteList[[ii]], ruleMetric[i, 
                "pred"])
        }
    }
    for (i in 1:length(xVoteList)) {
        thisV <- xVoteList[[i]]
        if (length(thisV) == 0) 
            next
        if (type == "c") 
            predY[i] <- names(table(thisV)[which.max(table(thisV))])
        if (type == "r") {
            thisV = as.numeric(thisV)
            if (method == "median") {
                predY[i] <- median(thisV)
            }
            else {
                predY[i] <- mean(thisV)
            }
        }
    }
    if (type == "r") 
        predY <- as.numeric(predY)
    return(predY)
  }