## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 5 ) ## ----------------------------------------------------------------------------- set.seed(123) n <- 100 # Number of observations p <- 10 # Number of predictors g <- 5 # Number of groups group <- rep(1:g, each = p / g) # Group structure beta <- numeric(p) beta[which(group %in% 1:2)] <- 1 # First two groups are nonzero x <- matrix(rnorm(n * p), n, p) y <- x %*% beta + rnorm(n) ## ----------------------------------------------------------------------------- library(grpsel) fit <- grpsel(x, y, group) ## ----------------------------------------------------------------------------- coef(fit) ## ----------------------------------------------------------------------------- plot(fit) ## ----------------------------------------------------------------------------- x.new <- matrix(rnorm(10 * p), 10, p) predict(fit, x.new) ## ----------------------------------------------------------------------------- fit <- grpsel(x, y, group, penalty = 'grSubset+grLasso') coef(fit, lambda = 0.05, gamma = 0.1) fit <- grpsel(x, y, group, penalty = 'grSubset+Ridge') coef(fit, lambda = 0.05, gamma = 0.1) ## ----------------------------------------------------------------------------- fit <- cv.grpsel(x, y, group, penalty = 'grSubset+Ridge', nfold = 10) # 10-fold cross-validation ## ----------------------------------------------------------------------------- plot(fit) ## ----------------------------------------------------------------------------- coef(fit) predict(fit, x.new) ## ----------------------------------------------------------------------------- y <- rbinom(n, 1, 1 / (1 + exp(- x %*% beta))) fit <- cv.grpsel(x, y, group, loss = 'logistic') coef(fit) ## ----------------------------------------------------------------------------- x <- matrix(rnorm(n * p), n, p) y <- rowSums(x) + rnorm(n) group <- list(1:6, 5:10) fit <- grpsel(x, y, group) ## ----------------------------------------------------------------------------- coef(fit) ## ----------------------------------------------------------------------------- group <- rep(1:g, each = p / g) x <- matrix(rnorm(n * p), n, p) + matrix(rnorm(n), n, p) beta[which(group %in% 1:2)] <- 1 # First two groups are nonzero y <- x %*% beta + rnorm(n) fit <- cv.grpsel(x, y, group) coef(fit) fit <- cv.grpsel(x, y, group, local.search = T) coef(fit) ## ----------------------------------------------------------------------------- m <- 10 # Number of response variables beta <- matrix(0, p, m) beta[1:5, ] <- 1 x <- matrix(rnorm(n * p), n, p) y <- x %*% beta + matrix(rnorm(n * m), n, m) y <- matrix(y, ncol = 1) x <- diag(m) %x% x group <- rep(1:p, m) cvfit <- cv.grpsel(x, y, group) matrix(coef(cvfit)[- 1, , drop = F], ncol = m) ## ----------------------------------------------------------------------------- x <- matrix(runif(n * p), n, p) y <- sinpi(2 * x[, 1]) + cospi(2 * x[, 2]) + rnorm(n, sd = 0.1) df <- 5 splines <- lapply(1:p, \(j) splines::ns(x[, j], df = df)) x.s <- do.call(cbind, splines) group <- rep(1:p, each = df) fit <- cv.grpsel(x.s, y, group) ## ----------------------------------------------------------------------------- unique(group[coef(fit)[- 1] != 0]) ## ----------------------------------------------------------------------------- library(ggplot2) beta0 <- coef(fit)[1] beta <- coef(fit)[- 1] int.x1 <- (beta0 + colMeans(x.s[, - (1:df)]) %*% beta[- (1:df)])[, ] int.x2 <- (beta0 + colMeans(x.s[, - (1:df + df)]) %*% beta[- (1:df + df)])[, ] ggplot(x = seq(- 1, 1, length.out = 101)) + stat_function(fun = \(x) sinpi(2 * x), aes(linetype = 'True function')) + stat_function(fun = \(x) int.x1 + predict(splines[[1]], x) %*% beta[1:df], aes(linetype = 'Fitted function')) + xlab('x') + ylab('f(x)') ggplot(x = seq(- 1, 1, length.out = 101)) + stat_function(fun = \(x) cospi(2 * x), aes(linetype = 'True function')) + stat_function(fun = \(x) int.x2 + predict(splines[[2]], x) %*% beta[1:df + df], aes(linetype = 'Fitted function')) + xlab('x') + ylab('f(x)') ## ----------------------------------------------------------------------------- x <- matrix(rnorm(n * p), n, p) y <- x[, 1] + x[, 2] + x[, 3] + x[, 1] * x[, 2] + rnorm(n, sd = 0.1) x.int <- model.matrix(~ - 1 + . ^ 2, data = as.data.frame(x)) group <- c(1:p, mapply(c, combn(1:p, 2, simplify = F), 1:choose(p, 2) + p, SIMPLIFY = F)) fit <- cv.grpsel(x.int, y, group) colnames(x.int)[coef(fit)[- 1] != 0]