The goal of kerasnip is to provide a seamless bridge between the keras and tidymodels ecosystems. It allows for the dynamic creation of parsnip model specifications for Keras models, making them fully compatible with tidymodels workflows.
You can install the development version of kerasnip from GitHub with:

# install.packages("pak")
pak::pak("davidrsch/kerasnip")
This example shows the core workflow for building a simple, linear stack of layers using create_keras_sequential_spec().
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks
# The first block initializes the model.
input_block <- function(model, input_shape) {
  keras_model_sequential(input_shape = input_shape)
}

# Subsequent blocks add layers.
dense_block <- function(model, units = 32) {
  model |> layer_dense(units = units, activation = "relu")
}

# The final block creates the output layer.
output_block <- function(model) {
  model |> layer_dense(units = 1)
}
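Each block is an ordinary function that takes a model and returns a model, so you can sanity-check the blocks outside kerasnip by chaining them by hand. A minimal sketch (the input shape of 10 is an arbitrary choice for illustration):

# Chain the blocks manually to confirm they compose into a valid model.
# input_block ignores its `model` argument and starts a fresh sequential model.
m <- input_block(NULL, input_shape = 10) |>
  dense_block(units = 64) |>
  output_block()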
# 2. Create a spec from the layer blocks
# This creates a new model function, `basic_mlp()`, in your environment.
create_keras_sequential_spec(
  model_name = "basic_mlp",
  layer_blocks = list(
    input = input_block,
    dense = dense_block,
    output = output_block
  ),
  mode = "regression"
)
# 3. Use the generated spec to define a model.
# We can set the number of dense layers (`num_dense`) and their parameters (`dense_units`).
spec <- basic_mlp(
  num_dense = 2,
  dense_units = 64,
  fit_epochs = 10,
  learn_rate = 0.01
) |>
  set_engine("keras")
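The generated spec is a regular parsnip specification, so the usual inspection tools should apply; assuming the standard parsnip registration that kerasnip performs, translate() shows how the arguments map to the engine:

# Inspect how the spec's arguments map to the keras engine.
translate(spec)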
# 4. Fit the model within a tidymodels workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
  step_normalize(all_numeric_predictors())
wf <- workflow(rec, spec)

set.seed(123)
fit_obj <- fit(wf, data = mtcars)
# 5. Make predictions
predict(fit_obj, new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 21.3
#> 2 21.3
#> 3 22.8
#> 4 21.4
#> 5 18.7
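Because fit_obj is an ordinary fitted workflow, standard tidymodels evaluation tooling applies. A quick sketch using yardstick on the training data (in-sample, so the numbers will be optimistic):

# Evaluate the in-sample fit with yardstick (illustrative only).
fit_obj |>
  augment(new_data = mtcars) |>
  metrics(truth = mpg, estimate = .pred)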
For complex, non-linear architectures, use create_keras_functional_spec(). This example builds a model where the input is forked into two paths, which are then concatenated.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define blocks. For the functional API, blocks are nodes in a graph.
input_block <- function(input_shape) layer_input(shape = input_shape)
path_block <- function(tensor, units = 16) tensor |> layer_dense(units = units)
concat_block <- function(input_a, input_b) layer_concatenate(list(input_a, input_b))
output_block <- function(tensor) layer_dense(tensor, units = 1)
# 2. Create the spec. The graph is defined by block names and their arguments.
create_keras_functional_spec(
  model_name = "forked_mlp",
  layer_blocks = list(
    main_input = input_block,
    path_a = inp_spec(path_block, "main_input"),
    path_b = inp_spec(path_block, "main_input"),
    concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")),
    # The output block must be named 'output'.
    output = inp_spec(output_block, "concatenated")
  ),
  mode = "regression"
)
# 3. Use the new spec. Arguments are prefixed with their block name.
spec <- forked_mlp(path_a_units = 16, path_b_units = 8, fit_epochs = 10) |>
  set_engine("keras")

# Fit and predict as usual
set.seed(123)
fit(spec, mpg ~ ., data = mtcars) |>
  predict(new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 19.4
#> 2 19.5
#> 3 21.9
#> 4 18.6
#> 5 17.9
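Block-prefixed arguments behave like any other parsnip arguments, so a functional spec can be tuned just like the sequential one in the next section. A sketch:

# Mark prefixed arguments for tuning, as with any parsnip spec.
tunable_forked <- forked_mlp(
  path_a_units = tune(),
  path_b_units = tune(),
  fit_epochs = 10
) |>
  set_engine("keras")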
This example tunes the number of dense layers and the rate of a final dropout layer, showing how to tune both the architecture and block hyperparameters simultaneously.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks for a tunable MLP
input_block <- function(model, input_shape) {
  keras_model_sequential(input_shape = input_shape)
}
dense_block <- function(model, units = 32) {
  model |> layer_dense(units = units, activation = "relu")
}
dropout_block <- function(model, rate = 0.2) {
  model |> layer_dropout(rate = rate)
}
output_block <- function(model) {
  model |> layer_dense(units = 1)
}
# 2. Create a spec from the layer blocks
create_keras_sequential_spec(
  model_name = "tunable_mlp",
  layer_blocks = list(
    input = input_block,
    dense = dense_block,
    dropout = dropout_block,
    output = output_block
  ),
  mode = "regression"
)
# 3. Define a tunable model specification
tune_spec <- tunable_mlp(
  num_dense = tune(),
  dense_units = tune(),
  num_dropout = 1,
  dropout_rate = tune(),
  fit_epochs = 10
) |>
  set_engine("keras")
# 4. Set up and run a tuning workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
  step_normalize(all_numeric_predictors())
wf_tune <- workflow(rec, tune_spec)

# Define the tuning grid.
params <- extract_parameter_set_dials(wf_tune) |>
  update(
    num_dense = dials::num_terms(c(1, 3)),
    dense_units = dials::hidden_units(c(8, 64)),
    dropout_rate = dials::dropout(c(0.1, 0.5))
  )
grid <- grid_regular(params, levels = 2)
# 5. Run the tuning
set.seed(456)
folds <- vfold_cv(mtcars, v = 3)

tune_res <- tune_grid(
  wf_tune,
  resamples = folds,
  grid = grid,
  control = control_grid(verbose = FALSE)
)
# 6. Show the best architecture
show_best(tune_res, metric = "rmse")
#> # A tibble: 5 × 7
#> num_dense dense_units dropout_rate .metric .estimator .mean .config
#> <int> <int> <dbl> <chr> <chr> <dbl> <chr>
#> 1 1 64 0.1 rmse standard 2.92 Preprocessor1_Model02
#> 2 1 64 0.5 rmse standard 3.02 Preprocessor1_Model08
#> 3 3 64 0.1 rmse standard 3.15 Preprocessor1_Model04
#> 4 1 8 0.1 rmse standard 3.20 Preprocessor1_Model01
#> 5 3 8 0.1 rmse standard 3.22 Preprocessor1_Model03
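From here, the standard tune workflow applies: select the best configuration, finalize the workflow, and refit on the full data. A sketch:

# Finalize the workflow with the best architecture and refit.
best <- select_best(tune_res, metric = "rmse")
final_fit <- wf_tune |>
  finalize_workflow(best) |>
  fit(data = mtcars)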