The goal of kerasnip is to provide a seamless bridge between the keras and tidymodels ecosystems. It allows for the dynamic creation of parsnip model specifications for Keras models, making them fully compatible with tidymodels workflows.
Installation
You can install the development version of kerasnip from GitHub with:
# install.packages("pak")
pak::pak("davidrsch/kerasnip")
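kerasnip builds and fits its models with keras3, so a working Keras/TensorFlow backend is also required. If you have not set one up yet, the keras3 installer can do this for you (a one-time setup step for your Python environment, separate from kerasnip itself):
# install.packages("keras3")
keras3::install_keras()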
Example
Example 1: Building a Sequential MLP
This example shows the core workflow for building a simple, linear stack of layers using create_keras_sequential_spec().
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks
# The first block initializes the model.
input_block <- function(model, input_shape) {
keras_model_sequential(input_shape = input_shape)
}
# Subsequent blocks add layers.
dense_block <- function(model, units = 32) {
model |> layer_dense(units = units, activation = "relu")
}
# The final block creates the output layer.
output_block <- function(model) {
model |>
layer_dense(units = 1)
}
# 2. Create a spec from the layer blocks
# This creates a new model function, `basic_mlp()`, in your environment.
create_keras_sequential_spec(
model_name = "basic_mlp",
layer_blocks = list(
input = input_block,
dense = dense_block,
output = output_block
),
mode = "regression"
)
# 3. Use the generated spec to define a model.
# We can set the number of dense layers (`num_dense`) and their parameters (`dense_units`).
spec <- basic_mlp(
num_dense = 2,
dense_units = 64,
fit_epochs = 10,
learn_rate = 0.01
) |>
set_engine("keras")
# 4. Fit the model within a tidymodels workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
step_normalize(all_numeric_predictors())
wf <- workflow(rec, spec)
set.seed(123)
fit_obj <- fit(wf, data = mtcars)
# 5. Make predictions
predict(fit_obj, new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 21.3
#> 2 21.3
#> 3 22.8
#> 4 21.4
#> 5 18.7
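The generated basic_mlp() function exposes a num_* argument for each repeatable block (num_dense above) and a <block>_<parameter> argument for each block parameter (dense_units above), plus common fitting arguments such as fit_epochs and learn_rate. A quick way to see exactly what was generated is to inspect the function with base R (a minimal sketch; the precise argument set depends on your layer blocks and kerasnip version):
# List the arguments kerasnip created for the generated spec function
names(formals(basic_mlp))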
Example 2: Building a Functional “Fork-Join” Model
For complex, non-linear architectures, use create_keras_functional_spec(). This example builds a model where the input is forked into two paths, which are then concatenated.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define blocks. For the functional API, blocks are nodes in a graph.
input_block <- function(input_shape) layer_input(shape = input_shape)
path_block <- function(tensor, units = 16) tensor |> layer_dense(units = units)
concat_block <- function(input_a, input_b) layer_concatenate(list(input_a, input_b))
output_block <- function(tensor) layer_dense(tensor, units = 1)
# 2. Create the spec. The graph is defined by block names and their arguments.
create_keras_functional_spec(
model_name = "forked_mlp",
layer_blocks = list(
main_input = input_block,
path_a = inp_spec(path_block, "main_input"),
path_b = inp_spec(path_block, "main_input"),
concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")),
# The output block must be named 'output'.
output = inp_spec(output_block, "concatenated")
),
mode = "regression"
)
# 3. Use the new spec. Arguments are prefixed with their block name.
spec <- forked_mlp(path_a_units = 16, path_b_units = 8, fit_epochs = 10) |>
set_engine("keras")
# Fit and predict as usual
set.seed(123)
fit(spec, mpg ~ ., data = mtcars) |>
predict(new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 19.4
#> 2 19.5
#> 3 21.9
#> 4 18.6
#> 5 17.9
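To inspect the underlying Keras object after fitting, the engine-level fit can be pulled out with extract_fit_engine() from the tidymodels ecosystem. A minimal sketch; whether this returns the compiled Keras model itself or a kerasnip wrapper around it depends on how kerasnip stores its fits, so treat the final line as an assumption:
# Fit the functional spec and extract the engine-level fit object
fit_res <- fit(spec, mpg ~ ., data = mtcars)
engine_fit <- extract_fit_engine(fit_res)
# If this is the compiled Keras model, summary() prints the layer graph
summary(engine_fit)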
Example 3: Tuning a Sequential MLP Architecture
This example tunes both the architecture (the number of dense layers) and a block hyperparameter (the rate of a final dropout layer) at the same time.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks for a tunable MLP
input_block <- function(model, input_shape) {
keras_model_sequential(input_shape = input_shape)
}
dense_block <- function(model, units = 32) {
model |> layer_dense(units = units, activation = "relu")
}
dropout_block <- function(model, rate = 0.2) {
model |> layer_dropout(rate = rate)
}
output_block <- function(model) {
model |> layer_dense(units = 1)
}
# 2. Create a spec from the layer blocks
create_keras_sequential_spec(
model_name = "tunable_mlp",
layer_blocks = list(
input = input_block,
dense = dense_block,
dropout = dropout_block,
output = output_block
),
mode = "regression"
)
# 3. Define a tunable model specification
tune_spec <- tunable_mlp(
num_dense = tune(),
dense_units = tune(),
num_dropout = 1,
dropout_rate = tune(),
fit_epochs = 10
) |>
set_engine("keras")
# 4. Set up and run a tuning workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
step_normalize(all_numeric_predictors())
wf_tune <- workflow(rec, tune_spec)
# Define the tuning grid.
params <- extract_parameter_set_dials(wf_tune) |>
update(
num_dense = dials::num_terms(c(1, 3)),
dense_units = dials::hidden_units(c(8, 64)),
dropout_rate = dials::dropout(c(0.1, 0.5))
)
grid <- grid_regular(params, levels = 2)
# 5. Run the tuning
set.seed(456)
folds <- vfold_cv(mtcars, v = 3)
tune_res <- tune_grid(
wf_tune,
resamples = folds,
grid = grid,
control = control_grid(verbose = FALSE)
)
# 6. Show the best architecture
show_best(tune_res, metric = "rmse")
#> # A tibble: 5 × 7
#> num_dense dense_units dropout_rate .metric .estimator .mean .config
#> <int> <int> <dbl> <chr> <chr> <dbl> <chr>
#> 1 1 64 0.1 rmse standard 2.92 Preprocessor1_Model02
#> 2 1 64 0.5 rmse standard 3.02 Preprocessor1_Model08
#> 3 3 64 0.1 rmse standard 3.15 Preprocessor1_Model04
#> 4 1 8 0.1 rmse standard 3.20 Preprocessor1_Model01
#> 5 3 8 0.1 rmse standard 3.22 Preprocessor1_Model03
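From here, the winning architecture can be finalized and refit on the full data with standard tidymodels tooling (a minimal sketch; select_best() and finalize_workflow() come from the tune and workflows packages, not from kerasnip):
# Finalize the workflow with the best hyperparameters and refit
best_params <- select_best(tune_res, metric = "rmse")
final_wf <- finalize_workflow(wf_tune, best_params)
final_fit <- fit(final_wf, data = mtcars)
predict(final_fit, new_data = mtcars[1:5, ])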