params <-
  list(EVAL = TRUE)

## ----echo = FALSE----------------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  eval = if (isTRUE(exists("params"))) params$EVAL else FALSE
)


## ----setup, include=FALSE--------------------------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  dpi = 100,
  fig.asp = 0.8,
  fig.width = 6,
  out.width = "60%",
  fig.align = "center"
)
library(mvgam)
library(ggplot2)
theme_set(theme_bw(base_size = 12, base_family = "serif"))


## --------------------------------------------------------------------------------------------
set.seed(122)
simdat <- sim_mvgam(
  trend_model = AR(),
  prop_trend = 0.6,
  mu = c(0, 1, 2),
  family = poisson()
)
trend_map <- data.frame(
  series = unique(simdat$data_train$series),
  trend = c(1, 1, 2)
)
trend_map


## --------------------------------------------------------------------------------------------
all.equal(levels(trend_map$series), levels(simdat$data_train$series))


## --------------------------------------------------------------------------------------------
fake_mod <- mvgam(
  y ~
    # observation model formula, which has a
    # different intercept per series
    series - 1,

  # process model formula, which has a shared seasonal smooth
  # (each latent process model shares the SAME smooth)
  trend_formula = ~ s(season, bs = "cc", k = 6),

  # AR1 dynamics (each latent process model has DIFFERENT)
  # dynamics; processes are estimated using the noncentred
  # parameterisation for improved efficiency
  trend_model = AR(),
  noncentred = TRUE,

  # supplied trend_map
  trend_map = trend_map,

  # data and observation family
  family = poisson(),
  data = simdat$data_train,
  run_model = FALSE
)


## --------------------------------------------------------------------------------------------
stancode(fake_mod)


## --------------------------------------------------------------------------------------------
fake_mod$model_data$Z


## ----full_mod, include = FALSE, results='hide'-----------------------------------------------
full_mod <- mvgam(
  y ~ series - 1,
  trend_formula = ~ s(season, bs = "cc", k = 6),
  trend_model = AR(),
  noncentred = TRUE,
  trend_map = trend_map,
  family = poisson(),
  data = simdat$data_train,
  silent = 2
)


## ----eval=FALSE------------------------------------------------------------------------------
# full_mod <- mvgam(
#   y ~ series - 1,
#   trend_formula = ~ s(season, bs = "cc", k = 6),
#   trend_model = AR(),
#   noncentred = TRUE,
#   trend_map = trend_map,
#   family = poisson(),
#   data = simdat$data_train,
#   silent = 2
# )

## --------------------------------------------------------------------------------------------
summary(full_mod)


## --------------------------------------------------------------------------------------------
plot(full_mod, type = "trend", series = 1)
plot(full_mod, type = "trend", series = 2)
plot(full_mod, type = "trend", series = 3)


## --------------------------------------------------------------------------------------------
set.seed(123)
# simulate a nonlinear relationship using the mgcv function gamSim
signal_dat <- mgcv::gamSim(n = 100, eg = 1, scale = 1)

# productivity is one of the variables in the simulated data
productivity <- signal_dat$x2

# simulate the true signal, which already has a nonlinear relationship
# with productivity; we will add in a fairly strong AR1 process to
# contribute to the signal
true_signal <- as.vector(
  scale(signal_dat$y) +
    arima.sim(100, model = list(ar = 0.8, sd = 0.1))
)


## --------------------------------------------------------------------------------------------
plot(
  true_signal,
  type = "l",
  bty = "l",
  lwd = 2,
  ylab = "True signal",
  xlab = "Time"
)


## --------------------------------------------------------------------------------------------
# Function to simulate a monotonic response to a covariate
sim_monotonic <- function(x, a = 2.2, b = 2) {
  out <- exp(a * x) / (6 + exp(b * x)) * -1
  return(2.5 * as.vector(scale(out)))
}

# Simulated temperature covariate
temperature <- runif(100, -2, 2)

# Simulate the three series
sim_series <- function(n_series = 3, true_signal) {
  temp_effects <- mgcv::gamSim(n = 100, eg = 7, scale = 0.05)
  alphas <- rnorm(n_series, sd = 2)

  do.call(
    rbind,
    lapply(seq_len(n_series), function(series) {
      data.frame(
        observed = rnorm(
          length(true_signal),
          mean = alphas[series] +
            sim_monotonic(temperature, runif(1, 2.2, 3), runif(1, 2.2, 3)) +
            true_signal,
          sd = runif(1, 1, 2)
        ),
        series = paste0("sensor_", series),
        time = 1:length(true_signal),
        temperature = temperature,
        productivity = productivity,
        true_signal = true_signal
      )
    })
  )
}
model_dat <- sim_series(true_signal = true_signal) %>%
  dplyr::mutate(series = factor(series))


## --------------------------------------------------------------------------------------------
plot_mvgam_series(
  data = model_dat,
  y = "observed",
  series = "all"
)


## --------------------------------------------------------------------------------------------
plot(
  observed ~ temperature,
  data = model_dat %>%
    dplyr::filter(series == "sensor_1"),
  pch = 16,
  bty = "l",
  ylab = "Sensor 1",
  xlab = "Temperature"
)
plot(
  observed ~ temperature,
  data = model_dat %>%
    dplyr::filter(series == "sensor_2"),
  pch = 16,
  bty = "l",
  ylab = "Sensor 2",
  xlab = "Temperature"
)
plot(
  observed ~ temperature,
  data = model_dat %>%
    dplyr::filter(series == "sensor_3"),
  pch = 16,
  bty = "l",
  ylab = "Sensor 3",
  xlab = "Temperature"
)


## ----sensor_mod, include = FALSE, results='hide'---------------------------------------------
mod <- mvgam(
  # formula for observations, allowing for different
  # intercepts and smooth effects of temperature
  formula = observed ~
    series +
      s(temperature, k = 10) +
      s(series, temperature, bs = "sz", k = 8),
  # formula for the latent signal, which can depend
  # nonlinearly on productivity
  trend_formula = ~ s(productivity, k = 8) - 1,
  # in addition to productivity effects, the signal is
  # assumed to exhibit temporal autocorrelation
  trend_model = AR(),
  noncentred = TRUE,
  # trend_map forces all sensors to track the same
  # latent signal
  trend_map = data.frame(
    series = unique(model_dat$series),
    trend = c(1, 1, 1)
  ),

  # informative priors on process error
  # and observation error will help with convergence
  priors = c(
    prior(normal(2, 0.5), class = sigma),
    prior(normal(1, 0.5), class = sigma_obs)
  ),

  # Gaussian observations
  family = gaussian(),
  burnin = 600,
  control = list(adapt_delta = 0.95),
  data = model_dat,
  silent = 2
)


## ----eval=FALSE------------------------------------------------------------------------------
# mod <- mvgam(
#   formula =
#   # formula for observations, allowing for different
#   # intercepts and hierarchical smooth effects of temperature
#     observed ~ series +
#       s(temperature, k = 10) +
#       s(series, temperature, bs = "sz", k = 8),
#   trend_formula =
#   # formula for the latent signal, which can depend
#   # nonlinearly on productivity
#     ~ s(productivity, k = 8) - 1,
#   trend_model =
#   # in addition to productivity effects, the signal is
#   # assumed to exhibit temporal autocorrelation
#     AR(),
#   noncentred = TRUE,
#   trend_map =
#   # trend_map forces all sensors to track the same
#   # latent signal
#     data.frame(
#       series = unique(model_dat$series),
#       trend = c(1, 1, 1)
#     ),
#
#   # informative priors on process error
#   # and observation error will help with convergence
#   priors = c(
#     prior(normal(2, 0.5), class = sigma),
#     prior(normal(1, 0.5), class = sigma_obs)
#   ),
#
#   # Gaussian observations
#   family = gaussian(),
#   data = model_dat,
#   silent = 2
# )

## --------------------------------------------------------------------------------------------
summary(mod, include_betas = FALSE)


## --------------------------------------------------------------------------------------------
conditional_effects(mod, type = "link")


## --------------------------------------------------------------------------------------------
plot_predictions(
  mod,
  condition = c("temperature", "series", "series"),
  points = 0.5
) +
  theme(legend.position = "none")


## --------------------------------------------------------------------------------------------
plot(mod, type = "trend") +
  ggplot2::geom_point(
    data = data.frame(time = 1:100, y = true_signal),
    mapping = ggplot2::aes(x = time, y = y)
  )