## ----setup, message = FALSE, warning = FALSE---------------------------------- library(rprev) library(survival) data(prevsim) ## ----------------------------------------------------------------------------- summary(prevsim) ## ----basicsurvival, fig.height=4, fig.width=7, echo=F------------------------- survf <- survfit(Surv(time, status) ~ sex, data=prevsim) survf_df <- data.frame(t=survf$time, s=survf$surv, sex=rep(c('M', 'F'), survf$strata), stringsAsFactors = TRUE) ggplot2::ggplot(survf_df, ggplot2::aes(x=t, y=s, colour=sex)) + ggplot2::geom_line() + ggplot2::theme_bw() + ggplot2::labs(x='Time (days)', y='Survival probability') + ggplot2::ylim(0, 1) ## ----------------------------------------------------------------------------- prevalence_total <- prevalence(index='2013-01-30', num_years_to_estimate=c(3, 5, 10, 20), data=prevsim, inc_formula = entrydate ~ sex, surv_formula = Surv(time, status) ~ age + sex, dist='weibull', population_size = 1e6, death_column = 'eventdate') ## ----------------------------------------------------------------------------- prevalence_total ## ----------------------------------------------------------------------------- summary(prevalence_total) ## ----------------------------------------------------------------------------- prevalence_total$estimates ## ----------------------------------------------------------------------------- head(prevalence_total$simulated) ## ----------------------------------------------------------------------------- llog <- flexsurv::flexsurvreg(Surv(time, status) ~ age + sex, data=prevsim, dist='llogis') llog ## ----cache=T------------------------------------------------------------------ prev_llog <- prevalence(index='2013-01-30', num_years_to_estimate=c(3, 5, 10, 20), data=prevsim, inc_formula = entrydate ~ sex, surv_model=llog, population_size = 1e6, death_column = 'eventdate', N_boot = 100) ## ----cache=T------------------------------------------------------------------ prev_llog ## ----------------------------------------------------------------------------- fix_cure_mod <- fixed_cure(Surv(time, status) ~ age + sex, data=prevsim, cure_time=5*365.25, dist='weibull') ## ----------------------------------------------------------------------------- prevalence(index='2013-01-30', num_years_to_estimate=20, data=prevsim, inc_formula = entrydate ~ sex, surv_model=fix_cure_mod, # Pass in the cure model that was built above population_size = 1e6, death_column = 'eventdate', N_boot = 30) ## ----------------------------------------------------------------------------- prevalence_total ## ----------------------------------------------------------------------------- mixture_cure <- flexsurvcure::flexsurvcure(Surv(time, status) ~ age + sex, data=prevsim, dist='weibull', link='logistic', mixture=TRUE) ## ----------------------------------------------------------------------------- prevalence(index='2013-01-30', num_years_to_estimate=20, data=prevsim, inc_formula = entrydate ~ sex, surv_model=mixture_cure, population_size = 1e6, death_column = 'eventdate', N_boot = 30) ## ----eval=F------------------------------------------------------------------- # build_my_survival_object <- function(formula, input_data) { # # Build a survival model using the specified formula and input data # model <- ... # object <- list(model=model, # call=match.call()) # the function call must be included as an item 'call' # class(object) <- "myobj" # object # } ## ----eval=F------------------------------------------------------------------- # data <- data.frame(...) # myobj <- build_my_survival_object(Surv(time, status) ~ sex, data) # prevalence(... # surv_model=myobj, # This will work # ...) ## ----eval=F------------------------------------------------------------------- # some_data <- data.frame(...) # myobj <- build_my_survival_object(Surv(time, status) ~ sex, some_data) # prevalence(... # surv_model=myobj, # This WON'T work, since the data parameter was called 'some_data' instead # ...) ## ----------------------------------------------------------------------------- build_poisson <- function(input_data) { rate <- nrow(input_data) / as.numeric(difftime(max(input_data$entrydate), min(input_data$eventdate))) # Build a survival model using the specified formula and input data object <- list(rate=rate, call=match.call()) # the function call must be included as an item 'call' class(object) <- "pois" object } ## ----------------------------------------------------------------------------- data <- prevsim pois_mod <- build_poisson(input_data=data) ## ----------------------------------------------------------------------------- pois_mod ## ----eval=F------------------------------------------------------------------- # # object: The incidence model that will have been created on the bootstrapped data # # data: The data available to fit the model on, will be the registry data set provided to prevalence as this acts as the attribute prior distribution. # # timeframe: A single number specifying how long to generate incident cases for. # # covars: A character vector specifying the names of individual covariates that must be included in the returned data.table (or data frame) # # # Returns a data.table (or data frame but data.table is preferred) where each row represents an incident case with: # # - The first column being the time since the origin, i.e. index date - N year prevalence # # - Any subsequent columns holding covariates that must be provided as specified in the 'covars' argument # draw_incident_population <- function(object, data, timeframe, covars, ...) ## ----------------------------------------------------------------------------- draw_incident_population.pois <- function(object, data, timeframe, covars, ...) { # Firstly draw inter-arrival times in the period [0, timeframe]. # The expected number is simply timeframe * rate so we'll take this amount + a margin for error. expected <- 1.5 * (timeframe * object$rate) # Now draw inter-arrival times inter_arrival <- rexp(expected, object$rate) # Determine absolute incident times incident_times <- cumsum(inter_arrival) # Truncate to those within the timeframe incident_times <- incident_times[incident_times < timeframe] num_incident <- length(incident_times) # Sample individual attributes into a matrix. The required attributes are given by 'covars' argument attrs <- do.call('cbind', lapply(covars, function(x) sample(data[[x]], num_incident, replace=T))) # Now add the incident times as the first column attrs <- cbind(incident_times, attrs) # Convert to data frame and add column names attrs <- data.frame(attrs) colnames(attrs) <- c('incident_time', covars) # Return this data frame attrs } ## ----------------------------------------------------------------------------- inc_times <- validate_incidence_model(pois_mod, prevsim) ## ----------------------------------------------------------------------------- head(inc_times) ## ----------------------------------------------------------------------------- prevalence(index='2013-01-30', num_years_to_estimate=c(3, 5, 10, 20), data=prevsim, inc_model = pois_mod, surv_formula = Surv(time, status) ~ age + sex, dist='weibull', population_size = 1e6, incident_column = 'entrydate', death_column = 'eventdate') ## ----------------------------------------------------------------------------- library(flexsurv) build_wei <- function(data) { mod <- flexsurvreg(Surv(time, status) ~ age, data=data, dist='weibull') obj <- list(coefs=coef(mod), call=match.call()) class(obj) <- 'mysurv' obj } ## ----------------------------------------------------------------------------- survobj <- build_wei(data=data) survobj ## ----eval=F------------------------------------------------------------------- # # object: The survival model # # # Returns a character vector detailing the covariates required to fit this model. All of # # these values should be columns in the data that is passed in the main 'prevalence' function. # extract_covars <- function(object) ## ----------------------------------------------------------------------------- extract_covars.mysurv <- function(object) { "age" } ## ----eval=F------------------------------------------------------------------- # # object: The survival object # # newdata: A data frame (or data.table) with the incident population stored with their # # required covariates for the model. # # times: A vector of times to estimate survival probability at for individuals in # # corresponding rows of 'newdata'. This should be the same length as there are # # rows in 'newdata' since each individual has their survival probability estimated once. # # # Returns: # # A vector of survival probabilities of length equal to the length of 'times' and the # # number of rows in 'newdata'. # predict_survival_probability <- function(object, newdata, times) ## ----------------------------------------------------------------------------- predict_survival_probability.mysurv <- function(object, newdata, times) { # Calculate linear predictor, this will form the shape parameter shape <- exp(object$coefs[1] + newdata$age*object$coefs[3]) scale <- exp(object$coefs[2]) 1 - pweibull(times, shape, scale) } ## ----------------------------------------------------------------------------- predict_survival_probability(survobj, newdata=data.frame(age=c(50, 70)), times=c(100, 100)) ## ----------------------------------------------------------------------------- probs <- validate_survival_model(survobj, prevsim) head(probs) ## ----------------------------------------------------------------------------- prevalence(index='2013-01-30', num_years_to_estimate=c(3, 5, 10, 20), data=prevsim, inc_formula = entrydate ~ 1, surv_model = survobj, population_size = 1e6, death_column = 'eventdate', N_boot = 100)