## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(admiraldev) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(admiral) library(dplyr, warn.conflicts = FALSE) library(pharmaversesdtm) library(lubridate) library(stringr) dm <- pharmaversesdtm::dm ds <- pharmaversesdtm::ds ex <- pharmaversesdtm::ex ae <- pharmaversesdtm::ae lb <- pharmaversesdtm::lb dm <- convert_blanks_to_na(dm) ds <- convert_blanks_to_na(ds) ex <- convert_blanks_to_na(ex) ae <- convert_blanks_to_na(ae) lb <- convert_blanks_to_na(lb) ## ----eval=TRUE---------------------------------------------------------------- adsl <- dm %>% select(-DOMAIN) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, RFSTDTC, COUNTRY, AGE, SEX, RACE, ETHNIC, ARM, ACTARM) ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- dm %>% mutate(TRT01P = ARM, TRT01A = ACTARM) ## ----eval=TRUE---------------------------------------------------------------- # impute start and end time of exposure to first and last respectively, # do not impute date ex_ext <- ex %>% derive_vars_dtm( dtc = EXSTDTC, new_vars_prefix = "EXST" ) %>% derive_vars_dtm( dtc = EXENDTC, new_vars_prefix = "EXEN", time_imputation = "last" ) adsl <- adsl %>% derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) & !is.na(EXSTDTM), new_vars = exprs(TRTSDTM = EXSTDTM, TRTSTMF = EXSTTMF), order = exprs(EXSTDTM, EXSEQ), mode = "first", by_vars = exprs(STUDYID, USUBJID) ) %>% derive_vars_merged( dataset_add = ex_ext, filter_add = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) & !is.na(EXENDTM), new_vars = exprs(TRTEDTM = EXENDTM, TRTETMF = EXENTMF), order = exprs(EXENDTM, EXSEQ), mode = "last", by_vars = exprs(STUDYID, USUBJID) ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_dtm_to_dt(source_vars = exprs(TRTSDTM, TRTEDTM)) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_var_trtdurd() ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, RFSTDTC, TRTSDTM, TRTSDT, TRTEDTM, TRTEDT, TRTDURD) ) ## ----eval=TRUE---------------------------------------------------------------- # convert character date to numeric date without imputation ds_ext <- derive_vars_dt( ds, dtc = DSSTDTC, new_vars_prefix = "DSST" ) adsl <- adsl %>% derive_vars_merged( dataset_add = ds_ext, by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(EOSDT = DSSTDT), filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD != "SCREEN FAILURE" ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( ds_ext, display_vars = exprs(USUBJID, DSCAT, DSDECOD, DSTERM, DSSTDT, DSSTDTC), filter = DSDECOD != "SCREEN FAILURE" ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl, display_vars = exprs(USUBJID, EOSDT)) ## ----eval=TRUE---------------------------------------------------------------- format_eosstt <- function(x) { case_when( x %in% c("COMPLETED") ~ "COMPLETED", x %in% c("SCREEN FAILURE") ~ NA_character_, TRUE ~ "DISCONTINUED" ) } ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_merged( dataset_add = ds, by_vars = exprs(STUDYID, USUBJID), filter_add = DSCAT == "DISPOSITION EVENT", new_vars = exprs(EOSSTT = format_eosstt(DSDECOD)), missing_values = exprs(EOSSTT = "ONGOING") ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl, display_vars = exprs(USUBJID, EOSDT, EOSSTT)) ## ----------------------------------------------------------------------------- adsl <- adsl %>% derive_vars_merged( dataset_add = ds, by_vars = exprs(USUBJID), new_vars = exprs(DCSREAS = DSDECOD, DCSREASP = DSTERM), filter_add = DSCAT == "DISPOSITION EVENT" & !(DSDECOD %in% c("SCREEN FAILURE", "COMPLETED", NA)) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl, display_vars = exprs(USUBJID, EOSDT, EOSSTT, DCSREAS, DCSREASP)) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- adsl <- adsl %>% select(-DCSREAS, -DCSREASP) ## ----------------------------------------------------------------------------- adsl <- adsl %>% derive_vars_merged( dataset_add = ds, by_vars = exprs(USUBJID), new_vars = exprs(DCSREAS = DSDECOD), filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD %notin% c("SCREEN FAILURE", "COMPLETED", NA) ) %>% derive_vars_merged( dataset_add = ds, by_vars = exprs(USUBJID), new_vars = exprs(DCSREASP = DSTERM), filter_add = DSCAT == "DISPOSITION EVENT" & DSDECOD %in% "OTHER" ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl, display_vars = exprs(USUBJID, EOSDT, EOSSTT, DCSREAS, DCSREASP)) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_merged( dataset_add = ds_ext, filter_add = DSDECOD == "RANDOMIZED", by_vars = exprs(STUDYID, USUBJID), new_vars = exprs(RANDDT = DSSTDT) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl, display_vars = exprs(USUBJID, RANDDT)) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_dt( new_vars_prefix = "DTH", dtc = DTHDTC ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette(adsl %>% filter(!is.na(DTHDT) | row_number() %% 50 == 0), display_vars = exprs(USUBJID, TRTEDT, DTHDTC, DTHDT, DTHFL)) ## ----eval=FALSE--------------------------------------------------------------- # adsl <- adsl %>% # derive_vars_dt( # new_vars_prefix = "DTH", # dtc = DTHDTC, # date_imputation = "first" # ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_extreme_event( by_vars = exprs(STUDYID, USUBJID), events = list( event( dataset_name = "ae", condition = AEOUT == "FATAL", set_values_to = exprs(DTHCAUS = AEDECOD), ), event( dataset_name = "ds", condition = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM), set_values_to = exprs(DTHCAUS = DSTERM), ) ), source_datasets = list(ae = ae, ds = ds), tmp_event_nr_var = event_nr, order = exprs(event_nr), mode = "first", new_vars = exprs(DTHCAUS) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, EOSDT, DTHDTC, DTHDT, DTHCAUS), filter = DTHFL == "Y" ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% select(-DTHCAUS) %>% # remove it before deriving it again derive_vars_extreme_event( by_vars = exprs(STUDYID, USUBJID), events = list( event( dataset_name = "ae", condition = AEOUT == "FATAL", set_values_to = exprs(DTHCAUS = AEDECOD, DTHDOM = "AE", DTHSEQ = AESEQ), ), event( dataset_name = "ds", condition = DSDECOD == "DEATH" & grepl("DEATH DUE TO", DSTERM), set_values_to = exprs(DTHCAUS = DSTERM, DTHDOM = "DS", DTHSEQ = DSSEQ), ) ), source_datasets = list(ae = ae, ds = ds), tmp_event_nr_var = event_nr, order = exprs(event_nr), mode = "first", new_vars = exprs(DTHCAUS, DTHDOM, DTHSEQ) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, TRTEDT, DTHDTC, DTHDT, DTHCAUS, DTHDOM, DTHSEQ), filter = DTHFL == "Y" ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% mutate(DTHCGR1 = case_when( is.na(DTHDOM) ~ NA_character_, DTHDOM == "AE" ~ "ADVERSE EVENT", str_detect(DTHCAUS, "(PROGRESSIVE DISEASE|DISEASE RELAPSE)") ~ "PROGRESSIVE DISEASE", TRUE ~ "OTHER" )) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_duration( new_var = DTHADY, start_date = TRTSDT, end_date = DTHDT ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_duration( new_var = LDDTHELD, start_date = TRTEDT, end_date = DTHDT, add_one = FALSE ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, TRTEDT, DTHDTC, DTHDT, DTHCAUS, DTHADY, LDDTHELD), filter = DTHFL == "Y" ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_extreme_event( by_vars = exprs(STUDYID, USUBJID), events = list( event( dataset_name = "ae", order = exprs(AESTDTC, AESEQ), condition = !is.na(AESTDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(AESTDTC, highest_imputation = "M"), seq = AESEQ ), ), event( dataset_name = "ae", order = exprs(AEENDTC, AESEQ), condition = !is.na(AEENDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(AEENDTC, highest_imputation = "M"), seq = AESEQ ), ), event( dataset_name = "lb", order = exprs(LBDTC, LBSEQ), condition = !is.na(LBDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(LBDTC, highest_imputation = "M"), seq = LBSEQ ), ), event( dataset_name = "adsl", condition = !is.na(TRTEDT), set_values_to = exprs(LSTALVDT = TRTEDT, seq = 0), ) ), source_datasets = list(ae = ae, lb = lb, adsl = adsl), tmp_event_nr_var = event_nr, order = exprs(LSTALVDT, seq, event_nr), mode = "last", new_vars = exprs(LSTALVDT) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, TRTEDT, DTHDTC, LSTALVDT), filter = !is.na(TRTSDT) ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% select(-LSTALVDT) %>% # created in the previous call derive_vars_extreme_event( by_vars = exprs(STUDYID, USUBJID), events = list( event( dataset_name = "ae", order = exprs(AESTDTC, AESEQ), condition = !is.na(AESTDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(AESTDTC, highest_imputation = "M"), LALVSEQ = AESEQ, LALVDOM = "AE", LALVVAR = "AESTDTC" ), ), event( dataset_name = "ae", order = exprs(AEENDTC, AESEQ), condition = !is.na(AEENDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(AEENDTC, highest_imputation = "M"), LALVSEQ = AESEQ, LALVDOM = "AE", LALVVAR = "AEENDTC" ), ), event( dataset_name = "lb", order = exprs(LBDTC, LBSEQ), condition = !is.na(LBDTC), set_values_to = exprs( LSTALVDT = convert_dtc_to_dt(LBDTC, highest_imputation = "M"), LALVSEQ = LBSEQ, LALVDOM = "LB", LALVVAR = "LBDTC" ), ), event( dataset_name = "adsl", condition = !is.na(TRTEDT), set_values_to = exprs(LSTALVDT = TRTEDT, LALVSEQ = NA_integer_, LALVDOM = "ADSL", LALVVAR = "TRTEDTM"), ) ), source_datasets = list(ae = ae, lb = lb, adsl = adsl), tmp_event_nr_var = event_nr, order = exprs(LSTALVDT, LALVSEQ, event_nr), mode = "last", new_vars = exprs(LSTALVDT, LALVSEQ, LALVDOM, LALVVAR) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, TRTEDT, DTHDTC, LSTALVDT, LALVDOM, LALVSEQ, LALVVAR), filter = !is.na(TRTSDT) ) ## ----eval=TRUE---------------------------------------------------------------- # create lookup tables agegr1_lookup <- exprs( ~condition, ~AGEGR1, AGE < 18, "<18", between(AGE, 18, 64), "18-64", AGE > 64, ">64", is.na(AGE), "Missing" ) region1_lookup <- exprs( ~condition, ~REGION1, COUNTRY %in% c("CAN", "USA"), "North America", !is.na(COUNTRY), "Rest of the World", is.na(COUNTRY), "Missing" ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_vars_cat( definition = agegr1_lookup ) %>% derive_vars_cat( definition = region1_lookup ) ## ----------------------------------------------------------------------------- # format_agegr1 <- function(var_input) { # case_when( # var_input < 18 ~ "<18", # between(var_input, 18, 64) ~ "18-64", # var_input > 64 ~ ">64", # TRUE ~ "Missing" # ) # } # format_region1 <- function(var_input) { # case_when( # var_input %in% c("CAN", "USA") ~ "North America", # !is.na(var_input) ~ "Rest of the World", # TRUE ~ "Missing" # ) # } # # adsl %>% # mutate( # AGEGR1 = format_agegr1(AGE), # REGION1 = format_region1(COUNTRY) # ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, AGE, SEX, COUNTRY, AGEGR1, REGION1) ) ## ----eval=TRUE---------------------------------------------------------------- adsl <- adsl %>% derive_var_merged_exist_flag( dataset_add = ex, by_vars = exprs(STUDYID, USUBJID), new_var = SAFFL, condition = (EXDOSE > 0 | (EXDOSE == 0 & str_detect(EXTRT, "PLACEBO"))) ) ## ----eval=TRUE, echo=FALSE---------------------------------------------------- dataset_vignette( adsl, display_vars = exprs(USUBJID, TRTSDT, ARM, ACTARM, SAFFL) )