## ----setup, include=FALSE----------------------------------------------------- library(knitr) opts_chunk$set(comment = NA, prompt = FALSE, cache = FALSE, results = 'asis') library(kableExtra) library(summarytools) library(magrittr) st_options(plain.ascii = FALSE, style = "rmarkdown", footnote = NA, subtitle.emphasis = FALSE, lang = "en") ## ----echo=FALSE--------------------------------------------------------------- st_css(main = TRUE, global = TRUE, bootstrap = FALSE) ## ----results='asis', echo=FALSE----------------------------------------------- txt <- data.frame( Function = c('freq()', 'ctable()', 'descr()', 'dfSummary()'), Description = c(paste("**Frequency Tables** featuring counts, proportions, cumulative", "statistics as well as missing data reporting"), paste("**Cross-Tabulations** (joint frequencies) between pairs", "of discrete/categorical variables, featuring marginal sums", "as well as row, column or total proportions"), paste("**Univariate ('Descriptive') Statistics** for numerical data,", "with common measures of central tendency and dispersion"), paste("**Data Frame Summaries** featuring type-specific", "information for all variables: univariate", "statistics and/or frequency distributions, bar charts or", "histograms, as well as missing data counts and proportions.", "Very useful to quickly, detect anomalies and identify trends", "at a glance")) ) kable(txt, format = "html", escape = FALSE, align = c('l', 'l')) %>% kable_paper(full_width = FALSE, position = "left") %>% column_spec(1, extra_css = "vertical-align:top") %>% column_spec(2, extra_css = "vertical-align:top") ## ----------------------------------------------------------------------------- freq(iris$Species, plain.ascii = FALSE, style = "rmarkdown") ## ----------------------------------------------------------------------------- freq(iris$Species, report.nas = FALSE, headings = FALSE) ## ----------------------------------------------------------------------------- freq(iris$Species, report.nas = FALSE, totals = FALSE, cumul = FALSE, headings = FALSE) ## ----eval=FALSE--------------------------------------------------------------- # freq(tobacco) ## ----------------------------------------------------------------------------- freq(tobacco$disease, order = "freq", rows = 1:5, headings = FALSE) ## ----eval=FALSE--------------------------------------------------------------- # view(freq(tobacco), collapse = TRUE) ## ----------------------------------------------------------------------------- ctable(x = tobacco$smoker, y = tobacco$diseased, prop = "r") # Show row proportions ## ----------------------------------------------------------------------------- with(tobacco, print(ctable(x = smoker, y = diseased, prop = 'n', totals = FALSE, headings = FALSE), method = "render") ) ## ----------------------------------------------------------------------------- library(magrittr) tobacco %$% # Acts like with(tobacco, ...) ctable(x = smoker, y = diseased, chisq = TRUE, OR = TRUE, RR = TRUE, headings = FALSE) %>% print(method = "render") ## ----------------------------------------------------------------------------- descr(iris) ## ----------------------------------------------------------------------------- st_options(descr.silent = TRUE) ## ----------------------------------------------------------------------------- descr(iris, stats = c("mean", "sd"), transpose = TRUE, headings = FALSE) ## ----eval=FALSE--------------------------------------------------------------- # st_options(descr.stats = "common") ## ----eval=FALSE--------------------------------------------------------------- # view(dfSummary(iris)) ## ----eval=FALSE--------------------------------------------------------------- # dfSummary(tobacco, # plain.ascii = FALSE, # style = "grid", # graph.magnif = 0.82, # varnumbers = FALSE, # valid.col = FALSE, # tmp.img.dir = "/tmp") ## ----------------------------------------------------------------------------- st_options( dfSummary.custom.1 = expression( paste( "Q1 - Q3 :", round( quantile(column_data, probs = .25, type = 2, names = FALSE, na.rm = TRUE), digits = 1 ), " - ", round( quantile(column_data, probs = .75, type = 2, names = FALSE, na.rm = TRUE), digits = 1 ) ) ) ) print( dfSummary(iris, varnumbers = FALSE, na.col = FALSE, style = "multiline", plain.ascii = FALSE, headings = FALSE, graph.magnif = .82), method = "render" ) ## ----results='markup'--------------------------------------------------------- library(formatR) st_options(dfSummary.custom.1 = "default") formatR::tidy_source( text = deparse(st_options("dfSummary.custom.1")), indent = 2, args.newline = TRUE ) ## ----eval=FALSE--------------------------------------------------------------- # dfs <- dfSummary(iris) # dfs$Variable <- NULL # This deletes the Variable column ## ----------------------------------------------------------------------------- (iris_stats_by_species <- stby(data = iris, INDICES = iris$Species, FUN = descr, stats = "common", transpose = TRUE)) ## ----------------------------------------------------------------------------- with(tobacco, stby(data = BMI, INDICES = age.gr, FUN = descr, stats = c("mean", "sd", "min", "med", "max"), useNA = TRUE) ) ## ----eval=FALSE--------------------------------------------------------------- # stby(data = list(x = tobacco$smoker, y = tobacco$diseased), # INDICES = tobacco$gender, # FUN = ctable) # # # or equivalently # with(tobacco, # stby(data = list(x = smoker, y = diseased), # INDICES = gender, # FUN = ctable)) ## ----eval=FALSE--------------------------------------------------------------- # library(dplyr) # tobacco$gender %<>% forcats::fct_na_value_to_level() # tobacco %>% # group_by(gender) %>% # descr(stats = "fivenum") ## ----echo=FALSE--------------------------------------------------------------- suppressPackageStartupMessages(library(dplyr)) library(magrittr) tobacco$gender %<>% forcats::fct_na_value_to_level("(Missing)") tobacco %>% group_by(gender) %>% descr(stats = "fivenum") ## ----results='markup'--------------------------------------------------------- library(magrittr) iris %>% descr(stats = "common") %>% tb() %>% knitr::kable() iris$Species %>% freq(cumul = FALSE, report.nas = FALSE) %>% tb() %>% knitr::kable() ## ----results='markup'--------------------------------------------------------- grouped_descr <- stby(data = exams, INDICES = exams$gender, FUN = descr, stats = "common") grouped_descr %>% tb() ## ----results='markup'--------------------------------------------------------- grouped_descr %>% tb(order = 2) ## ----results='markup'--------------------------------------------------------- grouped_descr %>% tb(order = 3) ## ----results='markup'--------------------------------------------------------- tobacco %>% dplyr::group_by(gender) %>% freq(smoker) %>% tb() ## ----results='asis'----------------------------------------------------------- library(kableExtra) library(magrittr) stby(data = iris, INDICES = iris$Species, FUN = descr, stats = "fivenum") %>% tb(order = 3) %>% kable(format = "html", digits = 2) %>% collapse_rows(columns = 1, valign = "top") ## ----eval=FALSE--------------------------------------------------------------- # view(iris_stats_by_species, file = "~/iris_stats_by_species.html") # view(iris_stats_by_species, file = "~/iris_stats_by_species.md") ## ----eval=FALSE--------------------------------------------------------------- # st_options() # Display all global options values # st_options('round.digits') # Display the value of a specific option # st_options(style = 'rmarkdown', # Set the value of one or several options # footnote = NA) # Turn off the footnote for all html output ## ----------------------------------------------------------------------------- (age_stats <- freq(tobacco$age.gr)) print(age_stats, report.nas = FALSE, totals = FALSE, display.type = FALSE, Variable.label = "Age Group") ## ----eval=FALSE--------------------------------------------------------------- # print(dfSummary(tobacco), # custom.css = 'path/to/custom.css', # table.classes = 'tiny-text', # file = "tiny-tobacco-dfSummary.html") ## ----eval=FALSE--------------------------------------------------------------- # print(dfSummary(somedata, # varnumbers = FALSE, # valid.col = FALSE, # graph.magnif = 0.8), # method = 'render', # headings = FALSE, # bootstrap.css = FALSE) ## ----------------------------------------------------------------------------- st_options(lang = "fr") ## ----------------------------------------------------------------------------- freq(iris$Species) ## ----echo = FALSE------------------------------------------------------------- st_options(lang = "en") ## ----eval = FALSE------------------------------------------------------------- # Sys.setlocale("LC_CTYPE", "russian") # st_options(lang = 'ru') ## ----eval=FALSE--------------------------------------------------------------- # Sys.setlocale("LC_CTYPE", "") # st_options(lang = "en") ## ----------------------------------------------------------------------------- section_title <- "**Species of Iris**" define_keywords(title.freq = section_title, freq = "N") freq(iris$Species) ## ----------------------------------------------------------------------------- define_keywords(title.freq = "Types and Counts, Iris Flowers") print( freq(iris$Species, display.type = FALSE), # Variable type won't be displayed... Variable = "" # and neither will the variable name )