## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(freqtables)
library(dplyr)

## -----------------------------------------------------------------------------
data(mtcars)

## -----------------------------------------------------------------------------
set.seed(123)
students <- tibble(
  male = rbinom(20, 1, 0.5)
)

## -----------------------------------------------------------------------------
students %>% 
  freq_table(male)

## -----------------------------------------------------------------------------
students %>% 
  freq_table(male) %>% 
  
  # Test for equal proportions
  summarise(
    p_bar = n[1] / n_total[1],
    p0 = 0.5,
    n = n_total[1],
    z = abs((p_bar - p0)) / sqrt(p0 * (1 - p0) / n),
    p = 2 * pnorm(z, lower.tail=FALSE)
  )

## -----------------------------------------------------------------------------
prop.test(9, 20, p = 0.5, correct = FALSE)

## -----------------------------------------------------------------------------
chisq.test(c(9, 11), p = c(0.5, 0.5), correct = FALSE)

## -----------------------------------------------------------------------------
students %>% 
  freq_table(male) %>% 
  freq_test()

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am) %>% 
  freq_test() %>% 
  select(1:6, p_chi2_pearson)

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(cyl) %>% 
  freq_test()

## -----------------------------------------------------------------------------
set.seed(456)
students <- tibble(
  male  = rbinom(20, 1, 0.5),
  binge = if_else(male == 0, 
                  rbinom(20, 1, 0.10), # 10% of females binge drink 
                  rbinom(20, 1, 0.30)) # 30% of males binge drink
)

## -----------------------------------------------------------------------------
students %>% 
  freq_table(male, binge) %>% 
  freq_test()

## -----------------------------------------------------------------------------
students %>% 
  freq_table(male, binge) %>% 
  freq_test()

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am, vs) %>% 
  freq_test() %>% 
  select(1:8, p_chi2_pearson)

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am, cyl) %>% 
  freq_test()

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am, cyl) %>% 
  freq_test()

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am, vs) %>% 
  freq_test() %>% 
  select(1:4, n_col, n_total, n_expected:chi2_pearson)

## -----------------------------------------------------------------------------
mtcars %>% 
  freq_table(am, vs) %>% 
  freq_test() %>% 
  select(1:3, chi2_pearson:p_chi2_pearson)

## -----------------------------------------------------------------------------
chisq.test(mtcars$am, mtcars$vs, correct = FALSE)

## -----------------------------------------------------------------------------
m_observed <- matrix(c(5, 0, 1, 4), nrow = 2, byrow = TRUE)
m_observed

## -----------------------------------------------------------------------------
p <- function(m) {
  
  # Calculate the marginal totals
  r1  <- sum(m[1, ])
  r2  <- sum(m[2, ])
  c1  <- sum(m[, 1])
  c2  <- sum(m[, 2])
  tot <- sum(m)
  
  # Calculate the conditional probability of getting the actual matrix given 
  # the particular row and column sums
  r1_fac  <- factorial(r1)
  r2_fac  <- factorial(r2)
  c1_fac  <- factorial(c1)
  c2_fac  <- factorial(c2)
  tot_fac <- factorial(tot)
  
  m_fac      <- factorial(m) # factorial of each cell of the matrix (i.e., 5!, 0!, 1!, 4!)
  prod_m_fac <- prod(m_fac)  # Multiply all of those values together
  
  numerator   <- r1_fac * r2_fac * c1_fac * c2_fac
  denominator <- tot_fac * (prod_m_fac)
  p <- numerator / denominator
  
  # Return p
  p
}

## -----------------------------------------------------------------------------
p_cutoff <- p(m_observed)
p_cutoff

## -----------------------------------------------------------------------------
# Create a empty vector to hold the p values created in the loop below
p_values <- vector(mode = "numeric")

# Create a list of all possible combinations, given our margins
combinations <- list(
  c(4, 1 ,2, 3),
  c(3, 2, 3, 2),
  c(2, 3, 4, 1),
  c(1, 4, 5, 0)
)

# Calculate the p value for each combination and save it
for (i in seq_along(combinations)) {
  
  # Turn into a matrix
  m <- matrix(combinations[[i]], nrow = 2, byrow = TRUE)
  
  # Caclulate p
  p_val <- p(m)
  
  # Save p to vector
  p_values <- c(p_values, p_val)
}

## -----------------------------------------------------------------------------
sum(p_values, p_cutoff)

## -----------------------------------------------------------------------------
df <- tibble(
  p_value  = p_values,
  p_cutoff = p_cutoff
) %>% 
  mutate(
    less_or_equal = p_value <= p_cutoff
  ) %>% 
  print()

## -----------------------------------------------------------------------------
df %>% 
  filter(less_or_equal) %>% 
  summarise(
    `p-value` = sum(p_value, p_cutoff)
  )

## -----------------------------------------------------------------------------
fisher.test(m_observed)

## ----echo=FALSE---------------------------------------------------------------
# Clean up
rm(combinations, df, m, m_observed, i, p_cutoff, p_val, p_values, p)