## ---- message = FALSE, warning = FALSE---------------------------------------- library(dplyr) library(ggplot2) library(carData) ## ---- message = FALSE, warning = FALSE---------------------------------------- set.seed(213141) data("GSSvocab") gss <- as_tibble(GSSvocab) %>% filter(year == '1978') %>% mutate(weight = sample(1:3, size = nrow(.), replace = TRUE, prob = c(0.1, 0.5, 0.4))) %>% select(ageGroup, vocab, weight) gss ## ---- eval = FALSE------------------------------------------------------------ # devtools::install_github("cimentadaj/perccalc") # library(perccalc) ## ---- echo = F, message = F, warning = F-------------------------------------- library(perccalc) ## ---- error = TRUE------------------------------------------------------------ perc_diff(gss, ageGroup, vocab, percentiles = c(90, 10)) ## ----------------------------------------------------------------------------- gss <- gss %>% mutate(ageGroup = factor(ageGroup, ordered = TRUE)) ## ----------------------------------------------------------------------------- perc_diff(gss, ageGroup, vocab, percentiles = c(90, 10)) ## ----------------------------------------------------------------------------- perc_diff(gss, ageGroup, vocab, percentiles = c(50, 10)) ## ----------------------------------------------------------------------------- perc_diff(gss, ageGroup, vocab, weight) ## ---- eval = FALSE------------------------------------------------------------ # # Saving the dataset to a path # gss %>% # write_dta(path = "C:\\Users\\cimentadaj\\Downloads\\gss_data.dta", version = 13) ## ---- eval = FALSE------------------------------------------------------------ # *-------- # use "/Users/cimentadaj/Downloads/gss_data.dta", clear # # drop if missing(ageGroup) # drop if missing(vocab) # # tab ageGroup, gen(inc) # *-------- # # /*----------------------- # Making a data set that has # one observation per age group category # and has mean and se(mean) in each category # and percent of population in the category # ------------------------*/ # # tempname memhold # tempfile results # postfile `memhold' agegroup mean se_mean per using `results' # # forv i = 1/5 { # sum inc`i' [aw=weight] # loc per`i' = r(mean) # # qui sum vocab if inc`i'==1 # # if `r(N)'>0 { # qui regress vocab if inc`i'==1 [aw=weight] # post `memhold' (`i') (_b[_cons]) (_se[_cons]) (`per`i'') # # } # } # postclose `memhold' # # /*----------------------- # Making age group categories # into percentiles # ------------------------*/ # # # use `results', clear # # sort agegroup # gen cathi = sum(per) # gen catlo = cathi[_n-1] # replace catlo = 0 if agegroup==1 # gen catmid = (catlo+cathi)/2 # # /*----------------------- # Calculate age group # vocabulary gaps # ------------------------*/ # # sort agegroup # # g x1 = catmid # g x2 = catmid^2 + ((cathi-catlo)^2)/12 # g x3 = catmid^3 + ((cathi-catlo)^2)/4 # # g cimnhi = mean + 1.96*se_mean # g cimnlo = mean - 1.96*se_mean # # reg mean x1 x2 x3 [aw=1/se_mean^2] # # twoway (rcap cimnhi cimnlo catmid) (scatter mean catmid) /// # (function y = _b[_cons] + _b[x1]*x + _b[x2]*x^2 + _b[x3]*x^3, ran(0 1)) # # loc hi_p = 90 # loc lo_p = 10 # # loc d1 = [`hi_p' - `lo_p']/100 # loc d2 = [(`hi_p')^2 - (`lo_p')^2]/(100^2) # loc d3 = [(`hi_p')^3 - (`lo_p')^3]/(100^3) # # lincom `d1'*x1 + `d2'*x2 + `d3'*x3 # loc diff`hi_p'`lo_p' = r(estimate) # loc se`hi_p'`lo_p' = r(se) # # di "`hi_p'-`lo_p' gap: `diff`hi_p'`lo_p''" # di "se(`hi_p'-`lo_p' gap): `se`hi_p'`lo_p''" ## ----------------------------------------------------------------------------- perc_diff(gss, ageGroup, vocab, weight) ## ----------------------------------------------------------------------------- perc_dist(gss, ageGroup, vocab) %>% head() ## ---- fig.align = 'center', fig.width = 6, fig.height = 5--------------------- gss %>% perc_dist(ageGroup, vocab, weight) %>% mutate(ci_low = estimate - 1.96 * std.error, ci_hi = estimate + 1.96 * std.error) %>% ggplot(aes(percentile, estimate)) + geom_point() + geom_errorbar(aes(ymin = ci_low, ymax = ci_hi)) ## ----------------------------------------------------------------------------- perc_dist(gss, ageGroup, vocab, weight) %>% filter(percentile %in% c(90, 10)) %>% summarize(diff = diff(estimate), se_diff = diff(std.error)) ## ----------------------------------------------------------------------------- perc_diff(gss, ageGroup, vocab, weight, percentiles = c(90, 10))