% File apc/vignettes/apc_indiv_examples.Rnw % Part of the apc package, http://www.R-project.org % Copyright 2020 Zoe Fannon, Bent Nielsen % Distributed under GPL 3 %\VignetteIndexEntry{Introduction: analysis of individual data: further examples} \documentclass[a4paper,twoside,12pt]{article} \usepackage[english]{babel} \usepackage{booktabs,rotating,graphicx,amsmath,verbatim,fancyhdr,Sweave} \usepackage[colorlinks,linkcolor=red,urlcolor=blue]{hyperref} \newcommand{\R}{\textsf{\bf R}} \renewcommand{\topfraction}{0.95} \renewcommand{\bottomfraction}{0.95} \renewcommand{\textfraction}{0.1} \renewcommand{\floatpagefraction}{0.9} \DeclareGraphicsExtensions{.pdf,.jpg} \setcounter{secnumdepth}{1} \setcounter{tocdepth}{1} \oddsidemargin 1mm \evensidemargin 1mm \textwidth 160mm \textheight 230mm \topmargin -5mm \headheight 8mm \headsep 5mm \footskip 15mm \begin{document} \SweaveOpts{concordance=TRUE} \raggedleft \pagestyle{empty} \vspace*{0.1\textheight} \Huge {\bf \texttt{apc.indiv} functions in the package\texttt{apc} \\ Further examples } \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex] \Large 24 August 2020 \vfill \normalsize \begin{tabular}{rl} Zoe Fannon & Department of Economics, University of Oxford \end{tabular} \normalsize \newpage \raggedright \parindent 3ex \parskip 0ex \tableofcontents \cleardoublepage \setcounter{page}{1} \pagestyle{fancy} \renewcommand{\sectionmark}[1]{\markboth{\thesection #1}{\thesection \ #1}} \fancyhead[OL,ER]{\sl apc indiv} %\fancyhead[ER]{\sl \rightmark} \fancyhead[EL,OR]{\bf \thepage} \fancyfoot{} \renewcommand{\headrulewidth}{0.1pt} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Introduction} The purpose of this document is to provide some further examples for \texttt{apc.indiv} for \texttt{apc} where the run time is too long for packages. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Examples for the function \texttt{apc.indiv.est.model} and related functions} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Repeated cross-sectional data} Get data <<>>= library("ISLR") data("Wage") Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ] names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age") cohort <- Wage2$period - Wage2$age indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0) hasdegree <- ifelse(Wage2$education %in% c("4. College Grad", "5. Advanced Degree"), 1, 0) married <- ifelse(Wage2$maritl == "2. Married", 1, 0) Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married) rm(Wage, Wage2, cohort, indust_job, hasdegree, married) @ Bare minimum <<>>= library("plyr") library("apc") model1 <- apc.indiv.est.model(Wage3, dep.var="logwage") apc.plot.fit(model1) @ Add covariates, use a binary outcome, specify model design <<>>= model2 <- apc.indiv.est.model(Wage3, dep.var = "married", covariates = c("logwage", "hasdegree"), model.design = "AC", model.family = "binomial") apc.plot.fit(model2) model2$coefficients.covariates @ use cohort-censored data (eliminates the cohort spike above) <<>>= Wage3_cc <- Wage3[Wage3$cohort>1950 & Wage3$cohort<1982, ] model3 <- apc.indiv.est.model(Wage3_cc, dep.var = "married", covariates = c("logwage", "hasdegree"), model.design = "AC", model.family = "binomial", n.coh.excl.end = 3, n.coh.excl.start = 3) apc.plot.fit(model3) model3$coefficients.covariates @ standard hypothesis tests tools can be used <<>>= library("car") linearHypothesis(model3$fit, "logwage = hasdegree", test="F") @ use a binomial time-saturated model with optional specification of parameters <<>>= model4 <- apc.indiv.est.model(Wage3_cc, dep.var = "hasdegree", model.family = "binomial", covariates = "logwage", model.design = "TS", n.coh.excl.start = 3, n.coh.excl.end = 3) model4$result @ change the parameters of the Newton-Rhapson iteration to ensure convergence (only maxit.loop changed, others are default values) <<>>= myspec2 <- list(20,30,.002,"ols",.Machine$double.eps,.002,NULL,NULL) names(myspec2) <- c("maxit.loop", "maxit.linesearch", "tolerance", "init", "inv.tol", "d1.tol", "custom.kappa", "custom.zeta") model4b <- apc.indiv.est.model(Wage3_cc, dep.var = "hasdegree", model.family = "binomial", covariates = "logwage", model.design = "TS", n.coh.excl.start = 3, n.coh.excl.end = 3, NR.controls = myspec2) model4b$result @ run a model with invented survey weights <<>>= library("survey") inv_wt <- runif(nrow(Wage3), 0, 1) Wage_wt <- cbind(Wage3, inv_wt) model5 <- apc.indiv.est.model(Wage_wt, dep.var = "logwage", wt.var= "inv_wt") apc.plot.fit(model5) @ compare to model1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Panel data} <<>>= library("AER") data("PSID7682") period <- as.numeric(PSID7682$year) + 1975 entry <- period - PSID7682$experience logwage <- log(PSID7682$wage) inunion <- ifelse(PSID7682$union == "yes", 1, 0) insouth <- ifelse(PSID7682$south == "yes", 1, 0) psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth) names(psid2)[names(psid2) %in% c("experience", "entry")] <- c("age", "cohort") psid3 <- psid2[psid2$cohort >=1939, ] rm(PSID7682, period, entry, logwage, inunion, insouth, psid2) @ run a panel data model with fixed effects <<>>= library("plm") model6 <- apc.indiv.est.model(psid3, dep.var = "logwage", covariates = c("inunion", "insouth"), plmmodel = "within", id.var = "id", model.design = "FAP") apc.plot.fit(model6) model6$coefficients.covariates @ existing hypothesis test tools can be used to compare models <<>>= model6b <- apc.indiv.est.model(psid3, dep.var = "logwage", plmmodel = "within", id.var = "id", model.design = "FAP") waldtest(model6$fit, model6b$fit) @ Illustrate the use of the underlying functions <<>>= collinear_1 <- apc.indiv.design.collinear(psid3) design_1 <- apc.indiv.design.model(collinear_1, dep.var = "logwage", covariates = c("inunion", "insouth"), plmmodel = "random", id.var ="id") plm_1 <- plm(design_1$model.formula, data = collinear_1$full.design.collinear, index = c("id", "period"), model = "random") design_2 <- apc.indiv.design.model(collinear_1, dep.var = "logwage", plmmodel = "random", id.var ="id") fit_2 <- apc.indiv.fit.model(design_2) waldtest(plm_1, fit_2$fit, test="F") @ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Examples for the function \texttt{apc.indiv.model.table} and related functions} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Repeated cross-sectional data} <<>>= library("ISLR") data("Wage") Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ] names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age") cohort <- Wage2$period - Wage2$age indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0) hasdegree <- ifelse(Wage2$education %in% c("4. College Grad", "5. Advanced Degree"), 1, 0) married <- ifelse(Wage2$maritl == "2. Married", 1, 0) Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married) rm(Wage, Wage2, cohort, indust_job, hasdegree, married) @ Gaussian outcome variable,no covariates <<>>= test1 <- apc.indiv.model.table(Wage3, dep.var="logwage", test= "Wald", dist="F", model.family="gaussian", TS=TRUE) test1$table @ Binomial outcome variable, one covariate <<>>= test2 <- apc.indiv.model.table(Wage3, dep.var="married", covariates = "hasdegree", test="LR", dist="Chisq", TS=TRUE, model.family="binomial") test2$table test2$NR.report @ Add hypothetical survey weights to the data, investigate models for a binomial outcome with one covariate <<>>= inv_wt <- runif(nrow(Wage3), 0, 1) Wage_wt <- cbind(Wage3, inv_wt) test3 <- apc.indiv.model.table(Wage_wt, dep.var="hasdegree", covariates="logwage", test="Wald", dist="Chisq", model.family="binomial", wt.var="inv_wt") test3$table @ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Panel data} Get data <<>>= library("AER") data("PSID7682") period <- as.numeric(PSID7682$year) + 1975 entry <- period - PSID7682$experience logwage <- log(PSID7682$wage) inunion <- ifelse(PSID7682$union == "yes", 1, 0) insouth <- ifelse(PSID7682$south == "yes", 1, 0) psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth) names(psid2)[names(psid2) %in% c("experience", "entry")] <- c("age", "cohort") psid3 <- psid2[psid2$cohort >=1939, ] @ Gaussian outcome variable, one covariate, random effects <<>>= test4 <- apc.indiv.model.table(psid3, dep.var="logwage", covariates = "insouth", plmmodel="random", id.var="id", model.family="gaussian", test="Wald", dist="Chisq") test4$table @ Gaussian outcome variable, no covariates, fixed effects <<>>= test5 <- apc.indiv.model.table(psid3, dep.var="logwage", plmmodel="within", id.var="id", model.family="gaussian", test="Wald", dist="Chisq") test5$table @ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Examples for the function \texttt{apc.indiv.compare.direct} and related functions} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Repeated cross-sectional data} Get data <<>>= library("ISLR") data("Wage") Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ] names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age") cohort <- Wage2$period - Wage2$age indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0) hasdegree <- ifelse(Wage2$education %in% c("4. College Grad", "5. Advanced Degree"), 1, 0) married <- ifelse(Wage2$maritl == "2. Married", 1, 0) Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married) rm(Wage, Wage2, cohort, indust_job, hasdegree, married) @ Use an F-test to compare an AP model to a tP model <<>>= test1 <- apc.indiv.compare.direct(Wage3, big.model="AP", small.model="tP", dep.var="logwage", model.family="gaussian", test="Wald", dist="F") test1 @ Use a likelihood ratio test to compare the TS model to a PC model <<>>= test2 <- apc.indiv.compare.direct(Wage3, big.model="TS", small.model="PC", dep.var="married", covariates="hasdegree", model.family="binomial", test="LR", dist="Chisq") test2[1:8] @ don't print the NR.controls output in full Add hypothetical weights to the data and use a Chi-squared test to compare APC to P <<>>= inv_wt <- runif(nrow(Wage3), 0, 1) Wage_wt <- cbind(Wage3, inv_wt) test3 <- apc.indiv.compare.direct(Wage_wt, big.model="APC", small.model="P", dep.var="logwage", covariates = c("hasdegree", "married"), wt.var="inv_wt", test="Wald", dist="Chisq", model.family="gaussian") test3 @ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Panel data} Get data <<>>= library("AER") data("PSID7682") period <- as.numeric(PSID7682$year) + 1975 entry <- period - PSID7682$experience logwage <- log(PSID7682$wage) inunion <- ifelse(PSID7682$union == "yes", 1, 0) insouth <- ifelse(PSID7682$south == "yes", 1, 0) psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth) names(psid2)[names(psid2) %in% c("experience", "entry")] <- c("age", "cohort") psid3 <- psid2[psid2$cohort >=1939, ] @ Compare a random effects Pd model to a t model <<>>= test4 <- apc.indiv.compare.direct(psid3, big.model="Pd", small.model="t", dep.var="logwage", covariates="insouth", plmmodel="random", id.var="id", model.family="gaussian", test="Wald", dist="F") test4 @ Compare a fixed effects FAP model to an FP model <<>>= test5 <- apc.indiv.compare.direct(psid3, big.model="FAP", small.model="FP", dep.var="logwage", plmmodel="within", id.var="id", model.family="gaussian", test="Wald", dist="Chisq") test5 @ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Examples for the function \texttt{apc.plot.fit}} Get repeated cross-sectional data <<>>= library("ISLR") data("Wage") Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ] names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age") cohort <- Wage2$period - Wage2$age indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0) hasdegree <- ifelse(Wage2$education %in% c("4. College Grad", "5. Advanced Degree"), 1, 0) married <- ifelse(Wage2$maritl == "2. Married", 1, 0) Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married) rm(Wage, Wage2, cohort, indust_job, hasdegree, married) @ Estimate and plot a model <<>>= library("plyr") library("apc") model1 <- apc.indiv.est.model(Wage3, dep.var="logwage") apc.plot.fit(model1) @ \end{document}