% File apc/vignettes/apc_indiv_examples.Rnw
% Part of the apc package, http://www.R-project.org
% Copyright 2020 Zoe Fannon, Bent Nielsen
% Distributed under GPL 3

%\VignetteIndexEntry{Introduction: analysis of individual data: further examples}

\documentclass[a4paper,twoside,12pt]{article}

\usepackage[english]{babel}
\usepackage{booktabs,rotating,graphicx,amsmath,verbatim,fancyhdr,Sweave}
\usepackage[colorlinks,linkcolor=red,urlcolor=blue]{hyperref}
\newcommand{\R}{\textsf{\bf R}}
\renewcommand{\topfraction}{0.95}
\renewcommand{\bottomfraction}{0.95}
\renewcommand{\textfraction}{0.1}
\renewcommand{\floatpagefraction}{0.9}
\DeclareGraphicsExtensions{.pdf,.jpg}
\setcounter{secnumdepth}{1}
\setcounter{tocdepth}{1}

\oddsidemargin 1mm
\evensidemargin 1mm
\textwidth 160mm
\textheight 230mm
\topmargin -5mm
\headheight 8mm
\headsep 5mm
\footskip 15mm

\begin{document}
\SweaveOpts{concordance=TRUE}

\raggedleft
\pagestyle{empty}
\vspace*{0.1\textheight}
\Huge
{\bf \texttt{apc.indiv} functions in the package\texttt{apc} \\ Further examples }
\noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
\Large
24 August 2020
\vfill
\normalsize
\begin{tabular}{rl}
 Zoe Fannon    & Department of Economics, University of Oxford 
\end{tabular}
\normalsize
\newpage
\raggedright
\parindent 3ex
\parskip 0ex
\tableofcontents
\cleardoublepage
\setcounter{page}{1}
\pagestyle{fancy}
\renewcommand{\sectionmark}[1]{\markboth{\thesection #1}{\thesection \ #1}}
\fancyhead[OL,ER]{\sl apc indiv}
%\fancyhead[ER]{\sl \rightmark}
\fancyhead[EL,OR]{\bf \thepage}
\fancyfoot{}
\renewcommand{\headrulewidth}{0.1pt}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction}

The purpose of this document is to provide some further examples for \texttt{apc.indiv} for \texttt{apc} where the run time is too long for packages.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Examples for the function \texttt{apc.indiv.est.model} and related functions}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Repeated cross-sectional data}

Get data

<<>>=
library("ISLR")
data("Wage")
Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ]
names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age")
cohort <- Wage2$period - Wage2$age
indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0)
hasdegree <- ifelse(Wage2$education %in%
        c("4. College Grad", "5. Advanced Degree"), 1, 0)
married <- ifelse(Wage2$maritl == "2. Married", 1, 0)
Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married)
rm(Wage, Wage2, cohort, indust_job, hasdegree, married)
@

Bare minimum
<<>>=
library("plyr")
library("apc")
model1 <- apc.indiv.est.model(Wage3, dep.var="logwage")
apc.plot.fit(model1)
@

Add covariates, use a binary outcome, specify model design
<<>>=
model2 <- apc.indiv.est.model(Wage3, dep.var = "married",
                              covariates = c("logwage", "hasdegree"),
                              model.design = "AC",
                              model.family = "binomial")
apc.plot.fit(model2)
model2$coefficients.covariates
@

use cohort-censored data (eliminates the cohort spike above)
<<>>=
Wage3_cc <- Wage3[Wage3$cohort>1950 & Wage3$cohort<1982, ]
model3 <- apc.indiv.est.model(Wage3_cc, dep.var = "married",
                              covariates = c("logwage", "hasdegree"),
                              model.design = "AC",
                              model.family = "binomial",
                              n.coh.excl.end = 3,
                              n.coh.excl.start = 3)
apc.plot.fit(model3)
model3$coefficients.covariates
@

standard hypothesis tests tools can be used
<<>>=
library("car")
linearHypothesis(model3$fit, "logwage = hasdegree", test="F")
@

use a binomial time-saturated model with optional specification of parameters
<<>>=
model4 <- apc.indiv.est.model(Wage3_cc, dep.var = "hasdegree",
                          model.family = "binomial",
                          covariates = "logwage",
                          model.design = "TS",
                          n.coh.excl.start = 3,
                          n.coh.excl.end = 3)
model4$result
@

change the parameters of the Newton-Rhapson iteration to ensure convergence (only maxit.loop changed, others are default values)
<<>>=
myspec2 <- list(20,30,.002,"ols",.Machine$double.eps,.002,NULL,NULL)
names(myspec2) <- c("maxit.loop", "maxit.linesearch", "tolerance",
        "init", "inv.tol", "d1.tol", "custom.kappa", "custom.zeta")
model4b <- apc.indiv.est.model(Wage3_cc, dep.var = "hasdegree",
                          model.family = "binomial",
                          covariates = "logwage",
                          model.design = "TS",
                          n.coh.excl.start = 3,
                          n.coh.excl.end = 3,
                          NR.controls = myspec2)
model4b$result
@    

run a model with invented survey weights
<<>>=
library("survey")
inv_wt <- runif(nrow(Wage3), 0, 1)
Wage_wt <- cbind(Wage3, inv_wt)
model5 <- apc.indiv.est.model(Wage_wt, dep.var = "logwage",
                              wt.var= "inv_wt")
apc.plot.fit(model5)
@

compare to model1

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Panel data}

<<>>=
library("AER")
data("PSID7682")
period <- as.numeric(PSID7682$year) + 1975
entry <- period - PSID7682$experience
logwage <- log(PSID7682$wage)
inunion <- ifelse(PSID7682$union == "yes", 1, 0)
insouth <- ifelse(PSID7682$south == "yes", 1, 0)
psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth)
names(psid2)[names(psid2) %in% c("experience", "entry")] <-
                                                c("age", "cohort")
psid3 <- psid2[psid2$cohort >=1939, ]

rm(PSID7682, period, entry, logwage, inunion, insouth, psid2)
@

run a panel data model with fixed effects
<<>>=
library("plm")
model6 <- apc.indiv.est.model(psid3, dep.var = "logwage",
                              covariates = c("inunion", "insouth"),
                              plmmodel = "within", id.var = "id", 
                              model.design = "FAP")
apc.plot.fit(model6)
model6$coefficients.covariates
@

existing hypothesis test tools can be used to compare models
<<>>=
model6b <- apc.indiv.est.model(psid3, dep.var = "logwage",
                               plmmodel = "within", id.var = "id", 
                               model.design = "FAP")
waldtest(model6$fit, model6b$fit)
@

Illustrate the use of the underlying functions
<<>>=
collinear_1 <- apc.indiv.design.collinear(psid3)
design_1 <- apc.indiv.design.model(collinear_1, dep.var = "logwage",
                           covariates = c("inunion", "insouth"),
                           plmmodel = "random", id.var ="id")
plm_1 <- plm(design_1$model.formula,
              data = collinear_1$full.design.collinear, 
              index = c("id", "period"), model = "random")
design_2 <- apc.indiv.design.model(collinear_1, dep.var = "logwage",
                                   plmmodel = "random", id.var ="id")
fit_2 <- apc.indiv.fit.model(design_2)

waldtest(plm_1, fit_2$fit, test="F")
@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Examples for the function \texttt{apc.indiv.model.table} and related functions}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Repeated cross-sectional data}

<<>>=
library("ISLR")
data("Wage")
Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ]
names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age")
cohort <- Wage2$period - Wage2$age
indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0)
hasdegree <- ifelse(Wage2$education %in%
            c("4. College Grad", "5. Advanced Degree"), 1, 0)
married <- ifelse(Wage2$maritl == "2. Married", 1, 0)
Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married)
rm(Wage, Wage2, cohort, indust_job, hasdegree, married)
@

Gaussian outcome variable,no covariates
<<>>=
test1 <- apc.indiv.model.table(Wage3, dep.var="logwage",
                               test= "Wald", dist="F",
                               model.family="gaussian",
                               TS=TRUE)
test1$table
@

Binomial outcome variable, one covariate
<<>>=
test2 <- apc.indiv.model.table(Wage3, dep.var="married",
                               covariates = "hasdegree",
                               test="LR", dist="Chisq",
                               TS=TRUE, model.family="binomial")
test2$table
test2$NR.report
@

Add hypothetical survey weights to the data,
investigate models for a binomial outcome with one covariate
<<>>=
    inv_wt <- runif(nrow(Wage3), 0, 1)
    Wage_wt <- cbind(Wage3, inv_wt)
    test3 <- apc.indiv.model.table(Wage_wt, dep.var="hasdegree",
                               covariates="logwage", test="Wald",
                               dist="Chisq",
                               model.family="binomial",
                               wt.var="inv_wt")
    test3$table
@    

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Panel data}

Get data

<<>>=
library("AER")
data("PSID7682")
period <- as.numeric(PSID7682$year) + 1975
entry <- period - PSID7682$experience
logwage <- log(PSID7682$wage)
inunion <- ifelse(PSID7682$union == "yes", 1, 0)
insouth <- ifelse(PSID7682$south == "yes", 1, 0)
psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth)
names(psid2)[names(psid2) %in% c("experience", "entry")] <-
                                                c("age", "cohort")
psid3 <- psid2[psid2$cohort >=1939, ]
@

Gaussian outcome variable, one covariate, random effects
<<>>=
test4 <- apc.indiv.model.table(psid3, dep.var="logwage",
                               covariates = "insouth",
                               plmmodel="random", id.var="id",
                               model.family="gaussian", 
                               test="Wald", dist="Chisq")
test4$table
@

Gaussian outcome variable, no covariates, fixed effects
<<>>=
test5 <- apc.indiv.model.table(psid3, dep.var="logwage",
                                plmmodel="within", id.var="id",
                                model.family="gaussian", 
                                test="Wald", dist="Chisq")
test5$table
@


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Examples for the function \texttt{apc.indiv.compare.direct} and related functions}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Repeated cross-sectional data}

Get data

<<>>=
library("ISLR")
data("Wage")
Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ]
names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age")
cohort <- Wage2$period - Wage2$age
indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0)
hasdegree <- ifelse(Wage2$education %in%
        c("4. College Grad", "5. Advanced Degree"), 1, 0)
married <- ifelse(Wage2$maritl == "2. Married", 1, 0)
Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married)
rm(Wage, Wage2, cohort, indust_job, hasdegree, married)
@

Use an F-test to compare an AP model to a tP model 
<<>>=
test1 <- apc.indiv.compare.direct(Wage3, big.model="AP",
                small.model="tP",
                dep.var="logwage", model.family="gaussian",
                test="Wald", dist="F")
test1
@

Use a likelihood ratio test to compare the TS model to a PC model
<<>>=
test2 <- apc.indiv.compare.direct(Wage3, big.model="TS",
                small.model="PC",
                dep.var="married", covariates="hasdegree",
                model.family="binomial", test="LR", dist="Chisq")
test2[1:8]
@
don't print the NR.controls output in full                            

Add hypothetical weights to the data and use a Chi-squared test to compare APC to P
<<>>=
inv_wt <- runif(nrow(Wage3), 0, 1)
Wage_wt <- cbind(Wage3, inv_wt)
test3 <- apc.indiv.compare.direct(Wage_wt, big.model="APC",
                small.model="P",
                dep.var="logwage", 
                covariates = c("hasdegree", "married"),
                wt.var="inv_wt", test="Wald", dist="Chisq",
                model.family="gaussian")
test3
@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Panel data}

Get data

<<>>=
library("AER")
data("PSID7682")
period <- as.numeric(PSID7682$year) + 1975
entry <- period - PSID7682$experience
logwage <- log(PSID7682$wage)
inunion <- ifelse(PSID7682$union == "yes", 1, 0)
insouth <- ifelse(PSID7682$south == "yes", 1, 0)
psid2 <- cbind(PSID7682, period, entry, logwage, inunion, insouth)
names(psid2)[names(psid2) %in% c("experience", "entry")] <-
                                                c("age", "cohort")
psid3 <- psid2[psid2$cohort >=1939, ]
@

Compare a random effects Pd model to a t model
<<>>=
test4 <- apc.indiv.compare.direct(psid3, big.model="Pd",
                    small.model="t",
                    dep.var="logwage", covariates="insouth",
                    plmmodel="random", id.var="id",
                    model.family="gaussian", test="Wald", dist="F")
test4
@

Compare a fixed effects FAP model to an FP model
<<>>=
test5 <- apc.indiv.compare.direct(psid3, big.model="FAP",
                    small.model="FP",
                    dep.var="logwage", 
                    plmmodel="within", id.var="id",
                    model.family="gaussian", test="Wald", 
                    dist="Chisq")
test5
@

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Examples for the function \texttt{apc.plot.fit}}

Get repeated cross-sectional data

<<>>=
library("ISLR")
data("Wage")
Wage2 <- Wage[Wage$age >= 25 & Wage$age <= 55, ]
names(Wage2)[names(Wage2) %in% c("year","age")] <- c("period","age")
cohort <- Wage2$period - Wage2$age
indust_job <- ifelse(Wage2$jobclass=="1. Industrial", 1, 0)
hasdegree <- ifelse(Wage2$education %in%
                c("4. College Grad", "5. Advanced Degree"), 1, 0)
married <- ifelse(Wage2$maritl == "2. Married", 1, 0)
Wage3 <- cbind(Wage2, cohort, indust_job, hasdegree, married)
rm(Wage, Wage2, cohort, indust_job, hasdegree, married)
@

Estimate and plot a model
<<>>=
library("plyr")
library("apc")
model1 <- apc.indiv.est.model(Wage3, dep.var="logwage")
apc.plot.fit(model1)
@

\end{document}