# racd06p3.R March 2012 for R version 2.15.0 rm(list=ls()) # Create log file sink("racd06p3.Rout") # ********** OVERVIEW OF racd06p3.R ********** # R Program # copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi # used for "Regression Analysis of Count Data" SECOND EDITION # by A. Colin Cameron and Pravin K. Trivedi (2012) # Cambridge University Press # To run you need files # racd06data3fertilityswiss.dta # racd06data4fertilitybritish.dta # and R packages # foreign, MASS, gamlss, pscl, flexmix # This R program gives some ofthe analysis for chapter 6 Fertility example # The Stata program racd06p3.do does more. # Also some models in R cannot be estimated with intercept only # so age was added as a regressor here (but not in the book) # 6.4 COMPLETED FERTILITY # ********* DATA DESCRIPTION # Two datasets ... # (1) Swiss Household Panel W1 1999 N = 1878 # (2) British Household Panel updated to Wave 18 N = 6782 # See ?? for more detailed discussion # Also see racd06makedata3fertility.do for further details # ********* 6.5 COMPLETED FERTILITY ### SWISS DATA # Read in and select data # install.packages("foreign") library(foreign) data.ch06p3a <- read.dta(file = "racd06data3fertilityswiss.dta") one <- matrix(1,nrow=nrow(data.ch06p3a),ncol=1) data.ch06p3a <- cbind(data.ch06p3a,one) # Allows variables in database to be accessed simply by giving names attach(data.ch06p3a) # Lists first six observations head(data.ch06p3a) # Variable list names(data.ch06p3a) # Summary statistics summary(data.ch06p3a) sapply(data.ch06p3a,mean) sapply(data.ch06p3a,sd) ### TABLE 6.15 FREQUENCIES # Frequencies and relative frequencies of children # PRedicted probabilities from nNB2 come later table(children) table(children) / nrow(data.ch06p3a) ### POISSON AND NB2 # Poisson with default standard errors (variance equals the mean) model.poiss.int <- glm(children ~ one, family=poisson(), data=data.ch06p3a) summary(model.poiss.int) logl.poiss.int <- logLik(model.poiss.int) # Negative binomial 2 MLE using MASS # install.packages("MASS") library(MASS) model.nb.int <- glm.nb(children ~ one, data=data.ch06p3a) summary(model.nb.int) logl.nb.int <- logLik(model.nb.int) # Predicted probabilities for NB2 differ a little from Table 6.15 # install.packages("pscl") library(pscl) pnb <- predprob(model.nb.int) colMeans(pnb) ### REMAINING MODELS THE R COMMANDS DO NOT DO ONTERCEPT ONLY ### SO ADD AGE AS REGRESSOR (BOOK DOES NOT INCLUDE AGE) # Poisson with default standard errors (variance equals the mean) model.poiss <- glm(children ~ age, family=poisson(), data=data.ch06p3a) summary(model.poiss) logl.poiss <- logLik(model.poiss) # Negative binomial 2 MLE using MASS model.nb <- glm.nb(children ~ age, data=data.ch06p3a) summary(model.nb) logl.nb <- logLik(model.nb) # Finite mixtures of Poisson 2 components using flexmix # install.packages("flexmix") library(flexmix) # Two-component model model.FMP2 <- flexmix(children ~ age, data=data.ch06p3a, k=2, model=FLXMRglm(family="poisson")) summary(model.FMP2) model.FMP2.refit <- refit(model.FMP2) summary(model.FMP2.refit) logl.FMP2 <- logLik(model.FMP2) # Hurdle Poisson # install.packages("pscl") library(pscl) # Inflation with same regressors as model and logit hurdle model.hurdle.poiss <- hurdle(children ~ age, data=data.ch06p3a, dist="poisson") summary(model.hurdle.poiss) logl.hurdle.poiss <- logLik(model.hurdle.poiss) # Zero-inflated Poisson (not reported) model.zip.poiss <- zeroinfl(children ~ age, data=data.ch06p3a, dist="poisson") summary(model.zip.poiss) logl.zip.poiss <- logLik(model.zip.poiss) # Ordered probit requires dependent variable to be declared as a factor variable childrenFACTOR <- factor(children) model.orderedprobit <- polr(factor(children) ~ age, method="probit") summary(model.orderedprobit) logl.orderedprobit <- logLik(model.orderedprobit) # TABLE 6.17 LOG-LIKELIHOODS # Intercept only models cbind(logl.poiss.int,logl.nb.int) # WIth AGE as extra regressor (note: book Table 6.17 did not include AGE as regressor) cbind(logl.poiss,logl.nb,logl.FMP2,logl.hurdle.poiss,logl.zip.poiss,logl.orderedprobit) ### BRITISH DATA # Read in and select data # install.packages("foreign") library(foreign) data.ch06p3b <- read.dta(file = "racd06data4fertilitybritish.dta") one <- matrix(1,nrow=nrow(data.ch06p3b),ncol=1) data.ch06p3b <- cbind(data.ch06p3b,one) # Allows variables in database to be accessed simply by giving names attach(data.ch06p3b) # Lists first six observations head(data.ch06p3b) ### TABLE 6.16 FREQUENCIES # Tabulate counts of doctor visits table(children) table(children) / nrow(data.ch06p3b) ### POISSON AND NB2 # Poisson with default standard errors (variance equals the mean) model.poiss.int <- glm(children ~ one, family=poisson(), data=data.ch06p3a) summary(model.poiss.int) logl.poiss.intonly <- logLik(model.poiss.int) # Negative binomial 2 MLE using MASS # install.packages("MASS") library(MASS) model.nb.int <- glm.nb(children ~ one, data=data.ch06p3a) summary(model.nb.int) logl.nb.int <- logLik(model.nb.int) # Predicted probabilities for NB2 differ a little from Table 6.16 # install.packages("pscl") library(pscl) pnb <- predprob(model.nb.int) colMeans(pnb) # close log file sink()