# racd06p3.R  March 2012 for R version 2.15.0

rm(list=ls())

# Create log file
sink("racd06p3.Rout")

# ********** OVERVIEW OF racd06p3.R **********

# R Program 
# copyright C 2012 by A. Colin Cameron and Pravin K. Trivedi 
# used for "Regression Analysis of Count Data" SECOND EDITION
# by A. Colin Cameron and Pravin K. Trivedi (2012)
# Cambridge University Press

# To run you need files
#    racd06data3fertilityswiss.dta
#   racd06data4fertilitybritish.dta
# and R packages
#   foreign, MASS, gamlss, pscl, flexmix

# This R program gives some ofthe analysis for chapter 6 Fertility example
# The Stata program racd06p3.do does more.

# Also some models in R cannot be estimated with intercept only
# so age was added as a regressor here (but not in the book)

#  6.4 COMPLETED FERTILITY
 
# ********* DATA DESCRIPTION

# Two datasets ...
# (1) Swiss Household Panel W1 1999 N = 1878
# (2) British Household Panel updated to Wave 18  N = 6782
# See ?? for more detailed discussion 
# Also see racd06makedata3fertility.do for further details 

# ********* 6.5 COMPLETED FERTILITY

### SWISS DATA 

# Read in and select data
# install.packages("foreign")
library(foreign)
data.ch06p3a <- read.dta(file = "racd06data3fertilityswiss.dta")
one <- matrix(1,nrow=nrow(data.ch06p3a),ncol=1)
data.ch06p3a <- cbind(data.ch06p3a,one)

# Allows variables in database to be accessed simply by giving names
attach(data.ch06p3a)
# Lists first six observations
head(data.ch06p3a)

# Variable list
names(data.ch06p3a)

# Summary statistics
summary(data.ch06p3a)
sapply(data.ch06p3a,mean)
sapply(data.ch06p3a,sd)

### TABLE 6.15 FREQUENCIES 

# Frequencies and relative frequencies of children
# PRedicted probabilities from nNB2 come later
table(children)
table(children) / nrow(data.ch06p3a)

### POISSON AND NB2

# Poisson with default standard errors (variance equals the mean)
model.poiss.int <- glm(children ~ one, family=poisson(), data=data.ch06p3a) 
summary(model.poiss.int)
logl.poiss.int <- logLik(model.poiss.int) 

# Negative binomial 2 MLE using MASS
# install.packages("MASS")
library(MASS)
model.nb.int <- glm.nb(children ~ one, data=data.ch06p3a) 
summary(model.nb.int)
logl.nb.int <- logLik(model.nb.int)

# Predicted probabilities for NB2 differ a little from Table 6.15
# install.packages("pscl")
library(pscl)
pnb <- predprob(model.nb.int)
colMeans(pnb)

### REMAINING MODELS THE R COMMANDS DO NOT DO ONTERCEPT ONLY
### SO ADD AGE AS REGRESSOR  (BOOK DOES NOT INCLUDE AGE)

# Poisson with default standard errors (variance equals the mean)
model.poiss <- glm(children ~ age, family=poisson(), data=data.ch06p3a) 
summary(model.poiss)
logl.poiss <- logLik(model.poiss) 

# Negative binomial 2 MLE using MASS
model.nb <- glm.nb(children ~ age, data=data.ch06p3a) 
summary(model.nb)
logl.nb <- logLik(model.nb)

# Finite mixtures of Poisson 2 components using flexmix
# install.packages("flexmix")
library(flexmix)
# Two-component model
model.FMP2 <- flexmix(children ~ age, data=data.ch06p3a, k=2, model=FLXMRglm(family="poisson"))
summary(model.FMP2)
model.FMP2.refit <- refit(model.FMP2) 
summary(model.FMP2.refit)
logl.FMP2 <- logLik(model.FMP2)

# Hurdle Poisson
# install.packages("pscl")
library(pscl)
# Inflation with same regressors as model and logit hurdle
model.hurdle.poiss <- hurdle(children ~ age, data=data.ch06p3a, dist="poisson")
summary(model.hurdle.poiss)
logl.hurdle.poiss <- logLik(model.hurdle.poiss)

# Zero-inflated Poisson (not reported)
model.zip.poiss <- zeroinfl(children ~ age, data=data.ch06p3a, dist="poisson")
summary(model.zip.poiss)
logl.zip.poiss <- logLik(model.zip.poiss)

# Ordered probit requires dependent variable to be declared as a factor variable
childrenFACTOR <- factor(children)
model.orderedprobit <- polr(factor(children) ~ age, method="probit")
summary(model.orderedprobit)
logl.orderedprobit <- logLik(model.orderedprobit)

# TABLE 6.17  LOG-LIKELIHOODS

# Intercept only models
cbind(logl.poiss.int,logl.nb.int)

# WIth AGE as extra regressor  (note: book Table 6.17 did not include AGE as regressor)
cbind(logl.poiss,logl.nb,logl.FMP2,logl.hurdle.poiss,logl.zip.poiss,logl.orderedprobit)

### BRITISH DATA 

# Read in and select data
# install.packages("foreign")
library(foreign)
data.ch06p3b <- read.dta(file = "racd06data4fertilitybritish.dta")
one <- matrix(1,nrow=nrow(data.ch06p3b),ncol=1)
data.ch06p3b <- cbind(data.ch06p3b,one)

# Allows variables in database to be accessed simply by giving names
attach(data.ch06p3b)
# Lists first six observations
head(data.ch06p3b)

### TABLE 6.16 FREQUENCIES 

# Tabulate counts of doctor visits
table(children)
table(children) / nrow(data.ch06p3b)

### POISSON AND NB2

# Poisson with default standard errors (variance equals the mean)
model.poiss.int <- glm(children ~ one, family=poisson(), data=data.ch06p3a) 
summary(model.poiss.int)
logl.poiss.intonly <- logLik(model.poiss.int) 

# Negative binomial 2 MLE using MASS
# install.packages("MASS")
library(MASS)
model.nb.int <- glm.nb(children ~ one, data=data.ch06p3a) 
summary(model.nb.int)
logl.nb.int <- logLik(model.nb.int)

# Predicted probabilities for NB2 differ a little from Table 6.16
# install.packages("pscl")
library(pscl)
pnb <- predprob(model.nb.int)
colMeans(pnb)

# close log file
sink()

