* racd06p3.do  January 2013 for Stata version 12

* Close any log that is still open; -capture- swallows the error raised when none is open
capture log close
* Send all subsequent output to a plain-text log, overwriting any existing racd06p3.txt
log using racd06p3.txt, text replace

********** OVERVIEW OF racd06p3.do **********

* STATA Program 
* copyright C 2013 by A. Colin Cameron and Pravin K. Trivedi 
* used for "Regression Analysis of Count Data" SECOND EDITION
* by A. Colin Cameron and Pravin K. Trivedi (2013)
* Cambridge University Press

* Chapter 6.5 only
*   6.5 COMPLETED FERTILITY

* To run you need files
*   racd06data3fertilityswiss.dta
*   racd06data4fertilitybritish.dta
* and user-written Stata addons
*   fmm, hplogit and hnblogit
* in your directory

********** SETUP **********

* Interpret the commands below under Stata 12 rules and start from an empty session
version 12
clear all
set more off           // do not pause the output at -more- prompts
set mem 10m
* set linesize 82
set scheme s1mono      // Graphics scheme

********** DATA DESCRIPTION

* Two datasets ...
* (1) Swiss Household Panel W1 1999 N = 1878
* (2) British Household Panel updated to Wave 18  N = 6782
* See Section 6.5 of the book for more detailed discussion
* Also see racd06makedata3fertility.do for further details 

********** 6.5 COMPLETED FERTILITY

********** SWISS DATA SUMMARY

use racd06data3fertilityswiss.dta
describe
summarize

*** TABLE 6.15 - FREQUENCIES AND PREDICTED PROBABILITIES (SWISS)

* Observed distribution of the count variable
summarize children, detail
tabulate children

* Intercept-only negative binomial fit, then one variable per count 0,...,12
* holding the fitted probability of that count; the final summarize lists them
nbreg children
forvalues count = 0/12 {
    predict nbfit`count', pr(`count')
}
summarize nbfit*

*** BRITISH DATA SUMMARY

use racd06data4fertilitybritish.dta, clear
describe
summarize

**** TABLE 6.16 - FREQUENCIES AND PREDICTED PROBABILITIES (BRITISH)

* Observed distribution of the count variable
summarize children, detail
tabulate children

* Intercept-only negative binomial fit, then one variable per count 0,...,12
* holding the fitted probability of that count; the final summarize lists them
nbreg children
forvalues count = 0/12 {
    predict nbfit`count', pr(`count')
}
summarize nbfit*

*** Histogram of data for the two data sets

* Swiss sample: print mean and variance, then save a frequency histogram
use racd06data3fertilityswiss.dta, clear
summarize children
display "Mean: " r(mean) "  Variance: " r(Var)
label variable children "Number of children (Swiss)"
histogram children, discrete frequency barwidth(0.8) xlabel(#6) ///
    saving(racd06graph1, replace)

* British sample: same steps, saved as the second graph
use racd06data4fertilitybritish.dta, clear
summarize children
display "Mean: " r(mean) "  Variance: " r(Var)
label variable children "Number of children (British)"
histogram children, discrete frequency barwidth(0.8) xlabel(#6) ///
    saving(racd06graph2, replace)

* Put the two saved histograms side by side on a common x-axis
graph combine racd06graph1.gph racd06graph2.gph, ///
    xcommon iscale(0.7) ysize(3) xsize(6)

********** VARIOUS INTERCEPT-ONLY COUNT MODELS FOR SWISS FERTILITY DATA

use racd06data3fertilityswiss.dta, clear

* Baseline intercept-only fits with robust standard errors;
* each is stored under the name used later in the fit table

* Poisson
poisson children, vce(robust)
estimates store POISSON

* Negative binomial
nbreg children, vce(robust)
estimates store NB

* Finite mixtures Poisson - 2 components
* Default start values
fmm children, components(2) mixtureof(poisson) vce(robust)
if (e(converged) == 0) display " *** FMM DID NOT CONVERGE *** "
estimates store FMP2a
* Copy the coefficient vector and back-transform it: the code exponentiates
* elements 1 and 2 (reported as Poisson means) and applies the inverse logit
* to element 3 (reported as the mixture probability)
matrix bfmp2a = e(b)
scalar mu1 = exp(bfmp2a[1,1])
scalar mu2 = exp(bfmp2a[1,2])
scalar pi = exp(bfmp2a[1,3])/(1+exp(bfmp2a[1,3]))
display "Mixture probability = " pi " and Poisson means = " mu1 " and " mu2

* Finite mixtures Poisson - 2 components
* Different start values lead to a higher log-likelihood
* This is at the boundary
fmm children, components(2) mixtureof(poisson) vce(robust) from(1 1 3)
if (e(converged) == 0) display " *** FMM DID NOT CONVERGE *** "
estimates store FMP2b
* Keep this fit's coefficients in their own matrix so that bfmp2a still holds
* the default-start-value solution for comparison
matrix bfmp2b = e(b)
* The scalars mu1, mu2 and pi are reused and now describe the FMP2b solution
scalar mu1 = exp(bfmp2b[1,1])
scalar mu2 = exp(bfmp2b[1,2])
scalar pi = exp(bfmp2b[1,3])/(1+exp(bfmp2b[1,3]))
display "Mixture probability = " pi " and Poisson means = " mu1 " and " mu2

* Following does not converge. Included for completeness.
* Finite mixtures NB - 2 components
* iter(20) caps the iterations so the run stops quickly
fmm children, components(2) mixtureof(negbin2) vce(robust) iter(20)
if (e(converged) == 0) display " *** FMM DID NOT CONVERGE *** "
estimates store FMNB2

* Following may not converge. Included for completeness.
* Finite mixtures Poisson - 3 components
* NOTE(review): the quiet 2-component fit is presumably run first to give the
* 3-component fit a clean starting point - confirm against the fmm help file
quietly fmm children, components(2) mixtureof(poisson) vce(robust)
fmm children, components(3) mixtureof(poisson) vce(robust) iter(20)
if (e(converged) == 0) display " *** FMM DID NOT CONVERGE *** "
* Store under its own name; storing as FMNB2 would overwrite the NB mixture above
estimates store FMP3

* Hurdle models, intercept only, robust standard errors
* (hplogit and hnblogit are add-on commands, not part of official Stata)

* Hurdle Poisson model
hplogit children, vce(robust)
estimates store HP

* Hurdle negative binomial model
hnblogit children, vce(robust)
estimates store HNB

* The following predicted frequencies are reported in the text
* Zero Inflated Poisson Model
* inflate(_cons) gives the zero-inflation equation an intercept only
zip children, inflate(_cons) vce(robust)
estimates store ZIP
* One variable per count 0,...,12 holding the fitted probability of that count
forvalues i = 0/12 {
   predict zipfit`i', pr(`i')
   }
sum zipfit*

* Zero Inflated NB Model
* Uses zinb; running zip here would just store a second copy of the ZIP fit
zinb children, inflate(_cons) vce(robust)
estimates store ZINB

* Ordered probit
* Top-code the count at 6 so the outcome has categories 0,...,6
* NOTE(review): a missing value of children would also be coded 6 here, exactly
* as with generate-then-replace, because Stata treats missing as larger than any
* number and min() ignores missing arguments - confirm the data has none
generate childrange = min(children, 6)
tabulate childrange

oprobit childrange, vce(robust)
estimates store OPROBIT

* Predicted probabilities, one new variable per outcome category
predict pop0 pop1 pop2 pop3 pop4 pop5 pop6
summarize pop*

*** TABLE 6.17 FIT (and estimates) FOR VARIOUS MODELS (FMP2b preferred to FMP2a)

* Both tables share the same coefficient/t-statistic formats and fit statistics
local tabopts b(%10.3f) t(%10.2f) stats(ll aic bic N k)
estimates table POISSON NB FMP2a FMP2b HP ZIP, `tabopts'
estimates table OPROBIT, `tabopts'

********** MODEL WITH REGRESSORS - results do not change much

* Do ZIP with regressors:
* educational dummies, religion dummies, language dummies, age, age-squared
generate agesq = age^2

* One indicator per category of each factor; tabulate also prints the frequencies
tabulate education, generate(deduc)
tabulate intlang, generate(dlang)
tabulate religion, generate(drelig)
drop deduc1 dlang1 drelig1   // drop one dummy in each category

* The wildcards are stored as text and expand to the remaining dummies when used
global XLIST age agesq deduc* dlang* drelig*

poisson children $XLIST, vce(robust)

* The same regressors enter the count equation and the zero-inflation equation
zip children $XLIST, inflate($XLIST) vce(robust)
forvalues count = 0/12 {
    predict zipregfit`count', pr(`count')
}
summarize zipregfit*

********** CLOSE OUTPUT

* log close
* clear
* exit


