------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  c:\Users\ccameron\Dropbox\Desktop\Imbook\courses\2019_canada\canada01_crosssection_
> basics\canada2019_crosssection.txt
  log type:  text
 opened on:   8 May 2019, 19:28:18

. 
. ********** OVERVIEW OF canada2019_crosssection.do **********
. 
. * STATA Program to demonstrate Count Data Regression Models
. * Based on mus17p1cnt.do in Cameron and Trivedi(2009)
. * "Microeconometrics using Stata", Stata Press
. 
. * 1A. COUNT REGRESSION: BASIC CROSS-SECTION
. * 1B. COUNT REGRESSION: INFERENCE (STANDARD ERRORS AND BOOTSTRAP) 
. * 2: COUNT REGRESSION: EXTRAS (TRUNCATED/CENSORED, HURDLE, ZERO-INFLATED, 
. *                              FINITE MIXTURE, ENDOGENOUS)
. 
. * To run you need files
. *      mus17data.dta
. *      mus18data.dta    
. *      musbootdata.dta
. * in your directory
. 
. * Stata user-written commands
. *   chi2gof        // For chis-squared goodness of fit test
. *   boottest       // for wild bootstraps 
. *   spost9_ado     // For countfit
. *   hnblogit       // hurdle logit - negative binomial 
. *   qcount         // quantile count regression  
. * are used
. 
. ********** SETUP **********
. 
. set more off

. version 15

. clear all

. set mem 10m
set memory ignored.
    Memory no longer needs to be set in modern Statas; memory adjustments are performed on the
    fly automatically.

. set scheme s1mono  /* Graphics scheme */

. 
. *********** 1A: COUNT REGRESSION: BASICS (POISSON, GLM, NLS, DIAGNOSTICS, NB)
. 
. *** 1A. BASICS: ANALYSIS WITHOUT REGRESSORS
. 
. * Summarize doctor visits data (MEPS 2003 data 65-90 years old: see MUS p.557) 
. use mus17data.dta

. summarize docvis

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144

. summarize docvis, detail

                       # doctor visits
-------------------------------------------------------------
      Percentiles      Smallest
 1%            0              0
 5%            0              0
10%            0              0       Obs               3,677
25%            2              0       Sum of Wgt.       3,677

50%            5                      Mean           6.822682
                        Largest       Std. Dev.      7.394937
75%            9             59
90%           15             73       Variance       54.68509
95%           20            106       Skewness       4.174335
99%           33            144       Kurtosis       49.67923

. histogram docvis if docvis < 40, discrete ///
>   xtitle("# Doctor visits (for < 40 visits)")
(start=0, width=1)

. graph export countfig01histraw.wmf, replace
(file c:\Users\ccameron\Dropbox\Desktop\Imbook\courses\2019_canada\canada01_crosssection_basics\
> countfig01histraw.wmf written in Windows Metafile format)

. 
. * Tabulate docvis after recoding values > 10 to ranges 11-40 and 41-60
. generate dvrange = docvis

. recode dvrange (11/40 = 40) (41/60 = 60)
(dvrange: 784 changes made)

. label define dvcounts 40 "11-40" 60 "41-60"

. label values dvrange dvcounts

. tabulate dvrange

    dvrange |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |        401       10.91       10.91
          1 |        314        8.54       19.45
          2 |        358        9.74       29.18
          3 |        334        9.08       38.26
          4 |        339        9.22       47.48
          5 |        266        7.23       54.72
          6 |        231        6.28       61.00
          7 |        202        5.49       66.49
          8 |        179        4.87       71.36
          9 |        154        4.19       75.55
         10 |        108        2.94       78.49
      11-40 |        774       21.05       99.54
      41-60 |         14        0.38       99.92
         73 |          1        0.03       99.95
        106 |          1        0.03       99.97
        144 |          1        0.03      100.00
------------+-----------------------------------
      Total |      3,677      100.00

. 
. *** 1A. BASICS: POISSON REGRESSION
. 
. drop _all 

. 
. * Summary statistics for doctor visits data
. use mus17data.dta              

. global xlist private medicaid age age2 educyr actlim totchr 

. describe docvis $xlist

              storage   display    value
variable name   type    format     label      variable label
------------------------------------------------------------------------------------------------
docvis          float   %9.0g                 # doctor visits
private         byte    %8.0g                 =1 if has private supplementary insurance
medicaid        byte    %8.0g                 =1 if has Medicaid public insurance
age             byte    %8.0g                 Age
age2            float   %9.0g                 Age-squared
educyr          byte    %8.0g                 Years of education
actlim          byte    %8.0g                 =1 if activity limitation
totchr          byte    %8.0g                 # chronic conditions

. summarize docvis $xlist, sep(10)

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144
     private |      3,677    .4966005    .5000564          0          1
    medicaid |      3,677     .166712    .3727692          0          1
         age |      3,677    74.24476    6.376638         65         90
        age2 |      3,677    5552.936    958.9996       4225       8100
      educyr |      3,677    11.18031    3.827676          0         17
      actlim |      3,677     .333152    .4714045          0          1
      totchr |      3,677    1.843351    1.350026          0          8

. 
. * Poisson with preffered robust standard errors
. poisson docvis $xlist, vce(robust) nolog  // Poisson robust SEs

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(7)      =     720.43
                                                Prob > chi2       =     0.0000
Log pseudolikelihood =  -15019.64               Pseudo R2         =     0.1297

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324    .036356     3.91   0.000      .070976    .2134889
    medicaid |   .0970005   .0568264     1.71   0.088    -.0143773    .2083783
         age |   .2936722   .0629776     4.66   0.000     .1702383    .4171061
        age2 |  -.0019311   .0004166    -4.64   0.000    -.0027475   -.0011147
      educyr |   .0295562   .0048454     6.10   0.000     .0200594     .039053
      actlim |   .1864213   .0396569     4.70   0.000     .1086953    .2641474
      totchr |   .2483898   .0125786    19.75   0.000     .2237361    .2730435
       _cons |  -10.18221   2.369212    -4.30   0.000    -14.82578   -5.538638
------------------------------------------------------------------------------

. 
. * Poisson with default ML standard errors
. poisson docvis $xlist                     // Poisson default ML standard errors 

Iteration 0:   log likelihood = -15019.656  
Iteration 1:   log likelihood =  -15019.64  
Iteration 2:   log likelihood =  -15019.64  

Poisson regression                              Number of obs     =      3,677
                                                LR chi2(7)        =    4477.98
                                                Prob > chi2       =     0.0000
Log likelihood =  -15019.64                     Pseudo R2         =     0.1297

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324   .0143311     9.92   0.000      .114144    .1703208
    medicaid |   .0970005   .0189307     5.12   0.000     .0598969     .134104
         age |   .2936722   .0259563    11.31   0.000     .2427988    .3445457
        age2 |  -.0019311   .0001724   -11.20   0.000    -.0022691   -.0015931
      educyr |   .0295562    .001882    15.70   0.000     .0258676    .0332449
      actlim |   .1864213    .014566    12.80   0.000     .1578726    .2149701
      totchr |   .2483898   .0046447    53.48   0.000     .2392864    .2574933
       _cons |  -10.18221   .9720115   -10.48   0.000    -12.08732   -8.277101
------------------------------------------------------------------------------

. 
. * Poisson coefficients as incidence ratios
. poisson docvis $xlist, irr vce(robust) nolog  // Incidence ratios

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(7)      =     720.43
                                                Prob > chi2       =     0.0000
Log pseudolikelihood =  -15019.64               Pseudo R2         =     0.1297

------------------------------------------------------------------------------
             |               Robust
      docvis |        IRR   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   1.152845   .0419128     3.91   0.000     1.073555     1.23799
    medicaid |   1.101861   .0626148     1.71   0.088     .9857255    1.231679
         age |   1.341344   .0844747     4.66   0.000     1.185587    1.517564
        age2 |   .9980708   .0004158    -4.64   0.000     .9972562     .998886
      educyr |   1.029997   .0049907     6.10   0.000     1.020262    1.039826
      actlim |    1.20493   .0477838     4.70   0.000     1.114823     1.30232
      totchr |    1.28196   .0161253    19.75   0.000     1.250741    1.313957
       _cons |   .0000378   .0000896    -4.30   0.000     3.64e-07    .0039319
------------------------------------------------------------------------------
Note: _cons estimates baseline incidence rate.

. 
. * AME and MEM for Poisson 
. quietly poisson docvis $xlist, vce(robust) 

. margins, dydx(*)        // AME: Average marginal effect for Poisson

Average marginal effects                        Number of obs     =      3,677
Model VCE    : Robust

Expression   : Predicted number of events, predict()
dy/dx w.r.t. : private medicaid age age2 educyr actlim totchr

------------------------------------------------------------------------------
             |            Delta-method
             |      dy/dx   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .9704067    .247564     3.92   0.000     .4851902    1.455623
    medicaid |   .6618033   .3901692     1.70   0.090    -.1029143    1.426521
         age |   2.003632   .4303206     4.66   0.000     1.160219    2.847045
        age2 |  -.0131753   .0028473    -4.63   0.000    -.0187559   -.0075947
      educyr |   .2016526   .0337805     5.97   0.000     .1354441    .2678612
      actlim |   1.271893   .2749286     4.63   0.000     .7330432    1.810744
      totchr |   1.694685   .0908883    18.65   0.000     1.516547    1.872823
------------------------------------------------------------------------------

. margins, dydx(*) atmean // MEM: ME for Poisson evaluated at average of x

Conditional marginal effects                    Number of obs     =      3,677
Model VCE    : Robust

Expression   : Predicted number of events, predict()
dy/dx w.r.t. : private medicaid age age2 educyr actlim totchr
at           : private         =    .4966005 (mean)
               medicaid        =     .166712 (mean)
               age             =    74.24476 (mean)
               age2            =    5552.936 (mean)
               educyr          =    11.18031 (mean)
               actlim          =     .333152 (mean)
               totchr          =    1.843351 (mean)

------------------------------------------------------------------------------
             |            Delta-method
             |      dy/dx   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .8914309   .2270816     3.93   0.000     .4463591    1.336503
    medicaid |    .607943   .3577377     1.70   0.089      -.09321    1.309096
         age |   1.840568   .3924681     4.69   0.000     1.071345    2.609791
        age2 |   -.012103   .0025973    -4.66   0.000    -.0171936   -.0070125
      educyr |   .1852413   .0306709     6.04   0.000     .1251275    .2453551
      actlim |   1.168381   .2516143     4.64   0.000     .6752264    1.661536
      totchr |   1.556764   .0760166    20.48   0.000     1.407774    1.705754
------------------------------------------------------------------------------

. 
. * Older code uses add-ons mfx and margeff now superceded by margins     
. * margeff  // AME: Average marginal effect for Poisson
. * mfx      // MEM: Marginal effect for Poisson evaluated at average of x
. 
. * Also factor variables and noncaculus methods
. * The i. are discrete and will calculate ME of one unit change (not derivative)
. * The c.age##age means age and age-sauared appear and ME is w.r.t. age
. poisson docvis i.private i.medicaid c.age##c.age educyr i.actlim totchr, vce(robust)

Iteration 0:   log pseudolikelihood = -15019.656  
Iteration 1:   log pseudolikelihood =  -15019.64  
Iteration 2:   log pseudolikelihood =  -15019.64  

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(7)      =     720.43
                                                Prob > chi2       =     0.0000
Log pseudolikelihood =  -15019.64               Pseudo R2         =     0.1297

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
   1.private |   .1422324    .036356     3.91   0.000      .070976    .2134889
  1.medicaid |   .0970005   .0568264     1.71   0.088    -.0143773    .2083783
         age |   .2936722   .0629776     4.66   0.000     .1702383    .4171061
             |
 c.age#c.age |  -.0019311   .0004166    -4.64   0.000    -.0027475   -.0011147
             |
      educyr |   .0295562   .0048454     6.10   0.000     .0200594     .039053
    1.actlim |   .1864213   .0396569     4.70   0.000     .1086953    .2641474
      totchr |   .2483898   .0125786    19.75   0.000     .2237361    .2730435
       _cons |  -10.18221   2.369212    -4.30   0.000    -14.82578   -5.538638
------------------------------------------------------------------------------

. margins, dydx(*) // MEM: ME for Poisson evaluated at average of x

Average marginal effects                        Number of obs     =      3,677
Model VCE    : Robust

Expression   : Predicted number of events, predict()
dy/dx w.r.t. : 1.private 1.medicaid age educyr 1.actlim totchr

------------------------------------------------------------------------------
             |            Delta-method
             |      dy/dx   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
   1.private |   .9701906   .2473149     3.92   0.000     .4854622    1.454919
  1.medicaid |   .6830664   .4153252     1.64   0.100     -.130956    1.497089
         age |   .0385842   .0172075     2.24   0.025     .0048581    .0723103
      educyr |   .2016526   .0337805     5.97   0.000     .1354441    .2678612
    1.actlim |   1.295942   .2850588     4.55   0.000     .7372367    1.854647
      totchr |   1.694685   .0908883    18.65   0.000     1.516547    1.872823
------------------------------------------------------------------------------
Note: dy/dx for factor levels is the discrete change from the base level.

. 
. *** 1A. BASICS: GLM REGRESSION
. 
. * GLM for Poisson with robust standard errors
. glm docvis $xlist, family(poisson) link(log) vce(robust) nolog

Generalized linear models                         No. of obs      =      3,677
Optimization     : ML                             Residual df     =      3,669
                                                  Scale parameter =          1
Deviance         =  18395.14033                   (1/df) Deviance =   5.013666
Pearson          =  23147.37781                   (1/df) Pearson  =   6.308906

Variance function: V(u) = u                       [Poisson]
Link function    : g(u) = ln(u)                   [Log]

                                                  AIC             =   8.173859
Log pseudolikelihood =  -15019.6398               BIC             =  -11726.81

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324    .036356     3.91   0.000      .070976    .2134889
    medicaid |   .0970005   .0568264     1.71   0.088    -.0143773    .2083783
         age |   .2936722   .0629776     4.66   0.000     .1702383    .4171061
        age2 |  -.0019311   .0004166    -4.64   0.000    -.0027475   -.0011147
      educyr |   .0295562   .0048454     6.10   0.000     .0200594     .039053
      actlim |   .1864213   .0396569     4.70   0.000     .1086953    .2641474
      totchr |   .2483898   .0125786    19.75   0.000     .2237361    .2730435
       _cons |  -10.18221   2.369212    -4.30   0.000    -14.82578   -5.538638
------------------------------------------------------------------------------

. 
. * GLM for Poisson with GLM corrected standard errors
. * These are default ML standard errors multiplied by square root Pearson statistic 
. glm docvis $xlist, family(poisson) link(log) scale(x2) nolog 

Generalized linear models                         No. of obs      =      3,677
Optimization     : ML                             Residual df     =      3,669
                                                  Scale parameter =          1
Deviance         =  18395.14033                   (1/df) Deviance =   5.013666
Pearson          =  23147.37781                   (1/df) Pearson  =   6.308906

Variance function: V(u) = u                       [Poisson]
Link function    : g(u) = ln(u)                   [Log]

                                                  AIC             =   8.173859
Log likelihood   =  -15019.6398                   BIC             =  -11726.81

------------------------------------------------------------------------------
             |                 OIM
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324   .0359961     3.95   0.000     .0716813    .2127836
    medicaid |   .0970005   .0475493     2.04   0.041     .0038055    .1901954
         age |   .2936722   .0651959     4.50   0.000     .1658906    .4214538
        age2 |  -.0019311   .0004331    -4.46   0.000      -.00278   -.0010822
      educyr |   .0295562   .0047271     6.25   0.000     .0202912    .0388212
      actlim |   .1864213   .0365861     5.10   0.000     .1147139    .2581288
      totchr |   .2483898   .0116663    21.29   0.000     .2255243    .2712554
       _cons |  -10.18221   2.441453    -4.17   0.000    -14.96737   -5.397048
------------------------------------------------------------------------------
(Standard errors scaled using square root of Pearson X2-based dispersion.)

. 
. *** 1A. BASICS: NONLINEAR LEAST SQUARES  
. 
. * Nonlinear least squares with exponential conditional mean
. nl (docvis = exp({xb: $xlist}+{b0})), vce(robust) nolog
(obs = 3,677)


Nonlinear regression                                Number of obs =      3,677
                                                    R-squared     =     0.5436
                                                    Adj R-squared =     0.5426
                                                    Root MSE      =   6.804007
                                                    Res. dev.     =   24528.25

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
 /xb_private |   .1235144   .0395179     3.13   0.002     .0460351    .2009937
/xb_medicaid |   .0856747   .0649936     1.32   0.188    -.0417525    .2131018
     /xb_age |   .2951153   .0720509     4.10   0.000     .1538516     .436379
    /xb_age2 |  -.0019481   .0004771    -4.08   0.000    -.0028836   -.0010127
  /xb_educyr |   .0309924   .0051192     6.05   0.000     .0209557    .0410291
  /xb_actlim |   .1916735   .0413705     4.63   0.000      .110562    .2727851
  /xb_totchr |   .2191967   .0151021    14.51   0.000     .1895874     .248806
         /b0 |  -10.12438   2.713159    -3.73   0.000    -15.44383   -4.804931
------------------------------------------------------------------------------

. 
. *** 1A. BASICS: NEGATIVE BINOMIAL REGRESSION MOTIVATION
. 
. *** IMPORTANT: The following uses the old Stata 13 random number generator (kiss32)
. 
. set rng kiss32

. 
. * Generate Poisson data with sample mean of docvis
. quietly summarize docvis

. scalar dvmean = r(mean)

. set seed 10101                      // set the seed !

. generate ypoisson= rpoisson(dvmean) // Draw from Poisson(mu=dvmean) 

. summarize docvis ypoisson

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144
    ypoisson |      3,677    6.836008    2.631633          0         18

. tabulate ypoisson

   ypoisson |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |          4        0.11        0.11
          1 |         33        0.90        1.01
          2 |         89        2.42        3.43
          3 |        214        5.82        9.25
          4 |        375       10.20       19.45
          5 |        475       12.92       32.36
          6 |        546       14.85       47.21
          7 |        533       14.50       61.71
          8 |        471       12.81       74.52
          9 |        374       10.17       84.69
         10 |        252        6.85       91.54
         11 |        137        3.73       95.27
         12 |         92        2.50       97.77
         13 |         45        1.22       98.99
         14 |         19        0.52       99.51
         15 |         10        0.27       99.78
         16 |          3        0.08       99.86
         17 |          3        0.08       99.95
         18 |          2        0.05      100.00
------------+-----------------------------------
      Total |      3,677      100.00

. histogram docvis if docvis < 40, discrete addplot(hist ypoisson, fcolor(white) discrete) ///
>   xtitle("# Doctor visits versus Poisson") legend(off)
(start=0, width=1)

. graph export countfig02histpoisson.wmf, replace
(file c:\Users\ccameron\Dropbox\Desktop\Imbook\courses\2019_canada\canada01_crosssection_basics\
> countfig02histpoisson.wmf written in Windows Metafile format)

. 
. * Generate negative binomial (mu=1 var=2) generated data
. quietly nbreg docvis

. scalar alpha = e(alpha)

. display alpha
.84081223

. set seed 10101                      // set the seed !

. generate mugamma = rgamma(1/alpha,alpha)

. generate ynegbin = rpoisson(dvmean*mugamma) // NB generated as a Poisson-gamma mixture 

. summarize docvis ynegbin mugamma 

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144
     ynegbin |      3,677    6.996736    6.999999          0         66
     mugamma |      3,677    1.027634    .9443734   .0001458   8.468604

. tabulate ynegbin

    ynegbin |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |        375       10.20       10.20
          1 |        411       11.18       21.38
          2 |        349        9.49       30.87
          3 |        322        8.76       39.62
          4 |        248        6.74       46.37
          5 |        232        6.31       52.68
          6 |        241        6.55       59.23
          7 |        185        5.03       64.26
          8 |        179        4.87       69.13
          9 |        171        4.65       73.78
         10 |         98        2.67       76.45
         11 |        104        2.83       79.28
         12 |        120        3.26       82.54
         13 |         90        2.45       84.99
         14 |         93        2.53       87.52
         15 |         71        1.93       89.45
         16 |         52        1.41       90.86
         17 |         48        1.31       92.17
         18 |         37        1.01       93.17
         19 |         35        0.95       94.13
         20 |         36        0.98       95.10
         21 |         21        0.57       95.68
         22 |         22        0.60       96.27
         23 |         17        0.46       96.74
         24 |         16        0.44       97.17
         25 |         13        0.35       97.53
         26 |         11        0.30       97.82
         27 |          9        0.24       98.07
         28 |         10        0.27       98.34
         29 |         11        0.30       98.64
         30 |          5        0.14       98.78
         31 |          4        0.11       98.88
         32 |          6        0.16       99.05
         33 |          8        0.22       99.27
         34 |          4        0.11       99.37
         35 |          2        0.05       99.43
         36 |          1        0.03       99.46
         37 |          4        0.11       99.56
         38 |          5        0.14       99.70
         41 |          3        0.08       99.78
         42 |          1        0.03       99.81
         43 |          1        0.03       99.84
         45 |          1        0.03       99.86
         48 |          2        0.05       99.92
         51 |          1        0.03       99.95
         59 |          1        0.03       99.97
         66 |          1        0.03      100.00
------------+-----------------------------------
      Total |      3,677      100.00

. histogram docvis if docvis < 40, discrete addplot(hist ynegbin if ynegbin < 40, fcolor(white) 
> discrete) ///
>   xtitle("# Doctor visits versus negative binomial") legend(off)
(start=0, width=1)

. graph export countfig03histnegbin.wmf, replace
(file c:\Users\ccameron\Dropbox\Desktop\Imbook\courses\2019_canada\canada01_crosssection_basics\
> countfig03histnegbin.wmf written in Windows Metafile format)

. 
. * Standard negative binomial (NB2) with default SEs
. nbreg docvis $xlist, nolog 

Negative binomial regression                    Number of obs     =      3,677
                                                LR chi2(7)        =     773.44
Dispersion     = mean                           Prob > chi2       =     0.0000
Log likelihood = -10589.339                     Pseudo R2         =     0.0352

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1640928   .0332186     4.94   0.000     .0989856    .2292001
    medicaid |    .100337   .0454209     2.21   0.027     .0113137    .1893603
         age |   .2941294   .0601588     4.89   0.000     .1762203    .4120384
        age2 |  -.0019282   .0004004    -4.82   0.000    -.0027129   -.0011434
      educyr |   .0286947   .0042241     6.79   0.000     .0204157    .0369737
      actlim |   .1895376   .0347601     5.45   0.000      .121409    .2576662
      totchr |   .2776441   .0121463    22.86   0.000     .2538378    .3014505
       _cons |  -10.29749   2.247436    -4.58   0.000    -14.70238   -5.892595
-------------+----------------------------------------------------------------
    /lnalpha |  -.4452773   .0306758                     -.5054007   -.3851539
-------------+----------------------------------------------------------------
       alpha |   .6406466   .0196523                      .6032638    .6803459
------------------------------------------------------------------------------
LR test of alpha=0: chibar2(01) = 8860.60              Prob >= chibar2 = 0.000

. 
. * Newer command - use rather than countfit below  
. chi2gof, cells(11) table

Chi-square Goodness-of-Fit Test for NegBin Model:

         Chi-square chi2(10) =   53.62
         Prob>chi2           =    0.00

------------------------------------------------------------
                                       Fitted  
      Cells  Abs. Freq.  Rel. Freq.  Rel. Freq.   Abs. Dif.
------------------------------------------------------------
        0      401         .1091       .0913       .0178
        1      314         .0854       .1079       .0225
        2      358         .0974       .1054       .0081
        3      334         .0908       .0962       .0053
        4      339         .0922       .0849       .0073
        5      266         .0723       .0735       .0012
        6      231         .0628       .0631       2.6e-04
        7      202         .0549       .0538       .0011
        8      179         .0487       .0457       .0029
        9      154         .0419       .0388        .003
 10 or more    791         .2445       .2393       .0052
------------------------------------------------------------

. 
. * NB2: Sample vs average predicted probabilities of y = 0, 1, ..., 10 
. countfit docvis $xlist, maxcount(10) nbreg nograph noestimates nofit
Comparison of Mean Observed and Predicted Count

            Maximum       At      Mean
Model     Difference    Value    |Diff|
---------------------------------------------
NBRM       -0.023         1      0.007

NBRM: Predicted and actual probabilities

Count   Actual    Predicted    |Diff|   Pearson
------------------------------------------------
0        0.109       0.091      0.018    12.708
1        0.085       0.108      0.023    17.288
2        0.097       0.105      0.008     2.270
3        0.091       0.096      0.005     1.086
4        0.092       0.085      0.007     2.333
5        0.072       0.074      0.001     0.072
6        0.063       0.063      0.000     0.004
7        0.055       0.054      0.001     0.088
8        0.049       0.046      0.003     0.694
9        0.042       0.039      0.003     0.873
10       0.029       0.033      0.004     1.456
------------------------------------------------
Sum      0.785       0.794      0.073    38.872

. 
. * Generalized negative binomial with alpha parameterized
. gnbreg docvis $xlist, lnalpha($xlist) nolog

Generalized negative binomial regression        Number of obs     =      3,677
                                                LR chi2(7)        =     733.91
                                                Prob > chi2       =     0.0000
Log likelihood = -10518.541                     Pseudo R2         =     0.0337

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .1421503   .0330792     4.30   0.000     .0773163    .2069843
    medicaid |   .0896531   .0459235     1.95   0.051    -.0003554    .1796616
         age |   .2996728   .0597171     5.02   0.000     .1826295    .4167161
        age2 |  -.0019674   .0003968    -4.96   0.000    -.0027452   -.0011896
      educyr |   .0286626   .0042215     6.79   0.000     .0203887    .0369366
      actlim |   .1764802   .0337961     5.22   0.000     .1102412    .2427193
      totchr |   .2523805   .0113765    22.18   0.000     .2300829    .2746781
       _cons |  -10.42024   2.234642    -4.66   0.000    -14.80006   -6.040424
-------------+----------------------------------------------------------------
lnalpha      |
     private |  -.2143547   .0666325    -3.22   0.001    -.3449519   -.0837574
    medicaid |   .0561093   .0897534     0.63   0.532    -.1198041    .2320227
         age |  -.1606495   .1221281    -1.32   0.188    -.4000162    .0787172
        age2 |   .0010047   .0008126     1.24   0.216     -.000588    .0025974
      educyr |  -.0085659   .0086239    -0.99   0.321    -.0254685    .0083366
      actlim |    .096942   .0698175     1.39   0.165    -.0398978    .2337818
      totchr |  -.2584128   .0235344   -10.98   0.000    -.3045393   -.2122862
       _cons |   6.587102   4.563726     1.44   0.149    -2.357637    15.53184
------------------------------------------------------------------------------

. 
. * Comparison of Poisson and NB and various standard error estimates
. quietly poisson docvis $xlist

. estimates store PDEFAULT

. quietly poisson docvis $xlist, vce(robust)

. estimates store PROBUST

. quietly glm docvis $xlist, family(poisson) link(log) scale(x2) 

. estimates store PPEARSON

. quietly nbreg docvis $xlist

. estimates store NBDEFAULT

. quietly nbreg docvis $xlist, vce(robust)

. estimates store NBROBUST

. esttab PDEFAULT PROBUST PPEARSON NBDEFAULT NBROBUST, b(%10.4f) se mtitles pr2

--------------------------------------------------------------------------------------------
                      (1)             (2)             (3)             (4)             (5)   
                 PDEFAULT         PROBUST        PPEARSON       NBDEFAULT        NBROBUST   
--------------------------------------------------------------------------------------------
docvis                                                                                      
private            0.1422***       0.1422***       0.1422***       0.1641***       0.1641***
                 (0.0143)        (0.0364)        (0.0360)        (0.0332)        (0.0369)   

medicaid           0.0970***       0.0970          0.0970*         0.1003*         0.1003   
                 (0.0189)        (0.0568)        (0.0475)        (0.0454)        (0.0567)   

age                0.2937***       0.2937***       0.2937***       0.2941***       0.2941***
                 (0.0260)        (0.0630)        (0.0652)        (0.0602)        (0.0646)   

age2              -0.0019***      -0.0019***      -0.0019***      -0.0019***      -0.0019***
                 (0.0002)        (0.0004)        (0.0004)        (0.0004)        (0.0004)   

educyr             0.0296***       0.0296***       0.0296***       0.0287***       0.0287***
                 (0.0019)        (0.0048)        (0.0047)        (0.0042)        (0.0049)   

actlim             0.1864***       0.1864***       0.1864***       0.1895***       0.1895***
                 (0.0146)        (0.0397)        (0.0366)        (0.0348)        (0.0394)   

totchr             0.2484***       0.2484***       0.2484***       0.2776***       0.2776***
                 (0.0046)        (0.0126)        (0.0117)        (0.0121)        (0.0132)   

_cons            -10.1822***     -10.1822***     -10.1822***     -10.2975***     -10.2975***
                 (0.9720)        (2.3692)        (2.4415)        (2.2474)        (2.4241)   
--------------------------------------------------------------------------------------------
/                                                                                           
lnalpha                                                           -0.4453***      -0.4453***
                                                                 (0.0307)        (0.0378)   
--------------------------------------------------------------------------------------------
N                    3677            3677            3677            3677            3677   
pseudo R-sq         0.130           0.130                           0.035           0.035   
--------------------------------------------------------------------------------------------
Standard errors in parentheses
* p<0.05, ** p<0.01, *** p<0.001

. 
. *********** 1B. COUNT REGRESSION: INFERENCE (STANDARD ERRORS AND BOOTSTRAP) 
. 
. *** 1B. INFERENCE: ROBUST STANDARD ERRORS
. 
. * Inference - Robust standard errors
. clear

. use mus17data.dta              

. global xlist private medicaid age age2 educyr actlim totchr 

. 
. * Poisson with preferred robust standard errors
. poisson docvis $xlist, vce(robust) nolog  // Poisson robust SEs

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(7)      =     720.43
                                                Prob > chi2       =     0.0000
Log pseudolikelihood =  -15019.64               Pseudo R2         =     0.1297

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324    .036356     3.91   0.000      .070976    .2134889
    medicaid |   .0970005   .0568264     1.71   0.088    -.0143773    .2083783
         age |   .2936722   .0629776     4.66   0.000     .1702383    .4171061
        age2 |  -.0019311   .0004166    -4.64   0.000    -.0027475   -.0011147
      educyr |   .0295562   .0048454     6.10   0.000     .0200594     .039053
      actlim |   .1864213   .0396569     4.70   0.000     .1086953    .2641474
      totchr |   .2483898   .0125786    19.75   0.000     .2237361    .2730435
       _cons |  -10.18221   2.369212    -4.30   0.000    -14.82578   -5.538638
------------------------------------------------------------------------------

. 
. * Poisson with cluster robust standard errors - illustration
. * Here cluster on age for illustration
. * In practice the grouping variable would be, for example, village
. poisson docvis $xlist, vce(cluster age) nolog  // Poisson robust SEs

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(7)      =     917.07
                                                Prob > chi2       =     0.0000
Log pseudolikelihood =  -15019.64               Pseudo R2         =     0.1297

                                   (Std. Err. adjusted for 26 clusters in age)
------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1422324   .0357205     3.98   0.000     .0722215    .2122434
    medicaid |   .0970005   .0653316     1.48   0.138    -.0310471    .2250481
         age |   .2936722   .0471694     6.23   0.000     .2012219    .3861226
        age2 |  -.0019311   .0003162    -6.11   0.000    -.0025508   -.0013114
      educyr |   .0295562   .0054728     5.40   0.000     .0188296    .0402828
      actlim |   .1864213   .0374476     4.98   0.000     .1130254    .2598172
      totchr |   .2483898    .011554    21.50   0.000     .2257444    .2710353
       _cons |  -10.18221   1.749064    -5.82   0.000    -13.61031   -6.754105
------------------------------------------------------------------------------

. 
. *** 1B. BOOTSTRAP PAIRS STANDARD ERRORS
. 
. * Small data set for bootstraps so doesn't take long
. clear

. use musbootdata.dta 

. describe 

Contains data from musbootdata.dta
  obs:            50                          
 vars:             3                          16 Apr 2010 10:32
 size:           350                          
------------------------------------------------------------------------------------------------
              storage   display    value
variable name   type    format     label      variable label
------------------------------------------------------------------------------------------------
docvis          int     %8.0g                 number of doctor visits
age             float   %9.0g                 Age in years / 10
chronic         byte    %8.0g                 = 1 if a chronic condition
------------------------------------------------------------------------------------------------
Sorted by: 

. summarize

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |         50        4.12     7.82106          0         43
         age |         50       4.162    1.160382        2.6        6.2
     chronic |         50         .28    .4535574          0          1

. 
. * Default and robust standard errors
. poisson docvis chronic, nolog

Poisson regression                              Number of obs     =         50
                                                LR chi2(1)        =      48.18
                                                Prob > chi2       =     0.0000
Log likelihood = -238.75384                     Pseudo R2         =     0.0917

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014   .1393729     7.06   0.000     .7101356    1.256467
       _cons |   1.031602   .0995037    10.37   0.000     .8365779    1.226625
------------------------------------------------------------------------------

. poisson docvis chronic, nolog vce(robust)

Poisson regression                              Number of obs     =         50
                                                Wald chi2(1)      =       3.64
                                                Prob > chi2       =     0.0565
Log pseudolikelihood = -238.75384               Pseudo R2         =     0.0917

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014   .5154894     1.91   0.056    -.0270391    1.993642
       _cons |   1.031602   .3446734     2.99   0.003     .3560541    1.707149
------------------------------------------------------------------------------

. 
. *** IMPORTANT: Stata 14 uses different random number generator (mt64) 
. ***            than earlier versions of Stata. 
. ***            My slides use the earlier random number generator (kiss32)
. ***            So here I use the command set rng kiss32
. 
. * Use the old Stata 13 and earlier random number generator
. set rng kiss32

. 
. * Compute bootstrap standard errors using option vce(bootstrap) to 
. poisson docvis chronic, vce(boot, reps(400) seed(10101) nodots)

Poisson regression                              Number of obs     =         50
                                                Replications      =        400
                                                Wald chi2(1)      =       3.50
                                                Prob > chi2       =     0.0612
Log likelihood = -238.75384                     Pseudo R2         =     0.0917

------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014   .5253149     1.87   0.061    -.0462968      2.0129
       _cons |   1.031602   .3497212     2.95   0.003     .3461607    1.717042
------------------------------------------------------------------------------

. 
. * Same using bootstrap prefix command
. bootstrap, seed(10101) reps(400) nodots: poisson docvis chronic

Poisson regression                              Number of obs     =         50
                                                Replications      =        400
                                                Wald chi2(1)      =       3.50
                                                Prob > chi2       =     0.0612
Log likelihood = -238.75384                     Pseudo R2         =     0.0917

------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014   .5253149     1.87   0.061    -.0462968      2.0129
       _cons |   1.031602   .3497212     2.95   0.003     .3461607    1.717042
------------------------------------------------------------------------------

. 
. /* SKIP THE FOLLOWING AS TAKES TIME
> 
> * Bootstrap standard errors for different reps and seeds
> quietly poisson docvis chronic, vce(boot, reps(50) seed(10101))
> estimates store boot50
> quietly poisson docvis chronic, vce(boot, reps(50) seed(20202))
> estimates store boot50diff
> quietly poisson docvis chronic, vce(boot, reps(2000) seed(10101))
> estimates store boot2000
> quietly poisson docvis chronic, vce(robust)
> estimates store robust
> estimates table boot50 boot50diff boot2000 robust, b(%8.5f) se(%8.5f)
> 
> */
. 
. * The jackknife
. poisson docvis chronic, vce(jackknife)
(running poisson on estimation sample)

Jackknife replications (50)
----+--- 1 ---+--- 2 ---+--- 3 ---+--- 4 ---+--- 5 
..................................................    50

Poisson regression                              Number of obs     =         50
                                                Replications      =         50
                                                F(   1,     49)   =       2.50
                                                Prob > F          =     0.1205
Log likelihood = -238.75384                     Pseudo R2         =     0.0917

------------------------------------------------------------------------------
             |              Jackknife
      docvis |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014   .6222826     1.58   0.121    -.2672222    2.233825
       _cons |   1.031602   .3919746     2.63   0.011     .2438992    1.819304
------------------------------------------------------------------------------

. 
. * Cluster bootstrap - cluster on age for illustration
. poisson docvis chronic, vce(boot, cluster(age) reps(400) seed(10101) nodots)

Poisson regression                              Number of obs     =         50
                                                Replications      =        400
                                                Wald chi2(1)      =       4.12
                                                Prob > chi2       =     0.0423
Log likelihood = -238.75384                     Pseudo R2         =     0.0917

                                    (Replications based on 26 clusters in age)
------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .9833014    .484145     2.03   0.042     .0343947    1.932208
       _cons |   1.031602    .303356     3.40   0.001     .4370348    1.626168
------------------------------------------------------------------------------

. 
. * Bootstrap confidence intervals: normal-based, percentile, BC, and BCa
. quietly poisson docvis chronic, vce(boot, reps(999) seed(10101) bca)

. estat bootstrap, all

Poisson regression                              Number of obs     =         50
                                                Replications      =        999

------------------------------------------------------------------------------
             |    Observed               Bootstrap
      docvis |       Coef.       Bias    Std. Err.  [95% Conf. Interval]
-------------+----------------------------------------------------------------
     chronic |   .98330144  -.0244473   .54040762    -.075878   2.042481   (N)
             |                                      -.1316499   2.076792   (P)
             |                                      -.0820317   2.100361  (BC)
             |                                      -.0215526   2.181476 (BCa)
       _cons |   1.0316016  -.0503223   .35257252    .3405721   1.722631   (N)
             |                                       .2177235   1.598568   (P)
             |                                       .2578293   1.649789  (BC)
             |                                       .3794897   1.781907 (BCa)
------------------------------------------------------------------------------
(N)    normal confidence interval
(P)    percentile confidence interval
(BC)   bias-corrected confidence interval
(BCa)  bias-corrected and accelerated confidence interval

. 
. ****** PERCENTILE-T BOOTSTRAP WITH ASYMPTOTIC REFINEMENT
. 
. * Percentile-t for a single coefficient: Bootstrap the t statistic
. use musbootdata.dta, clear 

. quietly poisson docvis chronic, vce(robust)

. local theta = _b[chronic] 

. local setheta = _se[chronic]

. bootstrap tstar=((_b[chronic]-`theta')/_se[chronic]), seed(10101)        ///
>   reps(999) nodots saving(percentilet, replace): poisson docvis chronic, ///
>   vce(robust)
(note: file percentilet.dta not found)

Bootstrap results                               Number of obs     =         50
                                                Replications      =        999

      command:  poisson docvis chronic, vce(robust)
        tstar:  (_b[chronic]-.9833014421442415)/_se[chronic]

------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       tstar |          0     1.3004     0.00   1.000    -2.548736    2.548736
------------------------------------------------------------------------------

. 
. * Simple plot of the distribution of the tstar
. use percentilet, clear
(bootstrap: poisson)

. summarize

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
       tstar |        999   -.0874198      1.3004  -4.435354   4.611352

. centile tstar, c(2.5,50,97.5)

                                                       -- Binom. Interp. --
    Variable |       Obs  Percentile    Centile        [95% Conf. Interval]
-------------+-------------------------------------------------------------
       tstar |       999        2.5   -2.756196       -3.030972   -2.567785
             |                   50   -.0569957       -.1464812    .0312477
             |                 97.5    2.568691          2.3092    2.917802

. kdensity tstar, normal

. 
. * Fancier plot of the distribution of the tstar
. use percentilet, clear
(bootstrap: poisson)

. summarize

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
       tstar |        999   -.0874198      1.3004  -4.435354   4.611352

. centile tstar, c(2.5,50,97.5)

                                                       -- Binom. Interp. --
    Variable |       Obs  Percentile    Centile        [95% Conf. Interval]
-------------+-------------------------------------------------------------
       tstar |       999        2.5   -2.756196       -3.030972   -2.567785
             |                   50   -.0569957       -.1464812    .0312477
             |                 97.5    2.568691          2.3092    2.917802

. kdensity tstar, generate(evalpoint densityest) xtitle("tstar from the bootstrap replications")

. generate phistnorm = normalden(evalpoint)
(949 missing values generated)

. label variable phistnorm "Standard normal"

. label variable densityest "Bootstrap density"

. label variable evalpoint "tstar"

. graph twoway (scatter densityest evalpoint, connect(l) msize(tiny)) ///
>   (scatter phistnorm evalpoint, connect(l) msize(tiny))

. * graph export ct_bootstrap1.wmf, replace
. 
. * Percentile-t p-value for symmetric two-sided Wald test of H0: theta = 0
. use percentilet, clear
(bootstrap: poisson)

. quietly count if abs(`theta'/`setheta') < abs(tstar)

. display "p-value = " r(N)/(_N+1)
p-value = .145

. 
. * Percentile-t critical values and confidence interval
. _pctile tstar, p(2.5,97.5) 

. scalar lb = `theta' + r(r1)*`setheta'

. scalar ub = `theta' + r(r2)*`setheta'

. display "2.5 and 97.5 percentiles of t* distn: " r(r1) ", " r(r2) _n ///
>     "95 percent percentile-t confidence interval is:  (" lb   ","  ub ")"
2.5 and 97.5 percentiles of t* distn: -2.7561963, 2.5686913
95 percent percentile-t confidence interval is:  (-.43748842,2.3074345)

. 
. /* Following did not work
> * Command boottest
> * Wild score bootstrap, Rademacher weights, null imposed, 999 replications
> poisson docvis chronic
> boottest chronic 
> */
. 
. *********** 2: COUNT REGRESSION: EXTRAS
. *              (TRUNCATED/CENSORED, HURDLE, ZERO-INFLATED, FINITE MIXTURE, DIAGNOSTICS, ENDOGE
> NOUS)
. 
. *** 2. EXTRAS: TRUNCATED AND CENSORED
. 
. * Summary statistics for doctor visits data
. use mus17data.dta, clear    

. global xlist private medicaid age age2 educyr actlim totchr 

. describe docvis $xlist

              storage   display    value
variable name   type    format     label      variable label
------------------------------------------------------------------------------------------------
docvis          float   %9.0g                 # doctor visits
private         byte    %8.0g                 =1 if has private supplementary insurance
medicaid        byte    %8.0g                 =1 if has Medicaid public insurance
age             byte    %8.0g                 Age
age2            float   %9.0g                 Age-squared
educyr          byte    %8.0g                 Years of education
actlim          byte    %8.0g                 =1 if activity limitation
totchr          byte    %8.0g                 # chronic conditions

. summarize docvis $xlist, sep(10)

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144
     private |      3,677    .4966005    .5000564          0          1
    medicaid |      3,677     .166712    .3727692          0          1
         age |      3,677    74.24476    6.376638         65         90
        age2 |      3,677    5552.936    958.9996       4225       8100
      educyr |      3,677    11.18031    3.827676          0         17
      actlim |      3,677     .333152    .4714045          0          1
      totchr |      3,677    1.843351    1.350026          0          8

. 
. * Zero-truncated negative binomial 
. tnbreg docvis $xlist if docvis>0, ll(0) nolog

Truncated negative binomial regression          Number of obs     =      3,276
Truncation point: 0                             LR chi2(7)        =     509.10
Dispersion     = mean                           Prob > chi2       =     0.0000
Log likelihood =  -9452.899                     Pseudo R2         =     0.0262

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1095567   .0345239     3.17   0.002     .0418911    .1772223
    medicaid |   .0972309   .0470358     2.07   0.039     .0050425    .1894193
         age |   .2719032   .0625359     4.35   0.000     .1493352    .3944712
        age2 |  -.0017959    .000416    -4.32   0.000    -.0026113   -.0009805
      educyr |   .0265974   .0043937     6.05   0.000     .0179859     .035209
      actlim |   .1955384   .0355161     5.51   0.000     .1259281    .2651488
      totchr |   .2226969   .0124128    17.94   0.000     .1983683    .2470254
       _cons |   -9.19017   2.337591    -3.93   0.000    -13.77176   -4.608576
-------------+----------------------------------------------------------------
    /lnalpha |  -.5259629   .0418671                     -.6080209    -.443905
-------------+----------------------------------------------------------------
       alpha |    .590986   .0247429                      .5444273    .6415264
------------------------------------------------------------------------------
LR test of alpha=0: chibar2(01) = 7089.37              Prob >= chibar2 = 0.000

. estimates store ZTNB

. 
. * Replaces old command ztnb docvis $xlist if docvis>0, nolog 
. * And only does lower truncationhelp ztpoisson
. 
. * Censored negative binomial at y <= 10
. generate docviscens10 = docvis

. replace docviscens10 = 10 if docvis > 10
(791 real changes made)

.  
. * Censored Negbin ML program lfnbcens10 with censoring from right at 10
. program lfnbcens10
  1.   version 10.1
  2.   args lnf theta1 a               // theta1=x'b, a=alpha, lnf=lnf(y)
  3.   tempvar mu sumgammaratios probyle10 cens
  4.   local y $ML_y1                  // Define y so program more readable
  5.   generate double `mu'  = exp(`theta1')
  6.   generate double `sumgammaratios'  = exp(lngamma(0+(1/`a')))/1 ///
>     + exp(lngamma(1+(1/`a')))/1 + exp(lngamma(2+(1/`a')))/2     ///
>     + exp(lngamma(3+(1/`a')))/6 + exp(lngamma(4+(1/`a')))/24    ///
>     + exp(lngamma(5+(1/`a')))/120 + exp(lngamma(6+(1/`a')))/720 ///
>     + exp(lngamma(7+(1/`a')))/5040 + exp(lngamma(8+(1/`a')))/40320 ///
>     + exp(lngamma(9+(1/`a')))/362880 + exp(lngamma(10+(1/`a')))/3628800 
  7.   generate double `probyle10'  = (`sumgammaratios'/exp(lngamma((1/`a')))) ///
>      *((1/`a')^(1/`a'))*(`mu'^`y')/((1/`a'+`mu')^(1/`a'+`y'))
  8.   generate double `cens' = `y' >= 10
  9.   quietly replace `lnf' = (1-`cens') * (lngamma(`y'+(1/`a')) - lngamma(1/`a') ///
>                -  lnfactorial(`y') - (`y'+(1/`a'))*ln(1+`a'*`mu')  ///
>                + `y'*ln(`a') + `y'*ln(`mu') ) + `cens'*ln(`probyle10')
 10. end

. * Command lfnbcens10 implemented for negative binomial MLE
. ml model lf lfnbcens10 (docviscens10 = $xlist) ()

. ml maximize, nolog
(3,677 missing values generated)
(3,677 missing values generated)
initial:       log likelihood =     -<inf>  (could not be evaluated)
(3,677 missing values generated)
(3,677 missing values generated)
feasible:      log likelihood = -12293.433
rescale:       log likelihood = -8141.2566
rescale eq:    log likelihood = -8118.0172

                                                Number of obs     =      3,677
                                                Wald chi2(7)      =     293.34
Log likelihood = -7796.8328                     Prob > chi2       =     0.0000

------------------------------------------------------------------------------
docviscens10 |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
eq1          |
     private |   .1258389   .0408548     3.08   0.002      .045765    .2059128
    medicaid |   .0652582   .0558857     1.17   0.243    -.0442758    .1747922
         age |   .2275648   .0735028     3.10   0.002     .0835019    .3716277
        age2 |  -.0014782   .0004892    -3.02   0.003     -.002437   -.0005194
      educyr |   .0196804   .0052061     3.78   0.000     .0094767    .0298842
      actlim |    .097664   .0429777     2.27   0.023     .0134293    .1818988
      totchr |   .2147348   .0150827    14.24   0.000     .1851732    .2442964
       _cons |       -7.8   2.746158    -2.84   0.005    -13.18237    -2.41763
-------------+----------------------------------------------------------------
eq2          |
       _cons |    1.01341   .0378353    26.78   0.000     .9392546    1.087566
------------------------------------------------------------------------------

. estimates store NBCENS10 

. 
. /* Following did not work
>   generate double `sumgammaratios'  = 0
>   forvalues i = 0/10 {
>     quietly replace `sumgammaratios' = `sumgammaratios' + exp(lngamma(`i'+(1/`a')))/exp(lngamm
> a(`i'+1))
>     }
> */
. 
. * Comparison with regular negative binomial
. quietly nbreg docvis $xlist, nolog 

. estimates store NBREG

. estimates table NBREG ZTNB NBCENS10, equation(1) b(%10.4f) se  stats(N ll)

-----------------------------------------------------
    Variable |   NBREG         ZTNB       NBCENS10   
-------------+---------------------------------------
#1           |
     private |     0.1641       0.1096       0.1258  
             |     0.0332       0.0345       0.0409  
    medicaid |     0.1003       0.0972       0.0653  
             |     0.0454       0.0470       0.0559  
         age |     0.2941       0.2719       0.2276  
             |     0.0602       0.0625       0.0735  
        age2 |    -0.0019      -0.0018      -0.0015  
             |     0.0004       0.0004       0.0005  
      educyr |     0.0287       0.0266       0.0197  
             |     0.0042       0.0044       0.0052  
      actlim |     0.1895       0.1955       0.0977  
             |     0.0348       0.0355       0.0430  
      totchr |     0.2776       0.2227       0.2147  
             |     0.0121       0.0124       0.0151  
       _cons |   -10.2975      -9.1902      -7.8000  
             |     2.2474       2.3376       2.7462  
-------------+---------------------------------------
    /lnalpha |    -0.4453      -0.5260               
             |     0.0307       0.0419               
-------------+---------------------------------------
eq2          |
       _cons |                               1.0134  
             |                               0.0378  
-------------+---------------------------------------
Statistics   |                                       
           N |       3677         3276         3677  
          ll | -1.059e+04   -9452.8990   -7796.8328  
-----------------------------------------------------
                                         legend: b/se

. 
. * Ordered logit
. * Do for docviscens10 which has 10 categories
. ologit docviscens10 $xlist, nolog 

Ordered logistic regression                     Number of obs     =      3,677
                                                LR chi2(7)        =     882.51
                                                Prob > chi2       =     0.0000
Log likelihood = -7883.0551                     Pseudo R2         =     0.0530

------------------------------------------------------------------------------
docviscens10 |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .3343672     .06573     5.09   0.000     .2055388    .4631957
    medicaid |   .1576941    .091015     1.73   0.083     -.020692    .3360802
         age |   .5883239   .1163095     5.06   0.000     .3603615    .8162864
        age2 |  -.0038291   .0007736    -4.95   0.000    -.0053454   -.0023128
      educyr |   .0541312   .0084618     6.40   0.000     .0375464    .0707159
      actlim |   .2771719   .0707754     3.92   0.000     .1384548     .415889
      totchr |   .6188858   .0258162    23.97   0.000     .5682869    .6694846
-------------+----------------------------------------------------------------
       /cut1 |   22.06108   4.348475                      13.53822    30.58393
       /cut2 |   22.81988    4.34891                      14.29617    31.34358
       /cut3 |   23.42779   4.349424                      14.90307     31.9525
       /cut4 |   23.89882   4.349991                      15.37299    32.42464
       /cut5 |   24.34237    4.35066                      15.81524    32.86951
       /cut6 |   24.68387   4.351227                      16.15562    33.21211
       /cut7 |   24.98738   4.351744                      16.45812    33.51665
       /cut8 |   25.26841   4.352184                      16.73828    33.79853
       /cut9 |   25.53461   4.352502                      17.00386    34.06535
      /cut10 |   25.78217   4.352756                      17.25093    34.31342
------------------------------------------------------------------------------

. 
. *** 2. EXTRAS: HURDLE MODEL
. 
. * Hurdle logit-nb model manually
. logit docvis $xlist, nolog

Logistic regression                             Number of obs     =      3,677
                                                LR chi2(7)        =     453.08
                                                Prob > chi2       =     0.0000
Log likelihood = -1040.3258                     Pseudo R2         =     0.1788

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .6586978   .1264608     5.21   0.000     .4108392    .9065563
    medicaid |   .0554225   .1726693     0.32   0.748    -.2830032    .3938482
         age |   .5428779   .2238845     2.42   0.015     .1040724    .9816834
        age2 |  -.0034989   .0014957    -2.34   0.019    -.0064304   -.0005673
      educyr |    .047035   .0155706     3.02   0.003     .0165171    .0775529
      actlim |   .1623927   .1523743     1.07   0.287    -.1362553    .4610408
      totchr |   1.050562   .0671922    15.64   0.000     .9188676    1.182256
       _cons |  -20.94163   8.335137    -2.51   0.012     -37.2782   -4.605057
------------------------------------------------------------------------------

. predict dvplogit, p       // Logit Pr[y=1] 

. * Second step uses positives only
. ztnb docvis $xlist if docvis>0, nolog 

Zero-truncated negative binomial regression     Number of obs     =      3,276
                                                LR chi2(7)        =     509.10
Dispersion     = mean                           Prob > chi2       =     0.0000
Log likelihood =  -9452.899                     Pseudo R2         =     0.0262

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1095567   .0345239     3.17   0.002     .0418911    .1772223
    medicaid |   .0972309   .0470358     2.07   0.039     .0050425    .1894193
         age |   .2719032   .0625359     4.35   0.000     .1493352    .3944712
        age2 |  -.0017959    .000416    -4.32   0.000    -.0026113   -.0009805
      educyr |   .0265974   .0043937     6.05   0.000     .0179859     .035209
      actlim |   .1955384   .0355161     5.51   0.000     .1259281    .2651488
      totchr |   .2226969   .0124128    17.94   0.000     .1983683    .2470254
       _cons |   -9.19017   2.337591    -3.93   0.000    -13.77176   -4.608576
-------------+----------------------------------------------------------------
    /lnalpha |  -.5259629   .0418671                     -.6080209    -.443905
-------------+----------------------------------------------------------------
       alpha |    .590986   .0247429                      .5444273    .6415264
------------------------------------------------------------------------------
Likelihood-ratio test of alpha=0:  chibar2(01) = 7089.37 Prob>=chibar2 = 0.000

. predict dvztnbcm, cm     // ztnb E[y|y>0] = E[y](1-/Pr[|y=0]

. /* Following computes this manually
> scalar alpha = e(alpha)
> predict dvztnb, n
> generate pryeq0 = (1+alpha*dvztnb)^(-1/alpha)
> generate dvztnbcmman = dvztnb/(1-pryeq0)
> */
. generate dvhurdle = dvplogit*dvztnbcm

. 
. * Same hurdle logit-nb model using the user-written hnblogit command
. hnblogit docvis $xlist, nolog

Negative Binomial-Logit Hurdle Regression       Number of obs     =      3,677
                                                Wald chi2(7)      =     309.90
Log likelihood = -10493.225                     Prob > chi2       =     0.0000

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
logit        |
     private |  -.6586978   .1264608    -5.21   0.000    -.9065563   -.4108393
    medicaid |  -.0554225   .1726694    -0.32   0.748    -.3938483    .2830032
         age |  -.5428779   .2238845    -2.42   0.015    -.9816835   -.1040724
        age2 |   .0034989   .0014957     2.34   0.019     .0005673    .0064304
      educyr |   -.047035   .0155706    -3.02   0.003    -.0775529   -.0165171
      actlim |  -.1623927   .1523743    -1.07   0.287    -.4610408    .1362554
      totchr |  -1.050562   .0671922   -15.64   0.000    -1.182256   -.9188676
       _cons |   20.94163   8.335138     2.51   0.012     4.605058     37.2782
-------------+----------------------------------------------------------------
negbinomial  |
     private |   .1095566   .0345239     3.17   0.002      .041891    .1772222
    medicaid |   .0972308   .0470358     2.07   0.039     .0050423    .1894193
         age |   .2719031   .0625359     4.35   0.000      .149335    .3944712
        age2 |  -.0017959    .000416    -4.32   0.000    -.0026113   -.0009805
      educyr |   .0265974   .0043937     6.05   0.000     .0179859     .035209
      actlim |   .1955384   .0355161     5.51   0.000      .125928    .2651487
      totchr |   .2226967   .0124128    17.94   0.000     .1983681    .2470252
       _cons |  -9.190165   2.337592    -3.93   0.000    -13.77176   -4.608569
-------------+----------------------------------------------------------------
    /lnalpha |   -.525962   .0418671   -12.56   0.000      -.60802    -.443904
------------------------------------------------------------------------------
AIC Statistic =     5.712

. estimates store HURDLENB

. 
. *** 2. EXTRAS: ZERO-INFLATED
. 
. * Zero-inflated negative binomial
. zinb docvis $xlist, inflate($xlist) nolog

Zero-inflated negative binomial regression      Number of obs     =      3,677
                                                Nonzero obs       =      3,276
                                                Zero obs          =        401

Inflation model = logit                         LR chi2(7)        =     588.93
Log likelihood  = -10492.88                     Prob > chi2       =     0.0000

------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .1289797    .032987     3.91   0.000     .0643264     .193633
    medicaid |   .1091956    .044511     2.45   0.014     .0219556    .1964356
         age |   .2847325   .0589577     4.83   0.000     .1691776    .4002874
        age2 |  -.0018781   .0003922    -4.79   0.000    -.0026469   -.0011093
      educyr |   .0253991   .0041432     6.13   0.000     .0172786    .0335196
      actlim |   .1737716   .0336464     5.16   0.000     .1078258    .2397173
      totchr |    .229991   .0120795    19.04   0.000     .2063156    .2536663
       _cons |  -9.680235   2.204161    -4.39   0.000    -14.00031    -5.36016
-------------+----------------------------------------------------------------
inflate      |
     private |  -.9152675   .2758402    -3.32   0.001    -1.455904   -.3746307
    medicaid |   .3487142   .3372848     1.03   0.301    -.3123519     1.00978
         age |  -.4357439   .5156094    -0.85   0.398     -1.44632    .5748319
        age2 |    .002805   .0034886     0.80   0.421    -.0040326    .0096426
      educyr |    -.08423   .0339273    -2.48   0.013    -.1507263   -.0177336
      actlim |  -.8241735   .4825621    -1.71   0.088    -1.769978    .1216309
      totchr |  -2.985208   .6860952    -4.35   0.000     -4.32993   -1.640486
       _cons |   17.09618   18.97318     0.90   0.368    -20.09057    54.28294
-------------+----------------------------------------------------------------
    /lnalpha |  -.5848279   .0349792   -16.72   0.000    -.6533859   -.5162699
-------------+----------------------------------------------------------------
       alpha |   .5572017   .0194905                      .5202812    .5967423
------------------------------------------------------------------------------

. estimates store ZINB

. predict dvzinb, n

. 
. quietly nbreg docvis $xlist

. estimates store NBREG

. predict dvnbreg, n

. 
. *** 2. EXTRAS: CONTINUOUS MIXTURE AND HIERARCHICAL MODEL
. 
. * Poisson - normal mixture
. gsem (docvis <- $xlist M1[educyr]), poisson

Fitting fixed-effects model:

Iteration 0:   log likelihood = -20148.682  
Iteration 1:   log likelihood = -15054.652  
Iteration 2:   log likelihood =  -15019.67  
Iteration 3:   log likelihood =  -15019.64  
Iteration 4:   log likelihood =  -15019.64  

Refining starting values:

Grid node 0:   log likelihood = -15039.845

Refining starting values (unscaled likelihoods):

Grid node 0:   log likelihood = -15039.845

Fitting full model:

Iteration 0:   log likelihood = -15039.845  (not concave)
Iteration 1:   log likelihood = -15037.763  (not concave)
Iteration 2:   log likelihood = -15031.129  (not concave)
Iteration 3:   log likelihood = -15023.138  (not concave)
Iteration 4:   log likelihood = -15012.113  (not concave)
Iteration 5:   log likelihood = -15006.382  (not concave)
Iteration 6:   log likelihood = -15004.651  
Iteration 7:   log likelihood =  -15004.24  
Iteration 8:   log likelihood = -15004.233  
Iteration 9:   log likelihood = -15004.233  

Generalized structural equation model           Number of obs     =      3,677
Response       : docvis
Family         : Poisson
Link           : log
Log likelihood = -15004.233

 ( 1)  [docvis]M1[educyr] = 1
---------------------------------------------------------------------------------
                |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
----------------+----------------------------------------------------------------
docvis          |
        private |   .1430063   .0143673     9.95   0.000     .1148468    .1711657
       medicaid |   .0932565   .0191176     4.88   0.000     .0557867    .1307262
            age |   .2919079   .0260086    11.22   0.000     .2409321    .3428838
           age2 |  -.0019193   .0001728   -11.11   0.000     -.002258   -.0015807
         educyr |   .0249328   .0038423     6.49   0.000     .0174021    .0324635
         actlim |   .1891015   .0146152    12.94   0.000     .1604561    .2177468
         totchr |   .2495125   .0046676    53.46   0.000     .2403643    .2586608
                |
     M1[educyr] |          1  (constrained)
                |
          _cons |  -10.07233   .9745124   -10.34   0.000    -11.98234   -8.162318
----------------+----------------------------------------------------------------
 var(M1[educyr])|   .0046838   .0023054                       .001785    .0122903
---------------------------------------------------------------------------------

. 
. * Hierarchical model with vary by cluster on educyr
. gsem (docvis <- $xlist M1[educyr]), poisson

Fitting fixed-effects model:

Iteration 0:   log likelihood = -20148.682  
Iteration 1:   log likelihood = -15054.652  
Iteration 2:   log likelihood =  -15019.67  
Iteration 3:   log likelihood =  -15019.64  
Iteration 4:   log likelihood =  -15019.64  

Refining starting values:

Grid node 0:   log likelihood = -15039.845

Refining starting values (unscaled likelihoods):

Grid node 0:   log likelihood = -15039.845

Fitting full model:

Iteration 0:   log likelihood = -15039.845  (not concave)
Iteration 1:   log likelihood = -15037.763  (not concave)
Iteration 2:   log likelihood = -15031.129  (not concave)
Iteration 3:   log likelihood = -15023.138  (not concave)
Iteration 4:   log likelihood = -15012.113  (not concave)
Iteration 5:   log likelihood = -15006.382  (not concave)
Iteration 6:   log likelihood = -15004.651  
Iteration 7:   log likelihood =  -15004.24  
Iteration 8:   log likelihood = -15004.233  
Iteration 9:   log likelihood = -15004.233  

Generalized structural equation model           Number of obs     =      3,677
Response       : docvis
Family         : Poisson
Link           : log
Log likelihood = -15004.233

 ( 1)  [docvis]M1[educyr] = 1
---------------------------------------------------------------------------------
                |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
----------------+----------------------------------------------------------------
docvis          |
        private |   .1430063   .0143673     9.95   0.000     .1148468    .1711657
       medicaid |   .0932565   .0191176     4.88   0.000     .0557867    .1307262
            age |   .2919079   .0260086    11.22   0.000     .2409321    .3428838
           age2 |  -.0019193   .0001728   -11.11   0.000     -.002258   -.0015807
         educyr |   .0249328   .0038423     6.49   0.000     .0174021    .0324635
         actlim |   .1891015   .0146152    12.94   0.000     .1604561    .2177468
         totchr |   .2495125   .0046676    53.46   0.000     .2403643    .2586608
                |
     M1[educyr] |          1  (constrained)
                |
          _cons |  -10.07233   .9745124   -10.34   0.000    -11.98234   -8.162318
----------------+----------------------------------------------------------------
 var(M1[educyr])|   .0046838   .0023054                       .001785    .0122903
---------------------------------------------------------------------------------

. 
. *** 2. EXTRAS: MODEL COMPARISON
. 
. * Comparison of NB and ZINB using countfit
. countfit docvis $xlist, nbreg prm nograph noestimates
Comparison of Mean Observed and Predicted Count

            Maximum       At      Mean
Model     Difference    Value    |Diff|
---------------------------------------------
PRM         0.102         0      0.042
NBRM       -0.023         1      0.007

PRM: Predicted and actual probabilities

Count   Actual    Predicted    |Diff|   Pearson
------------------------------------------------
0        0.109       0.007      0.102  5168.233
1        0.085       0.030      0.056   387.868
2        0.097       0.063      0.034    69.000
3        0.091       0.095      0.005     0.789
4        0.092       0.116      0.024    17.861
5        0.072       0.121      0.049    72.441
6        0.063       0.114      0.051    84.355
7        0.055       0.099      0.044    73.016
8        0.049       0.082      0.033    50.128
9        0.042       0.065      0.024    31.225
------------------------------------------------
Sum      0.756       0.793      0.422  5954.916

NBRM: Predicted and actual probabilities

Count   Actual    Predicted    |Diff|   Pearson
------------------------------------------------
0        0.109       0.091      0.018    12.708
1        0.085       0.108      0.023    17.288
2        0.097       0.105      0.008     2.270
3        0.091       0.096      0.005     1.086
4        0.092       0.085      0.007     2.333
5        0.072       0.074      0.001     0.072
6        0.063       0.063      0.000     0.004
7        0.055       0.054      0.001     0.088
8        0.049       0.046      0.003     0.694
9        0.042       0.039      0.003     0.873
------------------------------------------------
Sum      0.756       0.761      0.070    37.416

Tests and Fit Statistics

PRM            BIC=   -82.669  AIC=     8.174  Prefer  Over  Evidence
-------------------------------------------------------------------------
  vs NBRM      BIC= -8935.061  dif=  8852.392  NBRM    PRM   Very strong
               AIC=     5.765  dif=     2.409  NBRM    PRM
               LRX2= 8860.602  prob=    0.000  NBRM    PRM   p=0.000    
-------------------------------------------------------------------------
NBRM           BIC= -8935.061  AIC=     5.765  Prefer  Over  Evidence

. 
. * Compare LL, AIC, BIC across models
. estimates table NBREG HURDLENB ZINB, equation(1) b(%12.4f) se  ///
>    stats(N ll aic bic) stfmt(%12.1f)  newpanel

-----------------------------------------------------------
    Variable |    NBREG         HURDLENB         ZINB      
-------------+---------------------------------------------
#1           |
     private |       0.1641        -0.6587         0.1290  
             |       0.0332         0.1265         0.0330  
    medicaid |       0.1003        -0.0554         0.1092  
             |       0.0454         0.1727         0.0445  
         age |       0.2941        -0.5429         0.2847  
             |       0.0602         0.2239         0.0590  
        age2 |      -0.0019         0.0035        -0.0019  
             |       0.0004         0.0015         0.0004  
      educyr |       0.0287        -0.0470         0.0254  
             |       0.0042         0.0156         0.0041  
      actlim |       0.1895        -0.1624         0.1738  
             |       0.0348         0.1524         0.0336  
      totchr |       0.2776        -1.0506         0.2300  
             |       0.0121         0.0672         0.0121  
       _cons |     -10.2975        20.9416        -9.6802  
             |       2.2474         8.3351         2.2042  
-------------+---------------------------------------------
    /lnalpha |      -0.4453                       -0.5848  
             |       0.0307                        0.0350  
-------------+---------------------------------------------
negbinomial  |
     private |                      0.1096                 
             |                      0.0345                 
    medicaid |                      0.0972                 
             |                      0.0470                 
         age |                      0.2719                 
             |                      0.0625                 
        age2 |                     -0.0018                 
             |                      0.0004                 
      educyr |                      0.0266                 
             |                      0.0044                 
      actlim |                      0.1955                 
             |                      0.0355                 
      totchr |                      0.2227                 
             |                      0.0124                 
       _cons |                     -9.1902                 
             |                      2.3376                 
-------------+---------------------------------------------
lnalpha      |
       _cons |                     -0.5260                 
             |                      0.0419                 
-------------+---------------------------------------------
inflate      |
     private |                                    -0.9153  
             |                                     0.2758  
    medicaid |                                     0.3487  
             |                                     0.3373  
         age |                                    -0.4357  
             |                                     0.5156  
        age2 |                                     0.0028  
             |                                     0.0035  
      educyr |                                    -0.0842  
             |                                     0.0339  
      actlim |                                    -0.8242  
             |                                     0.4826  
      totchr |                                    -2.9852  
             |                                     0.6861  
       _cons |                                    17.0962  
             |                                    18.9732  
-----------------------------------------------------------

-----------------------------------------------------------
  Statistics |    NBREG         HURDLENB         ZINB      
-------------+---------------------------------------------
           N |         3677           3677           3677  
          ll |     -10589.3       -10493.2       -10492.9  
         aic |      21196.7        21020.4        21019.8  
         bic |      21252.6        21126.0        21125.3  
-----------------------------------------------------------
                                               legend: b/se

. 
. * Compare predicted means across models
. summarize docvis dvnbreg dvhurdle dvzinb

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      docvis |      3,677    6.822682    7.394937          0        144
     dvnbreg |      3,677    6.890034    3.486562   2.078925   41.31503
    dvhurdle |      3,677    6.840676    3.134925    1.35431   31.86874
      dvzinb |      3,677    6.838704    3.135122   .9473827   32.98153

. correlate docvis dvnbreg dvhurdle dvzinb
(obs=3,677)

             |   docvis  dvnbreg dvhurdle   dvzinb
-------------+------------------------------------
      docvis |   1.0000
     dvnbreg |   0.3870   1.0000
    dvhurdle |   0.3990   0.9894   1.0000
      dvzinb |   0.3983   0.9882   0.9982   1.0000


. 
. *** 2. EXTRAS: QUANTILE COUNT REGRESSION - 2 COMPONENT NEGATIVE BINOMIAL
. 
. * To speed up number of reps is 50 but should be e.g. 1000
. qcount docvis $xlist, q(0.5) rep(50)
..................................................            

Count Data Quantile Regression
( Quantile 0.50 )
                                            Number of obs           =     3677
                                            No. jittered samples    =       50
------------------------------------------------------------------------------
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .1886224   .0500635     3.77   0.000     .0904997     .286745
    medicaid |   .1043362   .0710944     1.47   0.142    -.0350063    .2436787
         age |   .3552676   .0801129     4.43   0.000     .1982493     .512286
        age2 |  -.0023115   .0005318    -4.35   0.000    -.0033539   -.0012692
      educyr |   .0322744   .0054332     5.94   0.000     .0216256    .0429232
      actlim |   .1359789   .0475781     2.86   0.004     .0427276    .2292302
      totchr |   .3223034    .016654    19.35   0.000     .2896622    .3549446
       _cons |   -13.1382   2.996697    -4.38   0.000    -19.01162   -7.264787
------------------------------------------------------------------------------

. * My data set had a variable one in it and this causes problems for qcount_mfx
. capture drop one

. * Marginal effects
. qcount_mfx // MEM

 Marginal effects after qcount
        y = Qz(0.50|X)
          =  5.04493 (0.0965)
--------------------------------------------------------------------------------
                |      ME     Std. Err.      z    P>|z|  [  95% C.I  ]       X
----------------+---------------------------------------------------------------
private         |  .85909832  .22589128     3.8  0.0001  0.4164  1.3018    0.50 
medicaid        |  .49120392  .34721708    1.41  0.1572 -0.1893  1.1717    0.17 
age             |   1.614668  .36464188    4.43  0.0000  0.9000  2.3294   74.24 
age2            | -.01050582  .00242128   -4.34  0.0000 -0.0153 -0.0058 5552.94 
educyr          |  .14668499  .02488381    5.89  0.0000  0.0979  0.1955   11.18 
actlim          |  .63268383  .22903634    2.76  0.0057  0.1838  1.0816    0.33 
totchr          |  1.4648477     .07003    20.9  0.0000  1.3276  1.6021    1.84 
--------------------------------------------------------------------------------


 Marginal effects after qcount
        y = Qy(0.50|X)
          = 5 
------------------------------------------
                | ME  [95% C. Set]    X
----------------+-------------------------
private         |  0      0  1       0.50
medicaid        |  0      -1  1       0.17
age             |  1      0  2      74.24
age2            |  0      0  0    5552.94
educyr          |  0      0  0      11.18
actlim          |  0      0  1       0.33
totchr          |  1      1  1       1.84
------------------------------------------


. 
. *** 2. EXTRAS: FINITE MIXTURE MODEL - 2 COMPONENT NEGATIVE BINOMIAL
. 
. * Finite-mixture model using fmm command with constant probabilities
. use mus17data.dta, clear

. fmm 2, nolog: nbreg docvis $xlist

Finite mixture model                            Number of obs     =      3,677
Log likelihood = -10534.237

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
1.Class      |  (base outcome)
-------------+----------------------------------------------------------------
2.Class      |
       _cons |   .2286522   .3567562     0.64   0.522    -.4705772    .9278815
------------------------------------------------------------------------------

Class          : 1
Response       : docvis
Model          : nbreg, dispersion(mean)

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .3292254    .090978     3.62   0.000     .1509117    .5075391
    medicaid |   .1319335   .1180946     1.12   0.264    -.0995277    .3633948
         age |   .3818359    .142241     2.68   0.007     .1030487    .6606231
        age2 |  -.0024401   .0009451    -2.58   0.010    -.0042924   -.0005878
      educyr |   .0383352   .0106471     3.60   0.000     .0174674    .0592031
      actlim |    .066328   .0964388     0.69   0.492    -.1226887    .2553446
      totchr |   .5013939   .0493956    10.15   0.000     .4045802    .5982076
       _cons |  -15.15966   5.370014    -2.82   0.005     -25.6847   -4.634629
-------------+----------------------------------------------------------------
/docvis      |
     lnalpha |  -.9786601   .2237814                     -1.417264   -.5400566
------------------------------------------------------------------------------

Class          : 2
Response       : docvis
Model          : nbreg, dispersion(mean)

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .0957578   .0460128     2.08   0.037     .0055744    .1859411
    medicaid |   .0901739   .0631541     1.43   0.153    -.0336059    .2139536
         age |   .2691574   .0818963     3.29   0.001     .1086437    .4296712
        age2 |  -.0017901   .0005455    -3.28   0.001    -.0028593    -.000721
      educyr |   .0241906   .0058636     4.13   0.000     .0126982    .0356829
      actlim |    .219569   .0466449     4.71   0.000     .1281468    .3109913
      totchr |   .1758912   .0296181     5.94   0.000     .1178408    .2339417
       _cons |  -8.645314   3.070642    -2.82   0.005    -14.66366   -2.626967
-------------+----------------------------------------------------------------
/docvis      |
     lnalpha |  -.7697485   .0835753                      -.933553    -.605944
------------------------------------------------------------------------------

. 
. * Marginal predicted means for each component
. estat lcmean

Latent class marginal means                     Number of obs     =      3,677

------------------------------------------------------------------------------
             |            Delta-method
             |     Margin   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
1            |
      docvis |   4.477236    .516289     8.67   0.000     3.465329    5.489144
-------------+----------------------------------------------------------------
2            |
      docvis |   8.828413   .4718929    18.71   0.000      7.90352    9.753307
------------------------------------------------------------------------------

. 
. * Marginal predicted probabilities for each component
. estat lcprob

Latent class marginal probabilities             Number of obs     =      3,677

--------------------------------------------------------------
             |            Delta-method
             |     Margin   Std. Err.     [95% Conf. Interval]
-------------+------------------------------------------------
       Class |
          1  |   .4430847   .0880334      .2833547    .6155204
          2  |   .5569153   .0880334      .3844796    .7166453
--------------------------------------------------------------

. 
. * Marginal effects averaged across the two components
. margins, dydx(*)

Average marginal effects                        Number of obs     =      3,677
Model VCE    : OIM

Expression   : Predicted mean (# doctor visits), using class probabilities, predict(mu
               outcome(docvis))
dy/dx w.r.t. : private medicaid age age2 educyr actlim totchr

------------------------------------------------------------------------------
             |            Delta-method
             |      dy/dx   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   1.123926   .2338631     4.81   0.000     .6655627    1.582289
    medicaid |    .705085   .3179559     2.22   0.027     .0819029    1.328267
         age |   2.080845   .4228884     4.92   0.000     1.251999    2.909691
        age2 |  -.0136422   .0028106    -4.85   0.000     -.019151   -.0081334
      educyr |   .1949865   .0302031     6.46   0.000     .1357896    .2541835
      actlim |   1.211131    .243566     4.97   0.000     .7337508    1.688512
      totchr |   1.859463   .0975413    19.06   0.000     1.668286    2.050641
------------------------------------------------------------------------------

. 
. * Predict y for two components
. estimates store FMM2

. predict dvfit*
(option mu assumed)

. summarize dvfit1 dvfit2

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      dvfit1 |      3,677    4.477236    4.406067   .5638143   75.16145
      dvfit2 |      3,677    8.828413    2.976359   3.799716   31.43396

. 
. * Create histograms of fitted values  
. quietly histogram dvfit1 if dvfit1< 40, saving(graph1.gph, replace)

. quietly histogram dvfit2 if dvfit1< 40, saving(graph2.gph, replace)

. quietly graph combine graph1.gph graph2.gph, ysize(3) xsize(6) ///
>     ycommon xcommon iscale(1.15)

. quietly graph export countfig21finitemixture.wmf, replace

. 
. * Finite-mixture model poisson
. use mus17data.dta, clear

. fmm 2, nolog: poisson docvis $xlist

Finite mixture model                            Number of obs     =      3,677
Log likelihood = -12100.185

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
1.Class      |  (base outcome)
-------------+----------------------------------------------------------------
2.Class      |
       _cons |  -.5980831    .050677   -11.80   0.000    -.6974083   -.4987579
------------------------------------------------------------------------------

Class          : 1
Response       : docvis
Model          : poisson

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .2393558   .0312351     7.66   0.000     .1781361    .3005756
    medicaid |   .0463821    .040343     1.15   0.250    -.0326888     .125453
         age |  -.6233526   .0583698   -10.68   0.000    -.7377554   -.5089499
        age2 |   .0045366   .0003904    11.62   0.000     .0037714    .0053019
      educyr |   .0284599   .0039608     7.19   0.000     .0206969    .0362229
      actlim |   .1723268   .0318187     5.42   0.000     .1099633    .2346903
      totchr |   .3286694   .0097798    33.61   0.000     .3095014    .3478374
       _cons |   21.35464   2.164152     9.87   0.000     17.11298     25.5963
------------------------------------------------------------------------------

Class          : 2
Response       : docvis
Model          : poisson

------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
docvis       |
     private |   .1566873   .0252956     6.19   0.000     .1071088    .2062658
    medicaid |   .1924436   .0337855     5.70   0.000     .1262252     .258662
         age |   1.232368   .0485717    25.37   0.000     1.137169    1.327567
        age2 |  -.0085471   .0003268   -26.15   0.000    -.0091876   -.0079065
      educyr |   .0219929   .0032055     6.86   0.000     .0157102    .0282756
      actlim |   .1486859   .0260608     5.71   0.000     .0976077    .1997641
      totchr |   .1898829    .009189    20.66   0.000     .1718728     .207893
       _cons |  -42.46506   1.795471   -23.65   0.000    -45.98412     -38.946
------------------------------------------------------------------------------

. 
. *** 2. EXTRAS: DIAGNOSTICS
. 
. * Residuals
. quietly glm docvis $xlist, family(poisson) link(log)

. predict rraw, response 

. predict rpearson, pearson

. predict rdeviance, deviance

. predict ranscombe, anscombe

. predict hat, hat

. predict cooksd, cooksd

. summarize rraw rpearson rdeviance ranscombe hat cooksd, sep(10)

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
        rraw |      3,677   -7.08e-10    6.808178  -29.12996   136.9007
    rpearson |      3,677   -.0060737    2.509354  -4.914746   51.38051
   rdeviance |      3,677   -.3438453    2.210397  -6.087067   24.35213
   ranscombe |      3,677   -.3634087    2.254119  -6.153762   25.72892
         hat |      3,677    .0021757    .0015322   .0007023    .027556
      cooksd |      3,677    .0019357    .0177516   6.54e-11    .964198

. correlate rraw rpearson rdeviance ranscombe
(obs=3,677)

             |     rraw rpearson rdevia~e ransco~e
-------------+------------------------------------
        rraw |   1.0000
    rpearson |   0.9792   1.0000
   rdeviance |   0.9454   0.9669   1.0000
   ranscombe |   0.9435   0.9661   0.9998   1.0000


. 
. * An R-squared measure
. capture drop yphat

. quietly poisson docvis $xlist, vce(robust)

. predict yphat, n

. quietly correlate docvis yphat

. display "Squared correlation between y and yhat = " r(rho)^2
Squared correlation between y and yhat = .1530784

. 
. * Overdispersion test against V[y|x] = E[y|x] + a*(E[y|x]^2)
. quietly poisson docvis $xlist, vce(robust)

. predict muhat, n

. quietly generate ystar = ((docvis-muhat)^2 - docvis)/muhat

. regress ystar muhat, noconstant noheader
------------------------------------------------------------------------------
       ystar |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       muhat |   .7047319   .1035926     6.80   0.000     .5016273    .9078365
------------------------------------------------------------------------------

. 
. * Newer command - use rather than countfit below  
. quietly poisson docvis $xlist

. chi2gof, cells(11) table

Chi-square Goodness-of-Fit Test for Poisson Model:

         Chi-square chi2(10) = 1103.43
         Prob>chi2           =    0.00

------------------------------------------------------------
                                       Fitted  
      Cells  Abs. Freq.  Rel. Freq.  Rel. Freq.   Abs. Dif.
------------------------------------------------------------
        0      401         .1091       .0074       .1017
        1      314         .0854       .0296       .0558
        2      358         .0974        .063       .0344
        3      334         .0908       .0954       .0045
        4      339         .0922       .1159       .0237
        5      266         .0723       .1212       .0489
        6      231         .0628        .114       .0511
        7      202         .0549       .0994       .0444
        8      179         .0487       .0821       .0335
        9      154         .0419       .0655       .0236
 10 or more    791         .2445       .2067       .0378
------------------------------------------------------------

. 
. * Poisson: Sample vs avg predicted probabilities of y = 0, 1, ..., 10 
. countfit docvis $xlist, maxcount(10) prm nograph noestimates nofit
Comparison of Mean Observed and Predicted Count

            Maximum       At      Mean
Model     Difference    Value    |Diff|
---------------------------------------------
PRM         0.102         0      0.040

PRM: Predicted and actual probabilities

Count   Actual    Predicted    |Diff|   Pearson
------------------------------------------------
0        0.109       0.007      0.102  5168.233
1        0.085       0.030      0.056   387.868
2        0.097       0.063      0.034    69.000
3        0.091       0.095      0.005     0.789
4        0.092       0.116      0.024    17.861
5        0.072       0.121      0.049    72.441
6        0.063       0.114      0.051    84.355
7        0.055       0.099      0.044    73.016
8        0.049       0.082      0.033    50.128
9        0.042       0.065      0.024    31.225
10       0.029       0.051      0.021    33.402
------------------------------------------------
Sum      0.785       0.844      0.443  5988.318

. 
. *** 2. EXTRAS: ENDOGENOUS REGRESSORS
. 
. use mus17data.dta, clear

. global xlist private medicaid age age2 educyr actlim totchr 

. 
. ***** Approach 1: Conditional Moments
. 
. * GMM (Nonlinear IV) for Poisson: using command gmm
. global zlist income ssiratio medicaid age age2 educyr actlim totchr

. gmm (docvis - exp({xb:$xlist}+{b0})), instruments($zlist) onestep nolog

Final GMM criterion Q(b) =  .0495772

GMM estimation 

Number of parameters =   8
Number of moments    =   9
Initial weight matrix: Unadjusted                 Number of obs   =      3,677

------------------------------------------------------------------------------
             |               Robust
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |    .592014   .3397351     1.74   0.081    -.0738545    1.257882
    medicaid |   .3186684   .1909954     1.67   0.095    -.0556757    .6930124
         age |   .3323178   .0705385     4.71   0.000     .1940648    .4705708
        age2 |   -.002176   .0004643    -4.69   0.000     -.003086   -.0012659
      educyr |   .0190887   .0092216     2.07   0.038     .0010147    .0371627
      actlim |   .2084978   .0433758     4.81   0.000     .1234827    .2935128
      totchr |    .241843   .0129869    18.62   0.000     .2163892    .2672968
-------------+----------------------------------------------------------------
         /b0 |  -11.86322   2.732853    -4.34   0.000    -17.21952   -6.506931
------------------------------------------------------------------------------
Instruments for equation 1: income ssiratio medicaid age age2 educyr actlim totchr _cons

. 
. * GMM (Nonlinear IV) for Poisson: computation using command optimize
. generate cons = 1

. local y docvis

. local xlist private medicaid age age2 educyr actlim totchr cons

. local zlist income ssiratio medicaid age age2 educyr actlim totchr cons

. mata
------------------------------------------------- mata (type end to exit) ----------------------
:   void pgmm(todo, b, y, X, Z, Qb, g, H)
>   {
>     Xb = X*b'
>     mu = exp(Xb)
>     h = Z'(y-mu)
>     W = cholinv(cross(Z,Z))
>     Qb = h'W*h
>     if (todo == 0) return
>     G = -(mu:*Z)'X
>     g = (G'W*h)'
>     if (todo == 1) return
>     H = G'W*G
>     _makesymmetric(H)
>    }

:   st_view(y=., ., "`y'")

:   st_view(X=., ., tokens("`xlist'"))

:   st_view(Z=., ., tokens("`zlist'"))

:   S = optimize_init()

:   optimize_init_which(S,"min")

:   optimize_init_evaluator(S, &pgmm())

:   optimize_init_evaluatortype(S, "d2")

:   optimize_init_argument(S, 1, y)

:   optimize_init_argument(S, 2, X)

:   optimize_init_argument(S, 3, Z)

:   optimize_init_params(S, J(1,cols(X),0))

:   optimize_init_technique(S,"nr")

:   b = optimize(S)
Iteration 0:   f(p) =  156836.04  
Iteration 1:   f(p) =  21765.741  
Iteration 2:   f(p) =  2087.4467  
Iteration 3:   f(p) =  186.55764  
Iteration 4:   f(p) =    182.298  
Iteration 5:   f(p) =  182.29546  
Iteration 6:   f(p) =  182.29541  
Iteration 7:   f(p) =  182.29541  

:   // Compute robust estimate of VCE
:   Xb = X*b'

:   mu = exp(Xb)

:   h = Z'(y-mu)

:   W = cholinv(cross(Z,Z))

:   G = -(mu:*Z)'X

:   Shat = ((y-mu):*Z)'((y-mu):*Z)*rows(X)/(rows(X)-cols(X))

:   Vb = luinv(G'W*G)*G'W*Shat*W*G*luinv(G'W*G)

:   st_matrix("b",b)

:   st_matrix("Vb",Vb)

: end
------------------------------------------------------------------------------------------------

. 
. * Nonlinear IV estimator for Poisson: formatted results
. matrix colnames b = `xlist'

. matrix colnames Vb = `xlist'

. matrix rownames Vb = `xlist'

. ereturn post b Vb

. ereturn display
------------------------------------------------------------------------------
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .5920658   .3401151     1.74   0.082    -.0745475    1.258679
    medicaid |   .3186961   .1912099     1.67   0.096    -.0560685    .6934607
         age |   .3323219   .0706128     4.71   0.000     .1939233    .4707205
        age2 |   -.002176   .0004648    -4.68   0.000     -.003087    -.001265
      educyr |   .0190875   .0092318     2.07   0.039     .0009935    .0371815
      actlim |   .2084997   .0434233     4.80   0.000     .1233916    .2936079
      totchr |   .2418424    .013001    18.60   0.000     .2163608     .267324
        cons |  -11.86341   2.735737    -4.34   0.000    -17.22535    -6.50146
------------------------------------------------------------------------------

. 
. ***** Approach 2: Control Function but not MLE
. 
. * Control function approach to endogeneity
. * First stage is reduced form to get predicted residuals
. global xlist2 medicaid age age2 educyr actlim totchr

. regress private $xlist2 income ssiratio, vce(robust)

Linear regression                               Number of obs     =      3,677
                                                F(8, 3668)        =     249.61
                                                Prob > F          =     0.0000
                                                R-squared         =     0.2108
                                                Root MSE          =     .44472

------------------------------------------------------------------------------
             |               Robust
     private |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
    medicaid |  -.3934477   .0173623   -22.66   0.000    -.4274884   -.3594071
         age |  -.0831201   .0293734    -2.83   0.005    -.1407098   -.0255303
        age2 |   .0005257   .0001959     2.68   0.007     .0001417    .0009098
      educyr |   .0212523   .0020492    10.37   0.000     .0172345      .02527
      actlim |  -.0300936   .0176874    -1.70   0.089    -.0647718    .0045845
      totchr |   .0185063    .005743     3.22   0.001     .0072465    .0297662
      income |   .0027416   .0004736     5.79   0.000     .0018131    .0036702
    ssiratio |  -.0647637   .0211178    -3.07   0.002    -.1061675   -.0233599
       _cons |   3.531058    1.09581     3.22   0.001       1.3826    5.679516
------------------------------------------------------------------------------

. predict lpuhat, residual

. 
. * Second-stage Poisson with robust SEs
. poisson docvis private $xlist2 lpuhat, vce(robust) nolog

Poisson regression                              Number of obs     =      3,677
                                                Wald chi2(8)      =     718.87
                                                Prob > chi2       =     0.0000
Log pseudolikelihood = -15010.614               Pseudo R2         =     0.1303

------------------------------------------------------------------------------
             |               Robust
      docvis |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .5505541   .2453175     2.24   0.025     .0697407    1.031368
    medicaid |   .2628822   .1197162     2.20   0.028     .0282428    .4975217
         age |   .3350604   .0696064     4.81   0.000     .1986344    .4714865
        age2 |  -.0021923   .0004576    -4.79   0.000    -.0030893   -.0012954
      educyr |    .018606   .0080461     2.31   0.021      .002836     .034376
      actlim |   .2053417   .0414248     4.96   0.000     .1241505     .286533
      totchr |     .24147   .0129175    18.69   0.000     .2161523    .2667878
      lpuhat |  -.4166838    .249347    -1.67   0.095    -.9053949    .0720272
       _cons |  -11.90647   2.661445    -4.47   0.000     -17.1228    -6.69013
------------------------------------------------------------------------------

. 
. * Program and bootstrap for Poisson two-step estimator
. program endogtwostep, eclass
  1.   version 10.1
  2.   tempname b
  3.   capture drop lpuhat2
  4.   regress private $xlist2 income ssiratio
  5.   predict lpuhat2, residual
  6.   poisson docvis private $xlist2 lpuhat2
  7.   matrix `b' = e(b)
  8.   ereturn post `b'
  9. end

. bootstrap _b, reps(400) seed(10101) nodots nowarn: endogtwostep

Bootstrap results                               Number of obs     =      3,677
                                                Replications      =        400

------------------------------------------------------------------------------
             |   Observed   Bootstrap                         Normal-based
             |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     private |   .5505541   .2567815     2.14   0.032     .0472716    1.053837
    medicaid |   .2628822   .1205813     2.18   0.029     .0265473    .4992172
         age |   .3350604   .0707275     4.74   0.000     .1964371    .4736838
        age2 |  -.0021923   .0004667    -4.70   0.000    -.0031071   -.0012776
      educyr |    .018606   .0083042     2.24   0.025     .0023301     .034882
      actlim |   .2053417   .0412756     4.97   0.000      .124443    .2862405
      totchr |     .24147   .0134522    17.95   0.000     .2151042    .2678359
     lpuhat2 |  -.4166838   .2617964    -1.59   0.111    -.9297953    .0964276
       _cons |  -11.90647   2.698704    -4.41   0.000    -17.19583   -6.617104
------------------------------------------------------------------------------

. 
. ***** Approach 3: Fully structural MLE 
. 
. * Fully structural model of endogeneity
. gsem (docvis <- private $xlist2 L, poisson) ///
>    (private <- $xlist2 income ssiratio L), nolog

Generalized structural equation model           Number of obs     =      3,677

Response       : docvis
Family         : Poisson
Link           : log

Response       : private
Family         : Gaussian
Link           : identity

Log likelihood = -12796.769

 ( 1)  [docvis]L = 1
--------------------------------------------------------------------------------
               |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
---------------+----------------------------------------------------------------
docvis         |
       private |   .6331611   .2211577     2.86   0.004     .1997001    1.066622
      medicaid |   .2675884   .1007469     2.66   0.008      .070128    .4650487
           age |   .3687153   .0671669     5.49   0.000     .2370706      .50036
          age2 |  -.0023977   .0004442    -5.40   0.000    -.0032682   -.0015271
        educyr |   .0176344   .0073809     2.39   0.017     .0031681    .0321006
        actlim |   .1852541   .0383531     4.83   0.000     .1100835    .2604248
        totchr |   .3038513   .0130363    23.31   0.000     .2783006     .329402
             L |          1  (constrained)
         _cons |  -13.71873   2.545692    -5.39   0.000     -18.7082   -8.729267
---------------+----------------------------------------------------------------
private        |
      medicaid |  -.3945375   .0216178   -18.25   0.000    -.4369076   -.3521673
           age |  -.0829381   .0293255    -2.83   0.005    -.1404151   -.0254611
          age2 |   .0005247    .000195     2.69   0.007     .0001424    .0009069
        educyr |   .0213581   .0021595     9.89   0.000     .0171255    .0255907
        actlim |  -.0297923   .0176412    -1.69   0.091    -.0643686    .0047839
        totchr |   .0185451   .0058515     3.17   0.002     .0070763    .0300138
        income |   .0026301    .000402     6.54   0.000     .0018422    .0034179
      ssiratio |  -.0724733   .0211615    -3.42   0.001    -.1139492   -.0309975
             L |  -.1355801   .0580826    -2.33   0.020    -.2494199   -.0217404
         _cons |   3.528705   1.095672     3.22   0.001     1.381227    5.676184
---------------+----------------------------------------------------------------
         var(L)|   .6679536   .0465516                      .5826712    .7657183
---------------+----------------------------------------------------------------
 var(e.private)|   .1850348   .0119217                      .1630837    .2099404
--------------------------------------------------------------------------------

. 
. ********** CLOSE OUTPUT
. 
. * log close
. * clear
. * exit
. 
. 
end of do-file