* AED16.do March 2015 For Stata version 12
* Header and setup appear once; the earlier duplicated copy opened, closed and reopened the log.
capture log close // capture means program continues even if no log file open
* The lower-case log name is the one the duplicated setup ultimately wrote to, so it is kept.
log using aed16.txt, text replace
********** OVERVIEW OF AED16.do **********
* STATA Program
* copyright C 2015 by A. Colin Cameron
* Used for "Analysis of Economics Data: An Introduction to Econometrics"
* by A. Colin Cameron (2015) W.W. Norton
* To run you need file
* AED_EARNINGSAGE.DTA
* in your directory
********** SETUP **********
set more off
version 12
clear all
* No set mem command: Stata 12 and later manage memory automatically and ignore it.
* set linesize 82
set scheme s1manual /* Graphics scheme */
************
* This Stata program does the analysis for Chapter 16
* 16.1 DATA EXAMPLE: EARNINGS, EDUCATION, AGE and WORKER TYPE
* 16.2 SINGLE INDICATOR VARIABLE
* 16.3 SETS OF INDICATOR VARIABLES
* 16.4 QUADRATIC MODEL AND POLYNOMIAL MODEL
* 16.5 INTERACTED REGRESSORS
* 16.6 NATURAL LOGARITHMS
**** 16.1 DATA EXAMPLE: EARNINGS, EDUCATION, AGE and WORKER TYpE
* Load the earnings data, replacing anything currently in memory
use AED_EARNINGSAGE.DTA, clear
* Overview of every variable in the file
describe
summarize
* Table 16.1: the same two reports restricted to the variables used in this chapter
local table161 earnings age education dself dprivate dgovt agesq educbyage hours lnhours lnearnings
describe `table161'
summarize `table161'
**** 16.2 SINGLE INDICATOR VARIABLE
** Figure 16.1
* Generated data: 60 observations with a fixed seed so the figure is reproducible.
* The order of the generate statements that draw random numbers must not change,
* otherwise the simulated values change.
clear
set obs 60
set seed 10101
* Regressor x trends upward with normal noise; any negative draw is reflected to be positive
generate x = 1 + 2.5*_n/60 + rnormal(0,0.4)
replace x = -x if x < 0
* Indicator d equals 1 with probability one half
generate d = runiform() > 0.5
generate dx = d*x
* Panel 1: the indicator shifts the intercept only
generate y1 = 1 + 3*d + x + rnormal(0,1)
generate y1true = 1 + 3*d + x
* Panel 2: the indicator shifts both the intercept and the slope
generate y2 = 1 + 2*d + 0.5*x + 1*x*d + rnormal(0,1)
generate y2true = 1 + 2*d + 0.5*x + 1*x*d
* Sort so the line plots of the true regression lines are drawn left to right
sort x
* Figure 16.1 - two panels
* Each panel gets its own graph name so the second does not overwrite the first;
* the replace suboption lets the do-file be rerun in the same session.
twoway (scatter y1 x if d==1) (line y1true x if d==1) ///
       (scatter y1 x if d==0) (line y1true x if d==0), name(fig16_1a, replace)
twoway (scatter y2 x if d==1) (line y2true x if d==1) ///
       (scatter y2 x if d==0) (line y2true x if d==0), name(fig16_1b, replace)
* Discard whatever is in memory and reload the earnings data
use AED_EARNINGSAGE.DTA, clear
describe
summarize
* With regressors: indicator dself plus age and education as controls
regress earnings dself age education
**** 16.3 SETS OF INDICATOR VARIABLES
* Controls shared by every regression in Table 16.2, and the shared table layout
local controls age education
local tablayout b(%11.4f) se(%11.3f) t(%11.2f) stat(N r2 r2_a rmse F)

* Base with no indicator variables
regress earnings `controls'
estimates store NOINDIC

* Reference group is self-employed (dself omitted)
regress earnings `controls' dprivate dgovt
estimates store NOSELF
test dprivate dgovt

* Reference group is private (dprivate omitted)
regress earnings `controls' dself dgovt
estimates store NOPRIVATE
test dself dgovt

* Reference group is government (dgovt omitted)
regress earnings `controls' dself dprivate
estimates store NOGOVT
test dself dprivate

* No intercept: all three indicators enter, so the joint test is on equality of their coefficients
regress earnings `controls' dself dprivate dgovt, noconstant
estimates store NOINT
test (dself=dprivate) (dself=dgovt)
test (dself=dprivate) (dprivate=dgovt)

* Table 16.2
estimates table NOINDIC NOSELF NOPRIVATE NOGOVT NOINT, ///
    keep(age education dself dprivate dgovt _cons) `tablayout'

** Difference in means
* No regressors and drop self-employed
regress earnings dprivate dgovt
estimates store NOREG1
* No regressors and drop intercept
regress earnings dself dprivate dgovt, noconstant
estimates store NOREG2
estimates table NOREG1 NOREG2, keep(dself dprivate dgovt _cons) `tablayout'

* ANOVA gives the same F statistic
* typeworker codes the three indicators as 1 = dself, 2 = dprivate, 3 = dgovt
generate typeworker = dself + 2*dprivate + 3*dgovt
by typeworker, sort: summarize earnings
anova earnings typeworker
**** 16.4 QUADRATIC AND POLYNOMIAL MODELS
*** Examples of Quadratic models
** Figure 16.2
* Deterministic grid of 100 points with x running from 1.03 to 4
clear
set obs 100
generate x = 1 + 3*_n/100
* y1 to y3 have a positive squared term, with turning point at x = 2.5, 4 and 1
generate y1 = 1 + 4*(x-2.5)^2
generate y2 = 1 + (x-4)^2
generate y3 = 1 + (x-1)^2
* y4 to y6 have a negative squared term, with the same three turning points
generate y4 = 10 - 4*(x-2.5)^2
generate y5 = 10 - (x-4)^2
generate y6 = 10 - (x-1)^2
* Figure 16.2 - six panels
* Each panel is stored under its own graph name; without a name every twoway call
* would overwrite the previous one and only the last panel would remain.
* The replace suboption lets the do-file be rerun in the same session.
twoway (line y1 x), name(fig16_2a, replace)
twoway (line y2 x), name(fig16_2b, replace)
twoway (line y3 x), name(fig16_2c, replace)
twoway (line y4 x), name(fig16_2d, replace)
twoway (line y5 x), name(fig16_2e, replace)
twoway (line y6 x), name(fig16_2f, replace)
*** Quadratic Model for Earnings on Age with Education as a regressor as well
* Swap the simulated data for the earnings data
use AED_EARNINGSAGE.DTA, clear
describe
summarize
* Linear Model
regress earnings age education
* Quadratic model (agesq is read from the dataset; presumably age squared)
regress earnings age agesq education
* The fitted quadratic in age has zero slope where b_age + 2*b_agesq*age = 0
display "Turning point is at " -_b[age]/(2*_b[agesq])
* Marginal effect of age for each observation, then overall and at ages 25 and 65
generate mequad = _b[age] + 2*_b[agesq]*age
summarize mequad
summarize mequad if age == 25
summarize mequad if age == 65
* Marginal effect at the mean: summarize leaves the mean of age in r(mean)
summarize age
display "MEM = " _b[age] + 2*_b[agesq]*r(mean)
**** 16.5 INTERACTED REGRESSORS
* No interaction
regress earnings age education
estimates store BASE
* Regression with interactions
* (educbyage is read from the dataset; presumably education times age)
regress earnings age education educbyage
estimates store INTERACT
* Joint test for statistical significance of education
* (this test used to be issued twice in a row; the redundant copy is removed)
test education educbyage
* Joint test for statistical significance of age
test age educbyage
* THe regressors are highly correlated
cor educbyage age education
* ME for education using margins
* Refit with factor-variable notation so margins knows about the interaction term
regress earnings c.education##c.age
* AME average marginal effect
margins, dydx(*)
* MEM marginal effect at the sample means
margins, dydx(*) atmean
* MER marginal effect at a representative value e.g. age=25
margins, dydx(*) at(age=25)
* ME for education done manually
quietly regress earnings age education educbyage
* MEM: evaluate the marginal effect of education at the sample mean of age
quietly sum age
scalar meanage = r(mean)
lincom _b[education] + meanage*_b[educbyage]
di "MEM = " _b[education] + meanage*_b[educbyage]
* AME: average the observation-level marginal effects
gen ME_Ed = _b[education] + age*_b[educbyage]
mean ME_Ed
* MER
* The mean command replaced the stored estimates, so refit before using _b[] again
quietly regress earnings age education educbyage
di "ME of education at age 25 = " _b[education] + 25*_b[educbyage]
di "ME of education at age 65 = " _b[education] + 65*_b[educbyage]
**** 16.6 NATURAL LOGARITHMS
* Both regressions share the same right-hand side
local rhs age education dself dgovt lnhours
* Levels as dependent variable
regress earnings `rhs'
predict pearnings
* Natural logarithm as dependent variable
regress lnearnings `rhs'
predict plnearnings
* Retransformation bias
* e(rmse) is still the root MSE of the log regression, since predict and generate leave e() alone
display "s_e = " e(rmse) " and exp(s_e^2/2) = " exp(e(rmse)^2/2)
* Naive prediction in levels: exponentiate the fitted log earnings
gen biasedpearnings = exp(plnearnings)
* Corrected prediction: scale the naive one by exp(s_e^2/2)
gen correctedpearnings = exp(e(rmse)^2/2)*biasedpearnings
* Compare actual earnings with the three sets of predictions
summarize earnings pearnings biasedpearnings correctedpearnings
correlate earnings pearnings correctedpearnings
********** CLOSE OUTPUT
log close