* AED16.do March 2015 For Stata version 12

* Close any log left open by an earlier run; capture lets the program
* continue even if no log file is open.
capture log close
* Plain-text log, overwritten on each run. The lower-case name is the log
* that previously received the analysis output, so it is the one kept.
log using aed16.txt, text replace

********** OVERVIEW OF AED16.do **********

* STATA Program
* copyright C 2015 by A. Colin Cameron
* Used for "Analysis of Economics Data: An Introduction to Econometrics"
* by A. Colin Cameron (2015) W.W. Norton

* To run you need file
*   AED_EARNINGSAGE.DTA
* in your directory

********** SETUP **********

* The header, log and setup commands used to appear twice in a row; they
* are issued once here. "set mem" is omitted because Stata 12 manages
* memory automatically and ignores the setting.
set more off
version 12
clear all
* set linesize 82
set scheme s1manual  /* Graphics scheme */

************

* This Stata program does the analysis for Chapter 16
*  16.1 DATA EXAMPLE: EARNINGS, EDUCATION, AGE and WORKER TYPE
*  16.2 SINGLE INDICATOR VARIABLE
*  16.3 SETS OF INDICATOR VARIABLES
*  16.4 QUADRATIC MODEL AND POLYNOMIAL MODEL
*  16.5 INTERACTED REGRESSORS
*  16.6 NATURAL LOGARITHMS

****  16.1 DATA EXAMPLE: EARNINGS, EDUCATION, AGE and WORKER TYPE

* Load the earnings data set, replacing whatever is currently in memory,
* then list and summarize every variable it contains.
use AED_EARNINGSAGE.DTA, clear
describe
summarize

* Table 16.1: the same two listings restricted to the variables below.
local table161 earnings age education dself dprivate dgovt agesq educbyage ///
    hours lnhours lnearnings
describe `table161'
summarize `table161'

****  16.2 SINGLE INDICATOR VARIABLE

** Figure 16.1
* Generated data: 60 observations. The seed fixes the random draws, so the
* order of the generate statements below must not be changed.
clear
set obs 60
set seed 10101
generate x = 1 + 2.5*_n/60 + rnormal(0,0.4)
* Reflect any negative draw so that x is never negative
replace x = -x if x < 0
* d is a 0/1 indicator, equal to 1 when the uniform draw exceeds 0.5
generate d = runiform() > 0.5
* Interaction of d and x (not used by the plots below)
generate dx = d*x
* y1: the indicator shifts the intercept only; y1true is the noise-free line
generate y1 = 1 + 3*d + x + rnormal(0,1)
generate y1true = 1 + 3*d + x
* y2: the indicator shifts both intercept and slope; y2true is noise-free
generate y2 = 1 + 2*d + 0.5*x + 1*x*d + rnormal(0,1)
generate y2true = 1 + 2*d + 0.5*x + 1*x*d
* Sort on x so that the line plots are drawn left to right
sort x

* Figure 16.1 - two panels
* Each panel is stored under its own name; without name() the second
* twoway call would overwrite the first and only one panel would survive.
twoway (scatter y1 x if d==1) (line y1true x if d==1) ///
       (scatter y1 x if d==0) (line y1true x if d==0), ///
       name(fig161a, replace)
twoway (scatter y2 x if d==1) (line y2true x if d==1) ///
       (scatter y2 x if d==0) (line y2true x if d==0), ///
       name(fig161b, replace)
* Put the two panels side by side in a single figure
graph combine fig161a fig161b, name(fig161, replace)

* Discard the generated data and reload the earnings data set
use AED_EARNINGSAGE.DTA, clear
describe
summarize

* Single indicator (dself) together with age and education as regressors
regress earnings dself age education

****  16.3 SETS OF INDICATOR VARIABLES

* Regressors common to every specification in this section
local controls age education

* Specification 1: no indicator variables
regress earnings `controls'
estimates store NOINDIC

* Specification 2: dself omitted, so self-employed is the reference group
regress earnings `controls' dprivate dgovt
estimates store NOSELF
test dprivate dgovt

* Specification 3: dprivate omitted, so private is the reference group
regress earnings `controls' dself dgovt
estimates store NOPRIVATE
test dself dgovt

* Specification 4: dgovt omitted, so government is the reference group
regress earnings `controls' dself dprivate
estimates store NOGOVT
test dself dprivate

* Specification 5: all three indicators and no intercept.
* The joint test of equal indicator coefficients is written two ways.
regress earnings `controls' dself dprivate dgovt, noconstant
estimates store NOINT
test (dself=dprivate) (dself=dgovt)
test (dself=dprivate) (dprivate=dgovt)

* Table 16.2: the five stored specifications side by side
estimates table NOINDIC NOSELF NOPRIVATE NOGOVT NOINT, ///
    keep(age education dself dprivate dgovt _cons)      ///
    b(%11.4f) se(%11.3f) t(%11.2f)                      ///
    stat(N r2 r2_a rmse F)

** Difference in means

* Indicators only, with an intercept: dself is the omitted category
regress earnings dprivate dgovt
estimates store NOREG1

* Indicators only, all three included and the intercept dropped
regress earnings dself dprivate dgovt, noconstant
estimates store NOREG2

* Compare the two indicator-only regressions
estimates table NOREG1 NOREG2,          ///
    keep(dself dprivate dgovt _cons)    ///
    b(%11.4f) se(%11.3f) t(%11.2f)      ///
    stat(N r2 r2_a rmse F)

* ANOVA gives the same F statistic
* Collapse the three indicators into one variable coded 1, 2 or 3
generate typeworker = dself + 2*dprivate + 3*dgovt
sort typeworker
by typeworker: summarize earnings
anova earnings typeworker


****  16.4 QUADRATIC AND POLYNOMIAL MODELS

*** Examples of Quadratic models
** Figure 16.2
* Grid of 100 x values running from 1.03 to 4 in equal steps
clear
set obs 100
generate x = 1 + 3*_n/100
* y1-y3 open upward (minimum of 1), with turning points at x = 2.5, 4 and 1
generate y1 = 1 + 4*(x-2.5)^2
generate y2 = 1 + (x-4)^2
generate y3 = 1 + (x-1)^2
* y4-y6 open downward (maximum of 10), with the same three turning points
generate y4 = 10 - 4*(x-2.5)^2
generate y5 = 10 - (x-4)^2
generate y6 = 10 - (x-1)^2

* Figure 16.2 - six panels
* Each panel is stored under its own name; without name() every twoway
* call would overwrite the previous one and only the last would survive.
forvalues k = 1/6 {
    twoway (line y`k' x), name(fig162_`k', replace)
}
* Arrange the six panels as two rows of three in a single figure
graph combine fig162_1 fig162_2 fig162_3 fig162_4 fig162_5 fig162_6, ///
    cols(3) name(fig162, replace)

*** Quadratic Model for Earnings on Age with Education as a regressor as well
* Discard the generated data and reload the earnings data set
use AED_EARNINGSAGE.DTA, clear
describe
summarize

* Benchmark: earnings linear in age
regress earnings age education

* Earnings quadratic in age (agesq is read from the data set)
regress earnings age agesq education
* Keep the two age coefficients as scalars for the calculations below
scalar qb_age   = _b[age]
scalar qb_agesq = _b[agesq]

* Age at which the fitted quadratic turns: -b1/(2*b2)
display "Turning point is at " -scalar(qb_age)/(2*scalar(qb_agesq))

* Marginal effect of age for each observation: b1 + 2*b2*age
generate mequad = scalar(qb_age) + 2*scalar(qb_agesq)*age
* Summary over the whole sample, then for ages 25 and 65 only
summarize mequad
summarize mequad if age == 25
summarize mequad if age == 65

* Marginal effect evaluated at the sample mean of age
summarize age
display "MEM = " scalar(qb_age) + 2*scalar(qb_agesq)*r(mean)

****  16.5 INTERACTED REGRESSORS

* No interaction
regress earnings age education
estimates store BASE

* Regression with interactions
* (educbyage is read from the data set; presumably education*age - confirm)
regress earnings age education educbyage
estimates store INTERACT

* Joint test for statistical significance of education
* (this test used to be issued twice in a row; once is enough)
test education educbyage
* Joint test for statistical significance of age
test age educbyage

* The regressors are highly correlated
cor educbyage age education

* ME for education using margins
* Factor-variable notation builds the interaction, so margins can
* differentiate through it.
regress earnings c.education##c.age
* AME
margins, dydx(*)
* MEM
margins, dydx(*) atmean
* MER marginal effect at a representative value e.g. age=25
margins, dydx(*) at(age=25)

* ME for education done manually
quietly regress earnings age education educbyage
* MEM
* Evaluate b[education] + age*b[educbyage] at the sample mean of age;
* lincom reports the estimate with its standard error, di just the estimate.
quietly sum age
scalar meanage = r(mean)
lincom _b[education] + meanage*_b[educbyage]
di "MEM = " _b[education] + meanage*_b[educbyage]
* AME
* Compute the marginal effect for every observation, then average it
gen ME_Ed = _b[education] + age*_b[educbyage]
mean ME_Ed
* MER
* mean is an estimation command and replaces the stored coefficients,
* so the regression is refit before _b[] is used again.
quietly regress earnings age education educbyage
di "ME of education at age 25 = " _b[education] + 25*_b[educbyage]
di "ME of education at age 65 = " _b[education] + 65*_b[educbyage]

****  16.6 NATURAL LOGARITHMS

* Model in levels: fitted values are predictions of earnings
regress earnings age education dself dgovt lnhours
predict pearnings, xb

* Model in logs: fitted values are predictions of ln(earnings)
regress lnearnings age education dself dgovt lnhours
predict plnearnings, xb

* Retransformation bias
* Hold the log-model root MSE and the adjustment factor exp(s_e^2/2)
scalar rmse_ln = e(rmse)
scalar smear   = exp(scalar(rmse_ln)^2/2)
display "s_e = " scalar(rmse_ln) " and exp(s_e^2/2) = " scalar(smear)

* Exponentiating the log prediction gives the uncorrected level prediction;
* multiplying by the adjustment factor gives the corrected one.
generate biasedpearnings = exp(plnearnings)
generate correctedpearnings = scalar(smear)*biasedpearnings

* Compare actual earnings with the three sets of predictions
summarize earnings pearnings biasedpearnings correctedpearnings
correlate earnings pearnings correctedpearnings


********** CLOSE OUTPUT
* Stop writing to the log opened at the top of the program
log close


