* AED12.do March 2015 For Stata version 12 

* Start a fresh plain-text log for this run. Any log still open is closed
* first; capture lets the do-file continue when no log is open.
capture log close
log using "AED12.txt", replace text

********** OVERVIEW OF AED12.do **********

* STATA Program 
* copyright C 2015 by A. Colin Cameron
* Used for "Analysis of Economics Data: An Introduction to Econometrics"
* by A. Colin Cameron (2015) W.W. Norton

* To run you need file
*   AED_EARNINGSAGE.DTA
* in your directory

********** SETUP **********

* Session settings for the whole do-file.
set more off          // do not pause output waiting for a keypress
version 12            // interpret the commands below under Stata 12 rules
clear all
* NOTE: "set mem 10m" was removed. From Stata 12 onward memory is managed
* automatically and set memory is ignored, so the line had no effect here.
* set linesize 82
set scheme s1manual   // graphics scheme

************

* This Stata program does the analysis for Chapter 12
*  12.1 DATA EXAMPLE 
*  12.2 INDICATOR VARIABLE
*  12.3 NONLINEAR RELATIONSHIPS
*  12.4 TRANSFORMATION OF THE REGRESSOR
*  12.5 TRANSFORMATION OF THE DEPENDENT VARIABLE
*  12.6 ELASTICITIES
*  12.7 MODEL COMPARISON

****  12.1 DATA EXAMPLE 

* Load the earnings-age data, replacing whatever is currently in memory,
* then list the variables and summarize all of them.
use "AED_EARNINGSAGE.DTA", clear
describe
summarize

* Table 12.1: summary statistics for earnings, age and agesq
summarize earnings age agesq

* Linear model: OLS of earnings on age with default standard errors.
* Fitted values go to pearnlin; melin holds the age coefficient, which is
* the same for every observation.
regress earnings age
predict pearnlin
generate melin = _b[age]
estimates store LINEAR

* Same regression with heteroskedasticity-robust standard errors
regress earnings age, vce(robust)
estimates store LINhet

* Figure 12.1 - first panel
* Scatter of earnings on age (only points with earnings < 150000 are
* plotted) overlaid with the OLS line and the lowess smooth. The data are
* sorted by age first because twoway line joins points in data order.
sort age
lowess earnings age, generate(ylowess)
twoway scatter earnings age if earnings < 150000 ///
    || lfit earnings age                         ///
    || line ylowess age

****  12.2 INDICATOR VARIABLE

* Indicator variable model: earnings on dage, first with default standard
* errors (stored as DUMMY), then with robust standard errors.
regress earnings dage
estimates store DUMMY
regress earnings dage, vce(robust)

* Compare to difference in means: summarize earnings in each dage group
foreach g in 1 0 {
    summarize earnings if dage == `g'
}

* Difference in means done manually.
* For each dage group keep the sample size (n0, n1) and the standard
* deviation (s0, s1) of earnings as scalars.
foreach g in 0 1 {
    summarize earnings if dage == `g'
    scalar n`g' = r(N)
    scalar s`g' = r(sd)
}
* Standard error of the difference in means allowing unequal variances:
* square root of the sum of the two variance-over-sample-size terms.
scalar varofmeandiff = (s0^2)/n0 + (s1^2)/n1
scalar sterrorofmeandiff = sqrt(varofmeandiff)
display "standard error = " sterrorofmeandiff

* Difference in means test by Stata, not assuming equal variances
ttest earnings, by(dage) unequal

* Figure 12.1 - second panel
* Scatter of earnings on dage (earnings < 150000 only) with the OLS line
twoway scatter earnings dage if earnings < 150000 ///
    || lfit earnings dage

**** 12.3 NONLINEAR RELATIONSHIPS

** Figure 12.2
* Simulated sample of 30 observations. The sequence of rnormal() calls is
* left exactly as it was: with the seed fixed, the order of the calls
* determines every draw, so y1 and y2 are still generated even though
* they are not plotted below.
clear
set obs 30
set seed 23456
* gen x = rnormal(3,1)
gen x = 1 + 5*_n/30 + rnormal(0,.4)
* Reflect any negative x so that the logs below are defined
replace x = abs(x) if x < 0
gen y1 = -2 + 2.8*x + rnormal(0,0.4)
gen y2 = 2 - 0.4*x + 0.4*x^2 + rnormal(0,0.4)
gen lnx = ln(x)
* y3 is linear in ln(x)
gen y3 = -1 + 9*ln(x) + rnormal(0,0.2)
* ln(y4) is linear in x
gen lny4 = 0.2 + 0.40*x + rnormal(0,0.04)
gen y4 = exp(lny4)
* ln(y5) is linear in ln(x)
gen lny5 = ln(0.06) + 3*ln(x) + rnormal(0,0.06)
gen y5 = exp(lny5)
* Figure 12.2 - three panels, each a scatter with a fitted straight line
twoway scatter y3 x || lfit y3 x
twoway scatter y4 x || lfit y4 x
twoway scatter y5 x || lfit y5 x

** Figure 12.3
* Deterministic data: 1000 points with x running up to 3.5.
clear
set obs 1000
generate x = 3.5*_n/1000
* Quadratic curve, blanked out for x below 1
generate y = 12 - 2*(x-3)^2
replace y = . if x < 1
* Straight line 2 + 4x, also blanked out for x below 1
generate tangent = 2 + 4*x
replace tangent = . if x < 1
* Straight line 6 + 2x, kept only strictly between x = 2 and x = 3
generate discrete = .
replace discrete = 6 + 2*x if x > 2 & x < 3
* Figure 12.3 - two panels
twoway line y x || line tangent x
twoway line y x || line discrete x

****  12.4 TRANSFORMATIONS OF THE REGRESSOR

* Reload the earnings-age data (memory currently holds generated data)
use "AED_EARNINGSAGE.DTA", clear

* Linear log: earnings on lnage (presumably ln(age) - confirm in dataset)
regress earnings lnage
estimates store LINEARLOG

* Marginal effects: coefficient on lnage divided by age
foreach a in 25 65 {
    display "MER at age=`a': " _b[lnage]/`a'
}
quietly summarize age
display "MEM at mean age " r(mean) " = " _b[lnage]/r(mean)
* ME done manually for each observation, then averaged to give the AME
quietly regress earnings lnage
generate melinlog = _b[lnage]/age
quietly summarize melinlog
display "AME = " r(mean)

****  12.5 TRANSFORMATIONS OF THE DEPENDENT VARIABLE

**** Log linear
* Regress lnearnings on age and store the results
regress lnearnings age
estimates store LOGLINEAR

* Transformation bias
* yhatwrong exponentiates the fitted value of lnearnings; yhatcorrect
* rescales it by exp(0.5*rmse^2), using the root MSE of this regression.
predict lnyhat
display "Multiply by " exp(0.5*e(rmse)^2)
generate yhatwrong = exp(lnyhat)
generate yhatcorrect = exp(0.5*e(rmse)^2)*yhatwrong
display "Correction multiple = " exp(0.5*e(rmse)^2)
summarize earnings yhatwrong yhatcorrect

* Check normality of residuals from the most recent regression.
* The detail option is needed here: plain summarize reports only N, mean,
* sd, min and max, whereas detail adds skewness, kurtosis and percentiles,
* which are the statistics that bear on normality.
* uhat is dropped afterwards so the name can be reused.
predict uhat, resid
summarize uhat, detail
drop uhat

* Marginal effects
* Each is the age coefficient times the corrected prediction of earnings,
* exp(0.5*rmse^2)*exp(_cons + b*age), evaluated at the stated age.
foreach a in 25 65 {
    display "MER at age=`a': " _b[age]*exp(0.5*e(rmse)^2)*exp(_b[_cons]+_b[age]*`a')
}
quietly summarize age
display "MEM at mean age " r(mean) " = " _b[age]*exp(0.5*e(rmse)^2)*exp(_b[_cons]+_b[age]*r(mean))
* ME done manually for each observation; the mean of meloglin is the AME
generate meloglin = _b[age]*yhatcorrect
summarize meloglin
display "AME = " r(mean)

**** Log log
* Regress lnearnings on lnage and store the results
regress lnearnings lnage
estimates store LOGLOG

* Transformation bias
* yhatwrong2 exponentiates the fitted value; yhatcorrect2 rescales it by
* exp(0.5*rmse^2), using the root MSE of this regression.
predict lnyhat2
display "Multiply by " exp(0.5*e(rmse)^2)
generate yhatwrong2 = exp(lnyhat2)
generate yhatcorrect2 = exp(0.5*e(rmse)^2)*yhatwrong2
summarize earnings yhatwrong2 yhatcorrect2

* Marginal effects
* Each is the lnage coefficient times the corrected prediction of earnings
* at the stated age, divided by that age.
foreach a in 25 65 {
    display "MER at age=`a': " _b[lnage]*exp(0.5*e(rmse)^2)*exp(_b[_cons]+_b[lnage]*ln(`a'))/`a'
}
quietly summarize age
display "MEM at mean age " r(mean) " = " _b[lnage]*exp(0.5*e(rmse)^2)*exp(_b[_cons]+_b[lnage]*ln(r(mean)))/r(mean)
* ME done manually for each observation; the mean of meloglog is the AME
quietly regress lnearnings lnage
generate meloglog = _b[lnage]*yhatcorrect2/age
summarize meloglog
display "AME = " r(mean)

****  12.6 ELASTICITIES

* Models already estimated

****  12.7 MODEL COMPARISON

* Re-estimate and store the two models from sections 12.1 and 12.2
* (redone as dropped after earlier clear)
regress earnings age
estimates store LINEAR
regress earnings dage
estimates store DUMMY

* Log linear for dage
regress lnearnings dage
estimates store LOGDUMMY

* Table 12.3: coefficients, t statistics and R-squared for all six models
estimates table LINEAR LINEARLOG LOGLINEAR LOGLOG DUMMY LOGDUMMY, ///
    b(%10.4f)                                                     ///
    t(%9.2f)                                                      ///
    stat(r2)
* Marginal effects - average, min and max of the per-observation effects
summarize melinlog meloglin meloglog

* Prediction
* Fitted values from the regression on age and agesq, correlated with
* earnings and with the corrected log-log prediction.
quietly regress earnings age agesq
predict yhat
correlate earnings yhatcorrect2 yhat

* Compare predictions on the basis of correlation of y and yhat:
* linear and linear-log fitted values against the two corrected
* predictions from the log models.
quietly regress earnings age
predict yhatlinear
quietly regress earnings lnage
predict yhatlinlog
correlate earnings yhatlinear yhatlinlog yhatcorrect yhatcorrect2

********** CLOSE OUTPUT
* Close the log opened at the top of the do-file
log close


