* AED07.do March 2015 For Stata version 12 

* Close any log that is still open; capture lets the program continue when none is open
capture log close
* Start a plain-text log, replacing any existing AED07.txt
log using "AED07.txt", replace text

********** OVERVIEW OF AED07.do **********

* STATA Program 
* copyright C 2015 by A. Colin Cameron
* Used for "Analysis of Economics Data: An Introduction to Econometrics"
* by A. Colin Cameron (2015) W.W. Norton

* To run you need files
*   AED_EARNINGS.DTA  Same as chapter 2 
*   AED_EARNINGSBOTH.DTA  
* in your directory

********** SETUP **********

* Pin interpreter behaviour first so every later command runs under Stata 12 rules
version 12
* Do not pause output with --more-- prompts while the do-file runs
set more off
* Start from an empty session (data, stored results, programs, etc.)
clear all
* NOTE: "set mem 10m" was removed - from Stata 12 on memory is managed
* automatically and the setting is ignored
* set linesize 82
set scheme s1manual  /* Graphics scheme */

************

* This STATA program does analysis for Chapter 7
*   7.1 ONE-SIDED HYPOTHESIS TESTS
*   7.2 GENERALIZATIONS
*   7.3 DIFFERENCE IN TWO MEANS
*   7.4 PROPORTIONS DATA

**** 7.1 ONE-SIDED HYPOTHESIS TESTS

* Load the earnings data and show descriptive statistics for earnings
clear
use AED_EARNINGS.DTA
summarize earnings

* Test H0: mu >= 40000 against HA: mu < 40000
* ttest prints the two-sided and both one-sided p-values; the lower-tail
* value Pr(T < t) is the one that matches this HA, so it is also shown explicitly.
* The documented "==" form is used, consistent with prtest later in this file.
ttest earnings == 40000
display "One-sided p-value for HA: mu < 40000 = " r(p_l)

* Figure 7.1 - created elsewhere

**** 7.3 DIFFERENCE IN TWO MEANS

* Replace the data in memory with the two-group earnings sample and describe it
use AED_EARNINGSBOTH.DTA, clear
summarize

* First sample female (gender == 0): keep size, mean and standard deviation
summarize earnings if gender == 0
scalar mean1 = r(mean)
scalar s1 = r(sd)
scalar n1 = r(N)

* Second sample male (gender == 1): keep size, mean and standard deviation
summarize earnings if gender == 1
scalar mean2 = r(mean)
scalar s2 = r(sd)
scalar n2 = r(N)

* Difference in means, first sample minus second sample
scalar meandiff = mean1 - mean2

* Standard error of the difference in means (for unequal variances):
* sum of each sample variance over its sample size, then the square root
scalar varofmeandiff = s1^2/n1 + s2^2/n2
scalar sterrorofmeandiff = sqrt(varofmeandiff)

* METHOD 1 - Used in book: t distribution with n1 + n2 - 2 degrees of freedom
* This needs to be implemented manually
* 95% confidence interval = difference in means +/- critical value * standard error
scalar vbasic = n1 + n2 - 2
scalar tcrit = invttail(vbasic, .025)
scalar intwidth = tcrit * sterrorofmeandiff
* Report each component on its own line
display "Difference in means = " meandiff
display "t_.025;vbasic = " tcrit
display "standard error = " sterrorofmeandiff
display "interval width = " intwidth
display "95% confidence interval = (" meandiff-intwidth ", "  meandiff+intwidth ")"

* METHOD 2 - Used by Stata's ttest command with option unequal
* This has the same se as above but with Satterthwaite's formula for degrees of freedom
ttest earnings, unequal by(gender)

**** 7.4 PROPORTIONS DATA

* Create 480 ones (vote Democrat) and 441 zeros (vote Republican)
clear
set obs 921
generate x = 0
replace x = 1 if _n <= 480
summarize x

* METHOD 1 - Used in book
* Hypothesis test using Stata command - same as the manual calculation below
prtest x == .5

* METHOD 1 - Repeated manually
* Inference using Bernoulli results - this divides by n and not n-1 in getting standard error
* prtest replaced the r() results, so rerun summarize and copy what is needed
* into scalars straight away: r() is overwritten by the next r-class command
summarize x
scalar phat = r(mean)
scalar nobs = r(N)

* 95% confidence interval: standard error evaluated at the sample proportion
scalar seBern = sqrt(phat*(1-phat)/nobs)
scalar cihalfwidth = invnormal(.975)*seBern
di _n "Confidence interval = ("  phat-cihalfwidth  ", " phat+cihalfwidth ")"

* Two-sided test of H0: p = .5: standard error evaluated at the null value
scalar seH0 = sqrt(.5*(1-.5)/nobs)
scalar z = (phat-.5)/seH0
di "z = " z "  p = " 2*(1-normal(abs(z)))

********** CLOSE OUTPUT
* Close the log file opened with "log using" at the top of this do-file
log close



