------------------------------------------------------------------------------------------------------
       log:  c:\Imbook\bwebpage\Section2\mma09p2npmore.txt
  log type:  text
 opened on:  17 May 2005, 14:17:35

. 
. ********** OVERVIEW OF MMA09P2NPMORE.DO **********
. 
. * STATA Program 
. * copyright C 2005 by A. Colin Cameron and Pravin K. Trivedi 
. * used for "Microeconometrics: Methods and Applications" 
. * by A. Colin Cameron and Pravin K. Trivedi (2005)
. * Cambridge University Press 
. 
. * Chapter 9.4-9.5 (pages 307-19)
. * More on nonparametric regression, including Figures 9.5 - 9.7
. 
. * It provides
. * (1) Nonparametric regression
. *       k-nearest neighbors regression: Figure 9.5 in chapter 9.4.2 (ch9ksmma)
. *       Lowess regression: Figure 9.6 in chapter 9.4.3 (ch9ksmlowess)
. *       Kernel regression (using Stata add-on kernreg)
. * (2) Nonparametric derivative estimation
. *       Figure 9.7 in chapter 9.5.5 (ch9kderiv)  
. * (3) Cross-validation - still incomplete
. * using generated data (see below)
. 
. * See also mma09p1np.do for nonparametric density estimation and regression 
. 
. * This program uses free Stata add-on command kernreg
. * To obtain in Stata give command search kernreg
. 
. ********** SETUP **********
. 
. di "mma09p2npmore.do Cameron and Trivedi: Stata nonparametrics with generated data"
mma09p2npmore.do Cameron and Trivedi: Stata nonparametrics with generated data

. set more off

. version 8.0

. set scheme s1mono  /* Graphics scheme */

.   
. ********** GENERATE DATA  **********
. 
. * Model is  y = 150 + 6.5*x - 0.15*x^2 + 0.001*x^3 + u
. * where     u ~ N[0, 25^2]
. *           x = 1, 2, 3, ... , 100
. *           e ~ N[0, 2^2]
. 
. set seed 10101

. set obs 100
obs was 0, now 100

. gen u = 25*invnorm(uniform())

. gen x = _n

. gen y = 150 + 6.5*x - 0.15*x^2 + 0.001*x^3 + u

. sum

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           u |       100    2.809606    25.26291  -71.97334   73.59318
           x |       100        50.5    29.01149          1        100
           y |       100    228.5596    35.25377   132.2952   345.5873

. 
. * Write data to a text (ascii) file so can use with programs other than Stata
. outfile y x using mma09p2npmore.asc, replace

. 
. ******** PARAMETRIC REGRESSION **********
. 
. * OLS regression on cubic polymomial
. gen xsquared = x^2

. gen xcubed = x^3

. reg y x xsquared xcubed

      Source |       SS       df       MS              Number of obs =     100
-------------+------------------------------           F(  3,    96) =   31.15
       Model |  60691.6801     3    20230.56           Prob > F      =  0.0000
    Residual |  62348.2994    96  649.461452           R-squared     =  0.4933
-------------+------------------------------           Adj R-squared =  0.4774
       Total |   123039.98    99  1242.82808           Root MSE      =  25.485

------------------------------------------------------------------------------
           y |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
           x |   6.055295   .9033915     6.70   0.000     4.262077    7.848513
    xsquared |  -.1402283   .0207284    -6.77   0.000    -.1813738   -.0990828
      xcubed |   .0009492   .0001349     7.03   0.000     .0006814    .0012171
       _cons |   155.1521   10.58835    14.65   0.000     134.1344    176.1698
------------------------------------------------------------------------------

. predict ycubic
(option xb assumed; fitted values)

. summarize y ycubic

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           y |       100    228.5596    35.25377   132.2952   345.5873
      ycubic |       100    228.5596    24.75979   161.0681   307.6293

. 
. ******** (1) NONPARAMETRIC REGRESSION **********
. 
. * K-NEAREST NEIGHBORS REGRESSION - FIGURE 9.5
. * ksm without options gives running mean = moving average = centered kNN
. * Here _N = 100 so bwidth = 0.05 gives 100*0.05 = 5 nearest neighbours
. graph twoway (scatter y x, msize(medsmall) msymbol(o)) /*
>   */ (lowess y x, mean noweight bwidth(0.05) clstyle(p1)) /* 
>   */ (lfit y x, clstyle(p3)) /*
>   */ (lowess y x, mean noweight bwidth(0.25) clstyle(p2)), /*  
>   */ scale (1.2) plotregion(style(none)) /*
>   */ title("k-Nearest Neighbours Regression as k Varies") /*
>   */ xtitle("Regressor x", size(medlarge)) xscale(titlegap(*5)) /* 
>   */ ytitle("Dependent variable y", size(medlarge)) yscale(titlegap(*5)) /*
>   */ legend(pos(12) ring(0) col(1)) legend(size(small)) /*
>   */ legend( label(1 "Actual Data") label(2 "kNN (k=5)") /*
>   */         label(3 "Linear OLS") label(4 "kNN (k=25)")) 

. graph save ch9ksmma, replace
(file ch9ksmma.gph saved)

. graph export ch9ksmma.wmf, replace
(file c:\Imbook\bwebpage\Section2\ch9ksmma.wmf written in Windows Metafile format)

. 
. * VERIFY THAT kNN SAME AS MOVING AVERAGE  
. * Do moving average by hand for k = 5
. gen yma5 = (y[_n-2] + y[_n-1] + y + y[_n+1] + y[_n+2])/5
(4 missing values generated)

. replace yma5 = (y[_n]+y[_n+1]+y[_n+2])/3 if _n==1
(1 real change made)

. replace yma5 = (y[_n-1]+y[_n]+y[_n+1]+y[_n+2])/4 if _n==2
(1 real change made)

. replace yma5 = (y[_n+1]+y[_n]+y[_n-1]+y[_n-2])/4 if _n==99
(1 real change made)

. replace yma5 = (y[_n]+y[_n-1]+y[_n-2])/3 if _n==100
(1 real change made)

. lowess y x, mean noweight bwidth(0.05) nogr gen(yknn5)

. sum yma5 yknn5

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
        yma5 |       100    228.6037    26.63323   157.1434   297.4832
       yknn5 |       100    228.6037    26.63323   157.1434   297.4832

. 
. * LOWESS REGRESSION - FIGURE 9.6
. graph twoway (scatter y x, msize(medsmall) msymbol(o)) /*
>   */ (lowess y x, bwidth(0.25) clstyle(p1)) /* 
>   */ (line ycubic x, clstyle(p3)), /*
>   */ scale (1.2) plotregion(style(none)) /*
>   */ title("Lowess Nonparametric Regression") /*
>   */ xtitle("Regressor x", size(medlarge)) xscale(titlegap(*5)) /* 
>   */ ytitle("Dependent variable y", size(medlarge)) yscale(titlegap(*5)) /*
>   */ legend(pos(12) ring(0) col(1)) legend(size(small)) /*
>   */ legend( label(1 "Actual Data") label(2 "Lowess (k=25)") /*
>   */         label(3 "OLS Cubic Regression") )

. graph save ch9ksmlowess, replace
(file ch9ksmlowess.gph saved)

. graph export ch9ksmlowess.wmf, replace
(file c:\Imbook\bwebpage\Section2\ch9ksmlowess.wmf written in Windows Metafile format)

. 
. * KERNEL REGRESSION COMPARED TO k NEAREST NEIGHBORS REGRESSION
. * For this artificial example (with equally spaced x) 
. *  knn = kernel regression using uniform prior 
. * Kercode 1 = Uniform; 2 = Triangle; 3 = Epanechnikov; 4 = Quartic (Biweight);
. *         5 = Triweight; 6 = Gaussian; 7 = Cosinus
. * bwidth(#) defines width of the weight function window around each grid point.
. * npoint(#) specifies the number of equally spaced grid points over range of x.
. * Here bwidth(12) gives e.g. positive weight from x=15 to x=39 if current x=37  
. kernreg y x, bwidth(12) kercode(1) npoint(100) ylabel gen(pykernreg xkernreg)

. lowess y x, mean noweight bwidth(0.25) gen(yknn25)

. sum pykernreg yknn25

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
   pykernreg |       100    228.6856    18.75275   181.1579   272.5488
      yknn25 |       100    228.6856    18.75275   181.1578   272.5488

. 
. ******** (2) DERIVATIVE ESTIMATION **********
. 
. * DERIVATIVE ESTIMATION - FIGURE 9.7
. * Here use Lowess regression
. lowess y x, xlab ylab bwidth(0.25) lowess nogr gen(yplowess)  

. * Need to first sort data on regressor if data on regressor are not ordered
. sort x 

. gen dydxlowess = (yplowess - yplowess[_n-1])/(x - x[_n-1])
(1 missing value generated)

. * And do the same for the earlier fitted cubic
. gen dydxcubic = (ycubic - ycubic[_n-1])/(x - x[_n-1])
(1 missing value generated)

. graph twoway (line dydxlowess x, clstyle(p1)) /* 
>   */ (line dydxcubic x, clstyle(p3)), /*
>   */ scale (1.2) plotregion(style(none)) /*
>   */ title("Nonparametric Derivative Estimation") /*
>   */ xtitle("Regressor x", size(medlarge)) xscale(titlegap(*5)) /* 
>   */ ytitle("Dependent variable y", size(medlarge)) yscale(titlegap(*5)) /*
>   */ legend(pos(12) ring(0) col(1)) legend(size(small)) /*
>   */ legend( label(1 "From Lowess (k=25)") /*
>   */ label(2 "From OLS Cubic Regression") )

. graph save ch9kderiv, replace
(file ch9kderiv.gph saved)

. graph export ch9kderiv.wmf, replace
(file c:\Imbook\bwebpage\Section2\ch9kderiv.wmf written in Windows Metafile format)

. 
. ******** (3) CROSS-VALIDATION [PRELIMINARY] **********
. 
. /* The following does not work. 
>    I need to figure out use of macros */
. 
. forvalues i = 5/25 { 
  2.   scalar bd`i' = 0.01*`i'
  3.   global bw`i' = bd`i'
  4.   lowess y x, mean noweight bwidth($bw`i') gen(py`i') nogr
  5.   gen cv`i' = sum(3/2*(y-py`i')^2)
  6. } 

. sum

    Variable |       Obs        Mean    Std. Dev.       Min        Max
-------------+--------------------------------------------------------
           u |       100    2.809606    25.26291  -71.97334   73.59318
           x |       100        50.5    29.01149          1        100
           y |       100    228.5596    35.25377   132.2952   345.5873
    xsquared |       100      3383.5    3024.356          1      10000
      xcubed |       100      255025    289320.7          1    1000000
-------------+--------------------------------------------------------
      ycubic |       100    228.5596    24.75979   161.0681   307.6293
        yma5 |       100    228.6037    26.63323   157.1434   297.4832
       yknn5 |       100    228.6037    26.63323   157.1434   297.4832
   pykernreg |       100    228.6856    18.75275   181.1579   272.5488
    xkernreg |       100        50.5    29.01149          1        100
-------------+--------------------------------------------------------
      yknn25 |       100    228.6856    18.75275   181.1578   272.5488
    yplowess |       100    228.6494    25.46305   156.8217   302.5474
  dydxlowess |        99    1.471977     2.20262  -1.953159   6.964434
   dydxcubic |        99    1.480416    2.100452  -.8495026   6.342957
         py5 |       100    228.0408    8.046055   217.6967   243.0812
-------------+--------------------------------------------------------
         cv5 |       100    84655.13     34359.8   10940.13   162417.9
         py6 |       100    228.0408    8.046055   217.6967   243.0812
         cv6 |       100    84655.13     34359.8   10940.13   162417.9
         py7 |       100    228.0408    8.046055   217.6967   243.0812
         cv7 |       100    84655.13     34359.8   10940.13   162417.9
-------------+--------------------------------------------------------
         py8 |       100    228.0408    8.046055   217.6967   243.0812
         cv8 |       100    84655.13     34359.8   10940.13   162417.9
         py9 |       100    228.0408    8.046055   217.6967   243.0812
         cv9 |       100    84655.13     34359.8   10940.13   162417.9
        py10 |       100    228.0408    8.046055   217.6967   243.0812
-------------+--------------------------------------------------------
        cv10 |       100    84655.13     34359.8   10940.13   162417.9
        py11 |       100    228.0408    8.046055   217.6967   243.0812
        cv11 |       100    84655.13     34359.8   10940.13   162417.9
        py12 |       100    228.0408    8.046055   217.6967   243.0812
        cv12 |       100    84655.13     34359.8   10940.13   162417.9
-------------+--------------------------------------------------------
        py13 |       100    228.0408    8.046055   217.6967   243.0812
        cv13 |       100    84655.13     34359.8   10940.13   162417.9
        py14 |       100    228.0408    8.046055   217.6967   243.0812
        cv14 |       100    84655.13     34359.8   10940.13   162417.9
        py15 |       100    228.0408    8.046055   217.6967   243.0812
-------------+--------------------------------------------------------
        cv15 |       100    84655.13     34359.8   10940.13   162417.9
        py16 |       100    228.0408    8.046055   217.6967   243.0812
        cv16 |       100    84655.13     34359.8   10940.13   162417.9
        py17 |       100    228.0408    8.046055   217.6967   243.0812
        cv17 |       100    84655.13     34359.8   10940.13   162417.9
-------------+--------------------------------------------------------
        py18 |       100    228.0408    8.046055   217.6967   243.0812
        cv18 |       100    84655.13     34359.8   10940.13   162417.9
        py19 |       100    228.0408    8.046055   217.6967   243.0812
        cv19 |       100    84655.13     34359.8   10940.13   162417.9
        py20 |       100    228.0408    8.046055   217.6967   243.0812
-------------+--------------------------------------------------------
        cv20 |       100    84655.13     34359.8   10940.13   162417.9
        py21 |       100    228.0408    8.046055   217.6967   243.0812
        cv21 |       100    84655.13     34359.8   10940.13   162417.9
        py22 |       100    228.0408    8.046055   217.6967   243.0812
        cv22 |       100    84655.13     34359.8   10940.13   162417.9
-------------+--------------------------------------------------------
        py23 |       100    228.0408    8.046055   217.6967   243.0812
        cv23 |       100    84655.13     34359.8   10940.13   162417.9
        py24 |       100    228.0408    8.046055   217.6967   243.0812
        cv24 |       100    84655.13     34359.8   10940.13   162417.9
        py25 |       100    228.0408    8.046055   217.6967   243.0812
-------------+--------------------------------------------------------
        cv25 |       100    84655.13     34359.8   10940.13   162417.9

. * Then need to choose the `i' with minimum cv`i'
. * Problem here is that this gives e.g. $bw5 = 5 not 0.05
. 
. ********** CLOSE OUTPUT
. log close
       log:  c:\Imbook\bwebpage\Section2\mma09p2npmore.txt
  log type:  text
 closed on:  17 May 2005, 14:17:43
----------------------------------------------------------------------------------------------------
