# python_neural_net.py  NEURAL NET  April 2024  based on ass2s24q5_revised.py
  
# A. Colin Cameron, Dept. of Economics, University of California - Davis

# This neural net example comes from Aurelien Geron "Hands-On Machine Learning
# with Scikit-Learn, Keras and TensorFlow" 3rd edition chapter 10
# https://github.com/ageron/handson-ml3/

# This uses Keras - for documentation see https://keras.io/api/

# The program uses tensorflow, which is not in the base environment for Anaconda,
# and Keras, which is loaded within tensorflow
# Make a special environment for this program (do not use the base environment)
# This environment needs the packages
#   tensorflow
#   scikit-learn
# (the os module used below is part of the Python standard library)
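# For example, a suitable environment can be created from the Anaconda prompt
# (tf_env is a hypothetical name; adjust versions to your own setup):
#   conda create -n tf_env tensorflow scikit-learn
#   conda activate tf_env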

# Set up the working directory 
import os
os.getcwd()
os.chdir("c:/Users/ccameron/Dropbox/Desktop/Teaching/240f/assignments/")
os.getcwd()

# Import tensorflow which also brings in tensorflow.keras
import tensorflow as tf

# Read in the data which are provided in sklearn
# https://scikit-learn.org/stable/datasets/real_world.html
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
# Find information about the data and its organization
print(housing)
# Shows that housing.data has the features and housing.target has the outcome
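# A more readable description of the data is in the DESCR attribute
print(housing.DESCR)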
print(housing.feature_names)
print(housing.target_names)
print(housing.data.shape, housing.target.shape)
# Means of the data - the target is median house value in units of $100,000
import numpy as np
np.average(housing.target,axis=0)
np.average(housing.data,axis=0)
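# For readability, an illustrative loop pairing each feature name with its mean
for name, mean in zip(housing.feature_names, np.average(housing.data, axis=0)):
    print(f"{name}: {mean:.3f}")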

# Load and split the California housing dataset
from sklearn.model_selection import train_test_split
# First split into analysis data (75%) and test data (25%) 
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, train_size=0.75, random_state=42)
print(X_train_full.shape, y_train_full.shape, X_test.shape, y_test.shape)
# Second split analysis data into training data (75%) and validation data (25%)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, train_size=0.75, random_state=42)
print(X_train.shape, y_train.shape, X_valid.shape, y_valid.shape)
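
# As a check, the shares of the full sample are roughly
# 56.25% train (0.75*0.75), 18.75% validation (0.75*0.25), 25% test
n = housing.data.shape[0]
print(X_train.shape[0]/n, X_valid.shape[0]/n, X_test.shape[0]/n)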

# IMPORTANT: Clear the session to reset the name counters
# This is in case you have already run the code below
tf.keras.backend.clear_session()

# IMPORTANT: Set the seed for reproducibility
# It is not enough to just set np.random.seed(42) and tf.random.set_seed(42)
# Instead use the following more involved setup from
# https://stackoverflow.com/questions/36288235/how-to-get-stable-results-with-tensorflow-setting-random-seed

import random
SEED = 0
def set_seeds(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
def set_global_determinism(seed=SEED):
    set_seeds(seed=seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'   
set_global_determinism(seed=SEED)

# (1) Begin analysis - first define the neural net

# Normalize inputs to have mean 0 and variance 1
norm_layer = tf.keras.layers.Normalization(input_shape=X_train.shape[1:])

# Use a Sequential neural net
# The simplest neural net with a single stack of layers connected sequentially
# For nonsequential models use the Functional API https://keras.io/api/models/
 
# Here there are 3 hidden layers
# And the final layer applies no transformation to the output (no activation)
# (We would transform if e.g. we want the output to be between 0 and 1)
# The activation function for the hidden layers is relu
# ReLU = rectified linear unit activation function: ReLU(x) = max(x,0)
# For other activation functions see https://keras.io/api/layers/activations/
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(1)
])
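
# An equivalent way to build the same architecture uses the add() method
# (shown commented out so that a second model is not created here):
# model_alt = tf.keras.Sequential()
# model_alt.add(norm_layer)
# model_alt.add(tf.keras.layers.Dense(50, activation="relu"))
# model_alt.add(tf.keras.layers.Dense(50, activation="relu"))
# model_alt.add(tf.keras.layers.Dense(50, activation="relu"))
# model_alt.add(tf.keras.layers.Dense(1))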

# Summarize model and parameters
# Weight parameters are slope coefficients and bias parameters are intercepts
# Here we have three hidden layers with each having 50 units
# The 8 inputs give 50*8 weights + 50 biases = 450 parameters
# Then 50 inputs give 50*50 weights + 50 biases = 2550 parameters
# Then 50 inputs give 50*50 weights + 50 biases = 2550 parameters
# Then 50 inputs to the output layer give 50 weights + 1 bias = 51 parameters
model.summary()
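
# Check the arithmetic: 450 + 2550 + 2550 + 51 = 5601 trainable parameters
# (model.summary() also lists a few non-trainable parameters, the means and
# variances stored by the Normalization layer)
print(450 + 2550 + 2550 + 51)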

# Model's list of layers
model.layers

# Get the weights and biases for the first hidden layer
# These are the initialized values - 0 for biases and random for weights
hidden1 = model.layers[1]
hidden1.name
model.get_layer('dense') is hidden1
weights, biases = hidden1.get_weights()
weights.shape
weights
biases.shape
biases
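
# The weights come from the Glorot uniform initializer, the Keras default
# for Dense layers; a quick look at their range
print(weights.min(), weights.max())   # small values spread around 0
print(biases.min(), biases.max())     # all 0 at initialization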

# (2) Define the optimization method, loss function, ...

# Optimizers are given at https://keras.io/api/optimizers/
# Here we use the Adam optimizer, which is a stochastic gradient descent method
# The specified learning rate of 0.001 is the default for Adam
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
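
# Other optimizers can be swapped in the same way, e.g. plain stochastic
# gradient descent (shown for comparison only, not used here)
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)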

# Configure the model using the compile method
# For other methods see https://keras.io/api/models/model_training_apis/
# Loss is MSE; see https://keras.io/api/losses/ for other loss functions
# and https://keras.io/api/metrics/ for metrics
model.compile(loss="mse", optimizer=optimizer, metrics=["RootMeanSquaredError"])

# (3) Run the neural net

# Compute the means and variances from the training data that the
# normalization layer uses to standardize the inputs
norm_layer.adapt(X_train)
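
# Sanity check: after adapt() the normalized training features should have
# mean close to 0 and standard deviation close to 1
X_norm = norm_layer(X_train).numpy()
print(X_norm.mean(axis=0))
print(X_norm.std(axis=0))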

# Train the model
# Here only 30 epochs, where an epoch is one full pass through the training data
history = model.fit(X_train, y_train, batch_size=32, epochs=30,
                    validation_data=(X_valid, y_valid))

# Details on the iterations
history.params
print(history.epoch)
print(history.history)
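
# Optional: plot training and validation loss by epoch
# (requires matplotlib, which is not in the environment list above;
#  "loss" and "val_loss" are standard keys - confirm with history.history.keys())
import matplotlib.pyplot as plt
plt.plot(history.epoch, history.history["loss"], label="training MSE")
plt.plot(history.epoch, history.history["val_loss"], label="validation MSE")
plt.xlabel("Epoch")
plt.ylabel("MSE loss")
plt.legend()
plt.show()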

# (4) Evaluate how well the neural net performs in the test data set
mse_test, rmse_test = model.evaluate(X_test, y_test)
rmse_test
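
# The same RMSE can be computed by hand from predictions on the test data
# (should match rmse_test up to floating-point precision)
y_pred_all = model.predict(X_test)
print(np.sqrt(np.mean((y_pred_all.flatten() - y_test)**2)))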

# Predict for the first three observations in the test dataset
X_new = X_test[:3]
X_new
y_pred = model.predict(X_new)
y_pred
y_new = y_test[:3]
y_new
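
# Side-by-side comparison (predictions and actuals, in units of $100,000)
for pred, actual in zip(y_pred.flatten(), y_new):
    print(f"predicted {pred:.3f}  actual {actual:.3f}")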

# END
