Demo entry 6601136

40

   

Submitted by anonymous on Jun 05, 2017 at 03:23
Language: Python 3. Code size: 13.0 kB.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri May 26 19:42:44 2017

@author: Miranda
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt

# Load the raw series and keep the month labels (e.g. "Jan-2005").
rawdata = pd.read_csv('car.csv')
monthdate = rawdata["Date"]

# Parse every month label into a datetime for plotting on a time axis.
x_date_all = np.array([dt.datetime.strptime(month, '%b-%Y') for month in monthdate])
# NB: this may fail due to the Excel clash on macOS. If it does not work,
# open the csv in Excel and change the "Date" column format to mmm-yyyy.

data_all = rawdata["Data"]

"""
Plots of Raw data & Transformations
"""
# Original Data
fig_org = plt.figure()
plt.plot(x_date_all,data_all,label='Original Data')
plt.title('Original Data')
plt.xlabel('Date')
plt.ylabel('Data')
plt.legend(loc='upper left')
fig_org.savefig('Original Data.pdf')

# Log Version
fig_log = plt.figure()
plt.plot(x_date_all,np.log(data_all),label="Log Version")
plt.title('Log_Original Data')
plt.xlabel('Date')
plt.ylabel('Data')
plt.legend(loc='upper left')
fig_log.savefig('Log_Original Data.pdf')

# Power - 3
fig_pow = plt.figure()
plt.plot(x_date_all,np.power(data_all,3),label="Data with Power of 3")
plt.title('Power of 3_Original Data')
plt.xlabel('Date')
plt.ylabel('Data')
plt.legend(loc='upper left')
fig_pow.savefig('Power of 3_Original Data.pdf')


"""
Split Into In-sample and Out-sample
"""
# Define the In-Sample & Out-Sample Size
test_len =12
train_len = len(rawdata) - test_len
               
# Define the Training Data & Testing Data
Dtrain, Dtest = rawdata[0:train_len], rawdata[train_len:]

is_date = np.array([dt.datetime.strptime(d, '%b-%Y') for d in Dtrain["Date"]])
is_data = Dtrain["Data"]

os_date = np.array([dt.datetime.strptime(d, '%b-%Y') for d in Dtest["Date"]])
os_data = Dtest["Data"]


# Plot the In-Sample Data
fig_is = plt.figure()
plt.plot(is_date,is_data,label='In-Sample Data')
plt.title('In-Sample Data')
plt.xlabel('Date')
plt.ylabel('Data')
fig_is.savefig('In-Sample Data.pdf')


"""
MA
"""
# Calculate the 2x12 Moving Average
is_data_ma = is_data.rolling(2, center=True).mean().rolling(12,center=True).mean()

# Plot the 2x12 Moving Average
fig_ma = plt.figure()
plt.plot(is_date,is_data_ma,'r',label='2x12 MA smoothed data')
plt.plot(is_date,is_data,label='In-Sample data')
plt.title('2x12-MA Smoothed Data')
plt.xlabel('Date')
plt.ylabel('Data')
plt.legend(loc='upper left')
fig_ma.savefig('2x12 Moving Average.pdf')


"""
Drift method
"""
from dateutil.relativedelta import relativedelta

# In-Sample Data
dft_dates = is_date.tolist()
dft_values = is_data.tolist()

# Get the First and Last Observation
first_observation = dft_values[0]
last_observation = dft_values[131]

# Get the Number of Elements
T = len(dft_values)

# Use a Loop to Forecast Using Drift Method
drift_forecast = dft_values
for i in range(0, 12):
    h = i + 1
    dft_dates.append(dft_dates[-1] + relativedelta(months=+1))
    drift_forecast.append( last_observation + h*(last_observation - first_observation)/(T-1) )

drft_fc = pd.Series(drift_forecast)

# Plot the Forecast Using Drift Method
dt = []
dt.append(dft_dates[0])
dt.append(dft_dates[143])

ot=[]
ot.append(dft_values[0])
ot.append(dft_values[143]) # Prepare for plotting the line between the first and last observation


fig_drft = plt.figure()
plt.plot( dft_dates,drft_fc,'r',label = 'Drift Method' )
plt.plot( dft_dates,data_all,'c',label = 'Sample Data')
plt.title('Drift Method')
plt.xlabel('Date')
plt.ylabel('Data')
plt.legend(loc='upper left')

plt.plot( dt,ot,'y--') # Plot the line between the first and last observation for better visualisation

fig_drft.savefig('Forecast-Drift.pdf')
        

"""
Holt's Multiplicatve Method
"""
from holtwinters import multiplicative  
x = is_data.tolist() 
m_hm = 12
fc_hm = 12

y_hm ,Y_hm, alpha, beta, gamma,rmse_hm = multiplicative( x, m_hm, fc_hm )

fig_H_mul= plt.figure()

plt.plot(is_data,label='Training data')
plt.plot(data_all[132:144],label='Testing data')
plt.plot(np.arange(132,132+12,1),Y_hm,label='HW Multiplicative Forecast')
plt.legend(loc='upper left')

plt.title('HW Multiplicative Method')

fig_H_mul.savefig('Holt-multiplicative Method.pdf')



'''
Seasonal ARIMA(SARIMAX)
'''
import statsmodels.api as smapi
from scipy.optimize import brute


'''
    Check Autocorrelation
'''
# ACF/PACF of the raw in-sample series with 95% confidence bands.
smapi.graphics.tsa.plot_acf(is_data, lags=50, alpha=0.05)
smapi.graphics.tsa.plot_pacf(is_data, lags=50, alpha=0.05)


'''
    Get Stationarity
'''
# Work on a copy: the original `s_Dtrain = Dtrain` aliased the training
# frame, so the Date conversion below mutated Dtrain in place.
s_Dtrain = Dtrain.copy()

# Re-index the training data on a proper DatetimeIndex.
s_Dtrain['Date'] = pd.to_datetime(s_Dtrain['Date'])
s_Dtrain = s_Dtrain.set_index('Date')

sa_data = s_Dtrain['Data']

plt.figure()
plt.plot(sa_data)

smapi.graphics.tsa.plot_acf(sa_data, lags=30, alpha=0.05)
smapi.graphics.tsa.plot_pacf(sa_data, lags=30, alpha=0.05)

# Quartic-root transform to stabilise the variance.
sa_getsta_qr = np.power(sa_data, 0.25)

plt.figure()
plt.plot(sa_getsta_qr)

smapi.graphics.tsa.plot_acf(sa_getsta_qr, lags=30, alpha=0.05)
smapi.graphics.tsa.plot_pacf(sa_getsta_qr, lags=30, alpha=0.05)


# Apply second-order differencing to the transformed series
# (two passes of a first difference, dropping the leading NaN each time).
sa_getsta_diff = sa_getsta_qr
for _ in range(2):
    sa_getsta_diff = sa_getsta_diff.diff(1)[1:]

plt.figure()
plt.plot(sa_getsta_diff)

smapi.graphics.tsa.plot_acf(sa_getsta_diff, lags=30, alpha=0.05)
smapi.graphics.tsa.plot_pacf(sa_getsta_diff, lags=30, alpha=0.05)

'''
   Stationarity
'''
# Treat the twice-differenced quartic-root series as stationary from here on.
sa_sta_data = sa_getsta_diff

'''
    Find the Optimal Model
'''
def objfunc(order_full, endog):
    """Return the AIC of a SARIMAX fit for one (p,d,q,P,D,Q,s) grid point.

    Used as the objective for scipy.optimize.brute; returns np.inf when the
    model fails to fit so the failing grid point is simply skipped.
    """
    # brute supplies floats; SARIMAX expects integer orders. The original
    # converted only the seasonal half and passed raw floats for `order`.
    converted_order = [int(x) for x in order_full]
    try:
        fit = smapi.tsa.statespace.SARIMAX(
            endog, trend='n',
            order=converted_order[:3],
            seasonal_order=converted_order[3:],
        ).fit()
        return fit.aic
    except Exception:  # narrowed from bare `except:` so Ctrl-C still works
        return np.inf
    
# Exhaustive grid search over (p,d,q)(P,D,Q,s=12) minimising AIC.
grid = (slice(0, 6, 1), slice(1, 4, 1), slice(0, 6, 1),
        slice(0, 6, 1), slice(0, 4, 1), slice(0, 6, 1),
        slice(12, 13, 1))
optimal = brute(objfunc, grid, args=(sa_sta_data,), finish=None)
converted_order = [int(v) for v in optimal]

# Refit the winning specification and report it.
optimal_seasonal_model = smapi.tsa.statespace.SARIMAX(
    sa_sta_data, trend='n',
    order=converted_order[:3],
    seasonal_order=converted_order[3:],
)
results = optimal_seasonal_model.fit()
print(results.summary())

# Check the fitness visually against the stationary series.
sta_forecast = results.predict(end="2016-1-1")

plt.figure()
plt.plot(sta_forecast[1:])
plt.plot(sa_sta_data)

#  SARIMAX(1, 1, 1)x(5, 1, 1, 12) 

'''
    Fit the model and Forecast Using the In-Sample Data
'''
# Refit the selected SARIMA(1,1,1)x(5,1,1,12) on the undifferenced training data.
sarima_model = smapi.tsa.statespace.SARIMAX(s_Dtrain['Data'],
                                            order=(1, 1, 1),
                                            seasonal_order=(5, 1, 1, 12))
sarima_results = sarima_model.fit(disp=False)
print(sarima_results.summary())

sarima_forcast = sarima_results.predict(end="2016-12-1")[1:]

# Plot training data, held-out data and the 12 out-of-sample predictions.
fig_sa = plt.figure()
plt.plot(is_data, label='Training data')
plt.plot(data_all[132:144], label='Testing data')
plt.plot(np.arange(train_len, train_len + 12), sarima_forcast[131:143],
         label='SARIMA Forecast')
plt.legend(loc='upper left')
plt.title('SARIMA (1,1,1)x(5,1,1,12)')
fig_sa.savefig('SARIMA.pdf')


"""
NN Network
"""

import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error as mse

from keras.layers.core import Dense
from keras.models import Sequential

data_nn = pd.read_csv('car.csv', usecols=[1])
data_nn = data_nn.dropna()  # Drop all Nans
data_nn = data_nn.values

'''
    Plot the time series
'''
plt.figure()
plt.plot(data_nn)

'''
    Scaling
'''
scaler = MinMaxScaler(feature_range=(0, 1))
data_nn = scaler.fit_transform(data_nn)

'''
    Splitting
'''
train_nn,test_nn = data_nn[0:train_len,:], data_nn[train_len:len(data_nn),:]

# Sliding-window features: 12 lagged values predict the next one.
time_lag = 12


Xall, Yall = [], []
for i in range(time_lag, len(data_nn)):
    Xall.append(data_nn[i - time_lag:i, 0])
    Yall.append(data_nn[i, 0])

Xall = np.array(Xall)    # Convert them from list to array
Yall = np.array(Yall)

test_size = 12
train_size = len(Xall) - test_size

Xtrain = Xall[:train_size, :]
Ytrain = Yall[:train_size]

# BUG FIX: the test split must be the LAST `test_size` windows. The original
# `Xall[:test_size]` / `Yall[:test_size]` re-used the first 12 training
# samples as the test set, so the reported test RMSE was meaningless.
Xtest = Xall[train_size:, :]
Ytest = Yall[train_size:]

'''
    Define the Model 
'''
# One hidden layer of 30 ReLU units mapping the 12 lags to the next value.
model = Sequential([
    Dense(30, input_dim=time_lag, activation='relu'),
    Dense(1),
])

'''
    Compiling model for use
'''
model.compile(loss='mean_squared_error', optimizer='adam')

'''
    TrainNN
'''
# Fit on the training windows, holding out 5% for validation monitoring.
model.fit(Xtrain, Ytrain, epochs=100, batch_size=2, verbose=2,
          validation_split=0.05)

trainPredict = model.predict(Xtrain)

'''
    Plot Prediction
'''
# Shift the in-sample predictions right by `time_lag` so they line up with
# the original series; unfilled positions stay NaN and are not drawn.
trainPredictPlot = np.empty_like(data_nn)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[time_lag:len(trainPredict) + time_lag, :] = \
    scaler.inverse_transform(trainPredict)

# Plot the series and the aligned in-sample predictions.
fig_nnis = plt.figure()
plt.plot(scaler.inverse_transform(data_nn), label='Trianing Data')
plt.plot(trainPredictPlot, label='Train Prediction')
plt.legend()
plt.title('Neutral Network - Train Prediction')
fig_nnis.savefig('Neutral Network - Train Prediction.pdf')


# In-sample RMSE (computed on the scaled values).
trainScore = math.sqrt(mse(Ytrain, trainPredict[:, 0]))
print('Train Score: %.2f RMSE' % trainScore)


'''
    Dynamic Forecast
'''
# Roll the model forward: each prediction is appended to the history and
# fed back in as part of the next 12-lag feature window.
dynamic_prediction = np.copy(data_nn[:len(data_nn) - test_size])

for step in range(len(data_nn) - test_size, len(data_nn)):
    window = np.reshape(dynamic_prediction[step - time_lag:step], (1, time_lag))
    dynamic_prediction = np.append(dynamic_prediction, model.predict(window))

dynamic_prediction = dynamic_prediction.reshape(-1, 1)
dynamic_predictionplot = scaler.inverse_transform(dynamic_prediction)


# Plot training data, held-out data, and the dynamic out-of-sample forecast.
fig_nndy = plt.figure()
plt.plot(scaler.inverse_transform(data_nn[:len(data_nn) - test_size]),
         label='Training Data')
test_range = np.arange(len(data_nn) - test_size, len(data_nn), 1)
plt.plot(test_range, scaler.inverse_transform(data_nn[-test_size:]),
         label='Testing Data')
plt.plot(test_range, dynamic_predictionplot[-test_size:],
         label='Out of Sample Prediction')
plt.legend(loc="upper left")
plt.title('Neutral Network - Out of Sample Presition')
fig_nndy.savefig('Neutral Network - Out of Sample Presition.pdf')

# Out-of-sample RMSE on the scaled series.
testScore_dyfc = math.sqrt(mse(Ytest, dynamic_prediction[-test_size:]))
print('Test Score: {0:.2f}'.format(testScore_dyfc))


"""
Forecast 
"""
from sklearn.metrics import mean_squared_error as mse
# sklearn.metrics.mean_squared_error(y_true, y_pred, sample_weight=None, multioutput='uniform_average'

'''
    RMSE Calculation
'''
# RMSE From All 4 Models
y_test = np.array(Dtest['Data']).reshape(-1,1)
y_dr = drft_fc[-12:].reshape(-1,1)
y_sa = np.array(sarima_forcast[-12:]).reshape(-1,1)

rmse_dr = math.sqrt( mse(y_test, y_dr) )  # Drift Method
rmse_hm = rmse_hm                      # Holt-Winter's Multiplicative Model
rmse_sa = math.sqrt( mse(y_test, y_sa) )  # SARIMA(1, 1, 1)x(5, 1, 1, 12)
rmse_nn = testScore_dyfc               # Neutral Network

print("Drift RMSE: " + str(rmse_dr))
print("HW-M RMSE: " + str(rmse_hm))
print("SARIMAX RMSE: " + str(rmse_sa))
print("NN RMSE: " + str(rmse_nn))

'''
    Combination
'''
# Optimal linear combination of the SARIMA and HW-multiplicative forecasts,
# weighted by their in-sample residual (co)variances (Bates-Granger weights).
# SARIMA - HW
# np.resize truncates/recycles each residual vector to exactly train_len entries,
# so the two residual series are guaranteed to have the same length below.
residual1 = np.resize(sarima_results.resid, (train_len,))
residual2 = np.resize(Dtrain['Data'].values - y_hm[:train_len], (train_len,))

# 2x2 covariance matrix of the two residual series.
covariance = np.cov(residual1,residual2)

var1 = covariance[0][0]
var2 = covariance[1][1]

# Correlation Coefficent
rho = covariance[0][1] / (np.sqrt(var1*var2))

# Minimum-variance combination weight for the first (SARIMA) forecast.
wopt1 = (var2 - rho*np.sqrt(var1*var2))/(var1+var2-2*rho*np.sqrt(var1*var2))  # Weight to SARIMA
wopt2 = 1 - wopt1 # Weight to HW-M

# Forecast Combination
forecast1 = sarima_results.forecast(test_len) 
forecast2 = np.array(Y_hm)

# Weighted average of the two 12-step forecasts.
forecast_comb = wopt1 * forecast1 + wopt2 * np.reshape(forecast2, (test_len,))

# Plot the Forecast Combination
fig_fccb = plt.figure()

plt.plot(data_all,'b', label = "Train/Test Data")
plt.plot(np.arange(train_len,train_len+test_len),forecast_comb,'r', label = "Combined Forecasts")

plt.plot(np.arange(train_len,train_len+test_len),forecast1,'c:',label = "SARIMA")
plt.plot(np.arange(train_len,train_len+test_len),forecast2,'m:', label = "HW-M")
plt.legend(loc="upper left")

fig_fccb.savefig('Combination - HW-M & SARIMA.pdf')

# Overall RMSE Calculation
print("Combined RMSE: " + str(math.sqrt(mse(Dtest['Data'].values,forecast_comb))))
print("SARIMAX RMSE: " + str(math.sqrt(mse(Dtest['Data'].values,forecast1))))
print("HW RMSE: " + str(math.sqrt(mse(Dtest['Data'].values, forecast2))))


'''
    Forecast - Neutral Network
'''
## Optimal: Neutral Network

# Refit the same architecture on every available window (train + test),
# then produce one-step-ahead fitted values for the whole series.
model = Sequential([
    Dense(30, input_dim=time_lag, activation='relu'),
    Dense(1),
])


model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(Xall, Yall, epochs=100, batch_size=2, verbose=2,
          validation_split=0.05)

Forecasting = model.predict(Xall)

# Align the one-step forecasts with the original series for plotting;
# the first `time_lag` positions have no prediction and stay NaN.
ForecastingPlot = np.empty_like(data_nn)
ForecastingPlot[:, :] = np.nan
ForecastingPlot[time_lag:, :] = scaler.inverse_transform(Forecasting)

fig_fc = plt.figure()
plt.plot(scaler.inverse_transform(data_nn), label='Original')
plt.plot(ForecastingPlot, label='Forecast')
plt.legend()
plt.title('Optimal Forecasting Model - One Step')
fig_fc.savefig('Forecasting - Optimal.pdf')

This snippet took 0.03 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).