# Demo entry 6346709

python

Submitted by anonymous on Feb 12, 2017 at 17:35
Language: Python. Code size: 8.0 kB.

```python
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Feb 6 05:23:38 2017

@author: Chase Ross

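The following reproduces the key time-series regression results from Fama and
French (1996): the excess returns (return minus RF) on the 25 size/book-to-market
portfolios are each regressed on a constant and the Mkt-RF, SMB and HML factors.
The data are available from Ken French's website.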
"""

import pandas as pd
import numpy as np

def regression(inputPath, outputPath):
df1 = pd.read_csv(inputPath, parse_dates=['Date'], dayfirst=False, index_col='Date')
df1 = df1.loc['1963-07-01':'1993-12-01'] # Sample used in Fama French (1996)
#df1 = df1.loc['1932-01-01':'2016-12-01'] # Full sample
#df1 = df1.loc['1963-07-01':'2016-12-01'] # From 1963 to Present

# Adjust 5x5 portfolios for risk free rate.
portfolios = ["SMALL LoBM",	"ME1 BM2",	"ME1 BM3",	"ME1 BM4",	"SMALL HiBM",	"ME2 BM1", "ME2 BM2",	"ME2 BM3",	"ME2 BM4",	"ME2 BM5",	"ME3 BM1",	"ME3 BM2",	"ME3 BM3",	"ME3 BM4",	"ME3 BM5",	"ME4 BM1",	"ME4 BM2",	"ME4 BM3",	"ME4 BM4",	"ME4 BM5",	"BIG LoBM",	"ME5 BM2",	"ME5 BM3",	"ME5 BM4",	"BIG HiBM"]
for x in portfolios:
df1[x] = df1[x] - df1['RF']

# Add column of constants for alpha/intercept.
df1['constant']=1
coefficients = ["a",	"b",	"s",	"h"]

# Split these into the X and Y matrices.

Y = df1[["SMALL LoBM",	"ME1 BM2",	"ME1 BM3",	"ME1 BM4",	"SMALL HiBM",	"ME2 BM1",	"ME2 BM2",	"ME2 BM3",	"ME2 BM4",	"ME2 BM5",	"ME3 BM1",	"ME3 BM2",	"ME3 BM3",	"ME3 BM4",	"ME3 BM5",	"ME4 BM1",	"ME4 BM2",	"ME4 BM3",	"ME4 BM4",	"ME4 BM5",	"BIG LoBM",	"ME5 BM2",	"ME5 BM3",	"ME5 BM4",	"BIG HiBM"]].copy()
X = df1[['constant', "Mkt-RF",	"SMB",	"HML"]].copy()
means = Y.mean()
stdevs = Y.std()

# Now Matrix Algebra

# Note, we can check matrix dimensions to make sure the algebra will go through.
#print(X.shape)
#print(Y.shape)

# Calculate betas: bhat = (X'X)^-1 X'Y
bhat = np.dot(np.linalg.inv(np.dot(X.T,X)),np.dot(X.T,Y))
coefficientMatrix = pd.DataFrame(bhat, columns=portfolios, index=coefficients)

# Calculate standard errors and t-statistics
portfolioCounter = 0
n = X.shape[0]
k = X.shape[1]
numberPortfolios = Y.shape[1]
standardErrorsdf = pd.DataFrame(index=coefficients)
prediction = np.dot(X,bhat)
residualMatrix = Y - prediction

# Variance-covariance matrix of the coefficient estimates, computed for each
# portfolio in turn: VCV = (e'e / (n - k)) * (X'X)^-1
while portfolioCounter < numberPortfolios:
ehat = residualMatrix.ix[:,portfolioCounter] #extrac the residuals for a certain portfolio, so just take that column
VCV = np.true_divide(1,n-k)*np.dot(np.dot(ehat.T,ehat),np.linalg.inv(np.dot(X.T,X)))
stderrPortfolio = np.sqrt(np.diagonal(VCV))
stderrPortfolio = pd.DataFrame(stderrPortfolio, index = coefficients)
standardErrorsdf = pd.concat([standardErrorsdf,stderrPortfolio],axis=1)
portfolioCounter +=1

standardErrorsdf.columns= portfolios

## Generate T Statistics
tStatMatrix = pd.DataFrame(index = coefficients, columns=portfolios)
tStatMatrix = coefficientMatrix.div(standardErrorsdf)

# Now slice and dice to make the 5x5's
meanArray= np.array(means) # convert dataframes to arrays
stdArray= np.array(stdevs)
bArray = np.array(bhat)
size = ["label", "small_size", "2_size", "3_size", "4_size", "big_size"]
BEME = ["low_be/me", "2_be/me", "3_be/me", "4_be/me", "high_be/me"]

#   First, Means
meanLabel = np.array(["Means", " ", " ", " ", " "])
mean1 = meanArray[0:5]
mean2 = meanArray[5:10]
mean3 = meanArray[10:15]
mean4 = meanArray[15:20]
mean5 = meanArray[20:25]
mean = np.transpose(np.column_stack((meanLabel, mean1, mean2, mean3, mean4, mean5)))
meanMatrix = pd.DataFrame(mean, columns=BEME, index=size)

stdLabel = np.array(["Standard Deviations", " ", " ", " ", " "])
std1 = stdArray[0:5]
std2 = stdArray[5:10]
std3 = stdArray[10:15]
std4 = stdArray[15:20]
std5 = stdArray[20:25]
stdev = np.transpose(np.column_stack((stdLabel,std1, std2, std3, std4, std5)))
stdMatrix = pd.DataFrame(stdev, columns=BEME, index=size)

print "Summary Statistics Exported."

#   Now, coefficients
alphaLabel = np.array(["Alpha Coefficient", " ", " ", " ", " "])
alpha1 = bArray[0, 0:5]
alpha2 = bArray[0, 5:10]
alpha3 = bArray[0, 10:15]
alpha4 = bArray[0, 15:20]
alpha5 = bArray[0, 20:25]
alpha = np.transpose(np.column_stack((alphaLabel, alpha1, alpha2, alpha3, alpha4, alpha5)))
alphaMatrix = pd.DataFrame(alpha, columns=BEME, index=size)

betaLabel = np.array(["Beta Coefficient", " ", " ", " ", " "])
beta1 = bArray[1, 0:5]
beta2 = bArray[1, 5:10]
beta3 = bArray[1, 10:15]
beta4 = bArray[1, 15:20]
beta5 = bArray[1, 20:25]
beta = np.transpose(np.column_stack((betaLabel,beta1, beta2, beta3, beta4, beta5)))
betaMatrix = pd.DataFrame(beta, columns=BEME, index=size)

sLabel = np.array(["s Coefficient", " ", " ", " ", " "])
s1 = bArray[2, 0:5]
s2 = bArray[2, 5:10]
s3 = bArray[2, 10:15]
s4 = bArray[2, 15:20]
s5 = bArray[2, 20:25]
s = np.transpose(np.column_stack((sLabel, s1, s2, s3, s4, s5)))
sMatrix = pd.DataFrame(s, columns=BEME, index=size)

hLabel = np.array(["h Coefficient", " ", " ", " ", " "])
h1 = bArray[3, 0:5]
h2 = bArray[3, 5:10]
h3 = bArray[3, 10:15]
h4 = bArray[3, 15:20]
h5 = bArray[3, 20:25]
h = np.transpose(np.column_stack((hLabel, h1, h2, h3, h4, h5)))
hMatrix = pd.DataFrame(h, columns=BEME, index=size)

print "Regression Coefficients Exported."

#   Third, T-Stats
tArray = np.array(tStatMatrix)

alphaLabel = np.array(["Alpha t-stat", " ", " ", " ", " "])
alpha1 = tArray[0, 0:5]
alpha2 = tArray[0, 5:10]
alpha3 = tArray[0, 10:15]
alpha4 = tArray[0, 15:20]
alpha5 = tArray[0, 20:25]
alpha = np.transpose(np.column_stack((alphaLabel, alpha1, alpha2, alpha3, alpha4, alpha5)))
alphaMatrix = pd.DataFrame(alpha, columns=BEME, index=size)

betaLabel = np.array(["Beta t-stat", " ", " ", " ", " "])
beta1 = tArray[1, 0:5]
beta2 = tArray[1, 5:10]
beta3 = tArray[1, 10:15]
beta4 = tArray[1, 15:20]
beta5 = tArray[1, 20:25]
beta = np.transpose(np.column_stack((betaLabel,beta1, beta2, beta3, beta4, beta5)))
betaMatrix = pd.DataFrame(beta, columns=BEME, index=size)

sLabel = np.array(["s t-stat", " ", " ", " ", " "])
s1 = tArray[2, 0:5]
s2 = tArray[2, 5:10]
s3 = tArray[2, 10:15]
s4 = tArray[2, 15:20]
s5 = tArray[2, 20:25]
s = np.transpose(np.column_stack((sLabel, s1, s2, s3, s4, s5)))
sMatrix = pd.DataFrame(s, columns=BEME, index=size)

hLabel = np.array(["h t-stat", " ", " ", " ", " "])
h1 = tArray[3, 0:5]
h2 = tArray[3, 5:10]
h3 = tArray[3, 10:15]
h4 = tArray[3, 15:20]
h5 = tArray[3, 20:25]
h = np.transpose(np.column_stack((hLabel, h1, h2, h3, h4, h5)))
hMatrix = pd.DataFrame(h, columns=BEME, index=size)