# Demo entry 3960550

Bank Dataset NB Classifier

Submitted by anonymous on Mar 09, 2016 at 19:16
Language: Python 3. Code size: 3.5 kB.

import csv
import random
import numpy

# Default location of the dataset, relative to the working directory.
DATA_PATH = 'bank/bank-full.csv'

# Positions (in each raw CSV row) of the columns kept for the classifier.
# The last selected column is used as the class label.
SELECTED_COLUMNS = (1, 2, 3, 4, 6, 7, 8, 10, 15, 16)

# The two class labels the classifier distinguishes, in reporting order.
LABELS = ('yes', 'no')

# Number of random 50/50 train/test splits to evaluate.
NUM_ITERATIONS = 10


def load_data(path=DATA_PATH, delimiter=';', columns=SELECTED_COLUMNS):
    """Read the CSV file and return ``(attributes, records)``.

    ``attributes`` is the header row and ``records`` the remaining rows, both
    reduced to the positions listed in ``columns``.

    NOTE(review): the original script used ``bankreader`` without ever
    creating it.  The reader is reconstructed here with ``delimiter=';'`` on
    the assumption that the file is the semicolon-separated UCI Bank
    Marketing distribution -- confirm against the actual data file.

    Raises:
        ValueError: if the file contains no rows at all.
        IndexError: if a row has fewer fields than ``columns`` requires.
    """
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the original 'rb' mode only works on Python 2.
    with open(path, newline='') as csvfile:
        bankreader = csv.reader(csvfile, delimiter=delimiter)
        # Blank lines come back as empty lists; skip them instead of failing.
        rows = [[row[i] for i in columns] for row in bankreader if row]
    if not rows:
        raise ValueError('no rows found in %r' % (path,))
    return rows[0], rows[1:]


def train_naive_bayes(train, attributes):
    """Estimate class priors and smoothed per-attribute value likelihoods.

    Every record in ``train`` has one value per entry of ``attributes``; the
    last position holds the class label, all earlier ones are features.

    Returns:
        ``(prior, probs, class_counts)`` where ``prior[label]`` is the
        fraction of training records with that label,
        ``probs[label][attribute][value]`` is the smoothed likelihood
        ``(count + 1) / (class_count + 2)``, and ``class_counts[label]`` is
        the raw number of training records with that label.

    Raises:
        ValueError: if ``train`` is empty.
        KeyError: if a record carries a label that is not in ``LABELS``.
    """
    if not train:
        raise ValueError('training set is empty')

    class_index = len(attributes) - 1
    class_counts = dict.fromkeys(LABELS, 0)
    # As in the original, the label column also gets an (always empty) slot.
    probs = {label: {attr: {} for attr in attributes} for label in LABELS}

    for record in train:
        label = record[class_index]
        class_counts[label] += 1
        for attr, value in zip(attributes[:class_index], record):
            value_counts = probs[label][attr]
            value_counts[value] = value_counts.get(value, 0) + 1

    total = float(len(train))
    prior = {label: class_counts[label] / total for label in LABELS}

    # Turn raw counts into smoothed likelihoods.  The "+ 2" denominator is
    # kept from the original regardless of how many distinct values an
    # attribute has.  The exact integer class count is used instead of
    # prior * len(train) to avoid a floating-point round trip.
    for label in LABELS:
        denominator = class_counts[label] + 2.0
        for value_counts in probs[label].values():
            for value in value_counts:
                value_counts[value] = (value_counts[value] + 1) / denominator

    return prior, probs, class_counts


def classify(record, attributes, prior, probs, class_counts):
    """Predict the label of ``record`` and report whether the classes tied.

    Returns:
        ``(predicted, tied)``.  ``predicted`` is ``'yes'`` only when its
        score is strictly greater than the ``'no'`` score; ``tied`` is True
        when both scores are exactly equal (in which case ``'no'`` wins).
    """
    class_index = len(attributes) - 1
    probability = dict(prior)
    for attr, value in zip(attributes[:class_index], record):
        for label in LABELS:
            # Values never seen with this label during training get the
            # smoothed zero-count likelihood.
            unseen = 1.0 / (class_counts[label] + 2)
            probability[label] *= probs[label][attr].get(value, unseen)

    tied = probability['yes'] == probability['no']
    predicted = 'yes' if probability['yes'] > probability['no'] else 'no'
    return predicted, tied


def evaluate(test, attributes, prior, probs, class_counts, verbose=True):
    """Classify every record in ``test`` and tally the outcome.

    When ``verbose`` is true each misclassified record is printed, as the
    original script did.

    Returns:
        ``(correct, confusion, ties)`` where ``confusion[actual][predicted]``
        counts records, ``correct`` is the number of matching predictions and
        ``ties`` the number of records whose two class scores were equal.
    """
    class_index = len(attributes) - 1
    confusion = {actual: dict.fromkeys(LABELS, 0) for actual in LABELS}
    correct = 0
    ties = 0

    for record in test:
        predicted, tied = classify(record, attributes, prior, probs,
                                   class_counts)
        if tied:
            ties += 1
        actual = record[class_index]
        if actual == predicted:
            correct += 1
        elif verbose:
            print(record)
        confusion[actual][predicted] += 1

    return correct, confusion, ties


def run_iteration(data, attributes, verbose=True):
    """Shuffle ``data`` in place, train on one half, test on the other.

    Prints the test-set size, the accuracy, the confusion matrix and the tie
    count, and returns the accuracy as a percentage.

    Raises:
        ValueError: if ``data`` has fewer than two records, so that no
            non-empty train/test split exists.
    """
    if len(data) < 2:
        raise ValueError('need at least two records to split into '
                         'training and test sets')

    random.shuffle(data)
    half = len(data) // 2  # integer division: slice indices must be ints
    train = data[:half]
    test = data[half:]

    prior, probs, class_counts = train_naive_bayes(train, attributes)

    print(len(test))
    correct, confusion, ties = evaluate(test, attributes, prior, probs,
                                        class_counts, verbose=verbose)

    accuracy = float(correct) / len(test) * 100
    print("Accuracy:", accuracy)
    print(confusion)
    print("ties:", ties)
    print("\n\n")
    return accuracy


def main():
    """Run the repeated random-split evaluation and print summary statistics."""
    attributes, data = load_data()
    accuracies = numpy.zeros(NUM_ITERATIONS)

    for itr in range(NUM_ITERATIONS):
        print("Iteration: ", itr + 1)
        accuracies[itr] = run_iteration(data, attributes)

    print(accuracies)
    print("Mean:", numpy.mean(accuracies))
    print("Std Dev:", numpy.std(accuracies))


if __name__ == '__main__':
    main()

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.