Demo entry 3960550

Bank Dataset NB Classifier

   

Submitted by anonymous on Mar 09, 2016 at 19:16
Language: Python 3. Code size: 3.5 kB.

import csv
import random
import numpy

# Location of the semicolon-separated bank marketing dataset.
BANK_CSV_PATH = 'bank/bank-full.csv'

# Columns kept from each CSV row: nine categorical attributes followed by the
# class label (the last column of the file).
SELECTED_COLUMNS = (1, 2, 3, 4, 6, 7, 8, 10, 15, 16)
NUM_FEATURES = len(SELECTED_COLUMNS) - 1
CLASS_INDEX = NUM_FEATURES

# Class labels; the order fixes the key order of the printed confusion matrix.
CLASSES = ('yes', 'no')

# Number of random 50/50 train/test splits to evaluate.
NUM_ITERATIONS = 10


def load_bank_data(path=BANK_CSV_PATH):
    """Read the bank CSV and return ``(header, records)``.

    Only the columns listed in ``SELECTED_COLUMNS`` are kept.  ``header`` is
    the projected first row (the attribute names) and ``records`` is a list of
    the remaining projected rows, each a list of strings.

    Raises:
        ValueError: if the file contains no rows at all.
    """
    # The csv module requires text mode with newline='' on Python 3.
    with open(path, 'r', newline='') as csvfile:
        bankreader = csv.reader(csvfile, delimiter=';', quotechar='"')
        # Blank lines come back as empty lists; skip them instead of crashing.
        rows = [[row[i] for i in SELECTED_COLUMNS] for row in bankreader if row]
    if not rows:
        raise ValueError("no rows found in %r" % (path,))
    return rows[0], rows[1:]


def train_naive_bayes(train):
    """Fit a categorical Naive Bayes model on ``train``.

    Each record is a sequence of ``NUM_FEATURES`` attribute values followed by
    a class label from ``CLASSES``.

    Returns a tuple ``(priors, likelihoods, unseen)`` where

    * ``priors[label]`` is the fraction of training records with that label,
    * ``likelihoods[label][j][value]`` is the Laplace-smoothed estimate of
      P(attribute j == value | label),
    * ``unseen[label][j]`` is the smoothed probability used for a value of
      attribute j that was never observed together with ``label``.

    Raises:
        ValueError: if ``train`` is empty.
        KeyError: if a record carries a label that is not in ``CLASSES``.
    """
    if not train:
        raise ValueError("training set is empty")

    class_counts = {label: 0 for label in CLASSES}
    value_counts = {label: [{} for _ in range(NUM_FEATURES)]
                    for label in CLASSES}
    distinct_values = [set() for _ in range(NUM_FEATURES)]

    for record in train:
        label = record[CLASS_INDEX]
        class_counts[label] += 1
        for j in range(NUM_FEATURES):
            value = record[j]
            counts = value_counts[label][j]
            counts[value] = counts.get(value, 0) + 1
            distinct_values[j].add(value)

    priors = {label: class_counts[label] / len(train) for label in CLASSES}

    likelihoods = {}
    unseen = {}
    for label in CLASSES:
        likelihoods[label] = []
        unseen[label] = []
        for j in range(NUM_FEATURES):
            # Add-one smoothing: the denominator grows by the number of
            # distinct values the attribute takes in the training set, so the
            # conditional probabilities over those values sum to one.
            denominator = class_counts[label] + len(distinct_values[j])
            likelihoods[label].append(
                {value: (count + 1) / denominator
                 for value, count in value_counts[label][j].items()})
            unseen[label].append(1 / denominator)

    return priors, likelihoods, unseen


def class_scores(record, model):
    """Return ``{label: prior * product of attribute likelihoods}`` for ``record``."""
    priors, likelihoods, unseen = model
    scores = {}
    for label in CLASSES:
        score = priors[label]
        for j in range(NUM_FEATURES):
            score *= likelihoods[label][j].get(record[j], unseen[label][j])
        scores[label] = score
    return scores


def evaluate(model, test):
    """Classify every record in ``test`` and summarise the results.

    Returns ``(accuracy, confusion, ties, misclassified)``:

    * ``accuracy`` is the percentage of correctly classified records
      (0.0 when ``test`` is empty),
    * ``confusion[actual][predicted]`` counts records,
    * ``ties`` counts records whose two class scores were exactly equal
      (these are predicted as 'no'),
    * ``misclassified`` lists the wrongly classified records in test order.
    """
    confusion = {actual: {predicted: 0 for predicted in CLASSES}
                 for actual in CLASSES}
    ties = 0
    misclassified = []

    for record in test:
        scores = class_scores(record, model)
        if scores['yes'] == scores['no']:
            ties += 1
        # 'no' is the default; 'yes' must win strictly.
        predicted = 'yes' if scores['yes'] > scores['no'] else 'no'
        actual = record[CLASS_INDEX]
        if actual != predicted:
            misclassified.append(record)
        confusion[actual][predicted] += 1

    correct = float(len(test) - len(misclassified))
    accuracy = correct / len(test) * 100 if test else 0.0
    return accuracy, confusion, ties, misclassified


def main():
    """Run the repeated random-split evaluation and print the results."""
    _header, data = load_bank_data()
    accuracies = numpy.zeros(NUM_ITERATIONS)

    for itr in range(NUM_ITERATIONS):
        print("Iteration: ", itr + 1)
        random.shuffle(data)
        # Integer division: slice bounds must be ints on Python 3.
        half = len(data) // 2
        train = data[:half]
        test = data[half:]

        model = train_naive_bayes(train)

        print(len(test))
        accuracy, confusion, ties, misclassified = evaluate(model, test)
        for record in misclassified:
            print(record)

        accuracies[itr] = accuracy
        print("Accuracy:", accuracy)
        print(confusion)
        print("ties:", ties)
        print("\n\n")

    print(accuracies)
    print("Mean:", numpy.mean(accuracies))
    print("Std Dev:", numpy.std(accuracies))


if __name__ == "__main__":
    main()
            

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).