Demo entry 56578

pca

   

Submitted by B on Oct 26, 2012 at 05:52
Language: Python. Code size: 1.7 kB.

from __future__ import division
import numpy as np
import matplotlib.pyplot as pl

def standardize_columns(arr):
    """Standardize every column of a 2-D array in place.

    Each column is shifted to zero mean and scaled to unit population
    standard deviation (the variance is divided by the number of rows,
    not rows - 1).

    arr -- 2-D numpy array, modified in place. The in-place subtraction
           and division need a floating-point dtype to store the result.

    Returns None. Raises ValueError (from the shape unpacking) if arr is
    not 2-dimensional.

    A column whose standard deviation is exactly zero is only centered
    (it becomes all zeros) rather than divided by zero, which would fill
    it with NaN. An array with no rows is left untouched.
    """
    rows, cols = arr.shape
    if rows == 0:
        # Nothing to standardize; the mean of zero rows is undefined.
        return

    avg = arr.mean(axis=0)
    # ddof defaults to 0, i.e. the population standard deviation.
    sigma = arr.std(axis=0)
    # Constant columns: divide by 1 so they stay at 0 after centering.
    sigma[sigma == 0] = 1.0

    # Broadcasting applies the per-column statistics to every row.
    arr -= avg
    arr /= sigma

#Get the data
fn = "data.txt"
# One row of whitespace-separated numbers per line. Blank lines are skipped
# so that a stray empty line cannot produce a ragged array. The file is
# closed as soon as it has been read, even if parsing a number fails.
with open(fn, 'r') as f:
    data = [[float(s) for s in line.split()] for line in f if line.strip()]
data = np.array(data)
standardize_columns(data)

#Get the labels and annotation offsets
nfn = "names.txt"
labels = []
offsets = []
# Each line holds a label, a tab, then an integer "x,y" pair that is later
# used as the annotation text offset for that label. Blank lines are skipped.
with open(nfn, 'r') as nf:
    for line in nf:
        if not line.strip():
            continue
        fields = line.split('\t')
        labels.append(fields[0])
        # int() tolerates the trailing newline left on the last field.
        c = fields[1].split(',')
        offsets.append((int(c[0]), int(c[1])))

# Reduced SVD of the standardized data (full_matrices=False keeps only the
# leading singular vectors). V is returned already transposed, so its rows
# are the right singular vectors.
U, S, V = np.linalg.svd(data, full_matrices=False)

# Keep the k leading singular triplets; k = 2 gives a 2-dimensional plot
k = 2
Uk = U[:, :k]
Sk = S[:k]
Vk = V[:k, :]

# Broadcasting scales column j of Uk by Sk[j]; the result holds the
# coordinates of every data row in the k-dimensional space.
Xk = Uk * Sk

# Scatter the projected samples: first coordinate against the second.
pl.figure()
ax = pl.subplot(111)
pc1, pc2 = Xk[:, 0], Xk[:, 1]
ax.plot(pc1, pc2, 'ro')

# Label every point; the text is placed at the per-label offset (in points)
# from the marker and joined to it with a straight arrow.
for name, px, py, shift in zip(labels, pc1, pc2, offsets):
    ax.annotate(
        name,
        xy=(px, py),
        xytext=shift,
        textcoords='offset points',
        ha='right',
        va='bottom',
        bbox={'boxstyle': 'round,pad=0.5', 'fc': 'yellow', 'alpha': 0.5},
        arrowprops={'arrowstyle': '->', 'connectionstyle': 'arc3,rad=0'},
    )

ax.grid()
pl.title('PCA for Standardized Properties of Common Engineering Materials')
pl.show()

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).