# Demo entry 56578

pca

Submitted by B on Oct 26, 2012 at 05:52
Language: Python. Code size: 1.7 kB.

```python
from __future__ import division
import numpy as np
import matplotlib.pyplot as pl

def standardize_columns(arr):
    """Standardize every column of ``arr`` in place to zero mean, unit variance.

    Each column has its mean subtracted and is then divided by its population
    standard deviation (squared deviations are averaged over the number of
    rows, i.e. ``ddof=0``).

    Parameters
    ----------
    arr : numpy.ndarray
        Two-dimensional floating-point array.  It is modified in place.

    Returns
    -------
    None
        The standardized values are written back into ``arr``.

    Notes
    -----
    A column whose standard deviation is exactly zero (all values identical)
    is only centred, so it ends up all zeros rather than being divided by
    zero and filled with NaN.  An array with no rows or no columns is left
    untouched.
    """
    rows, cols = arr.shape
    if rows == 0 or cols == 0:
        # Nothing to standardize; also avoids a 0/0 when averaging.
        return

    mean = arr.mean(axis=0)
    sigma = arr.std(axis=0)  # population standard deviation (ddof=0)
    # Constant columns have zero spread: divide them by 1 instead of 0.
    sigma[sigma == 0] = 1.0

    # In-place operators keep the caller's array object (and any views) updated.
    arr -= mean
    arr /= sigma

# Get the data
# Every non-blank line of the data file is one row of whitespace-separated
# numbers.  The ``with`` block closes the file even if a value fails to parse.
fn = "data.txt"
with open(fn, 'r') as f:
    data = [[float(tok) for tok in line.split()] for line in f if line.strip()]
data = np.array(data)
standardize_columns(data)

# Get the labels and annotation offsets
# Every non-blank line of the names file is "<label>\t<x>,<y>": the label text
# followed by the integer annotation offset for that point.
nfn = "names.txt"
labels = []
offsets = []
with open(nfn, 'r') as nf:
    for line in nf:
        if not line.strip():
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split('\t')
        labels.append(fields[0])
        parts = fields[1].split(',')
        # int() tolerates the trailing newline left on the last field.
        offsets.append((int(parts[0]), int(parts[1])))

# Thin SVD of the standardized data matrix
U, S, V = np.linalg.svd(data, full_matrices=False)

# Keep only the leading k components; k = 2 gives a 2-dimensional plot
k = 2
Uk = U[:, :k]
Sk = S[:k]
Vk = V[:k, :]

# Projection of X into the k-dimensional space: broadcasting scales each
# retained column of U by its singular value
Xk = Uk * Sk

# Scatter the first two projected coordinates as red circles
pl.figure()
ax = pl.subplot(111)
ax.plot(Xk[:, 0], Xk[:, 1], 'ro')

# Annotate the points with labels
# Each label is drawn in a rounded box placed `offset` (in points) away from
# its data point, with an arrow pointing back at the point.
for label, x, y, offset in zip(labels, Xk[:, 0], Xk[:, 1], offsets):
    ax.annotate(
        label,
        xy=(x, y), xytext=offset,
        textcoords='offset points', ha='right', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

ax.grid()
pl.title('PCA for Standardized Properties of Common Engineering Materials')
pl.show()
```

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.