# Demo entry 6687689

py

Submitted by Wu Shichao on Dec 30, 2017 at 09:28
Language: Python 3. Code size: 5.0 kB.

```from math import pi,exp,gamma
import numpy as np
import matplotlib.pyplot as plt

def zeroMean(dataMat):
    """Center the data by subtracting the per-column (per-feature) mean.

    Returns a tuple (centered data, column means) so the mean can be
    added back later for reconstruction.
    """
    colMeans = np.mean(dataMat, axis=0)
    centered = dataMat - colMeans
    return centered, colMeans

def percentage2n(eigVals, percentage):
    """Return how many of the largest eigenvalues are needed for their
    cumulative sum to reach `percentage` of the total eigenvalue mass.

    Falls through (implicitly returning None) if the threshold is never
    reached, which can only happen for percentage > 1.
    """
    descending = np.sort(eigVals)[::-1]  # sort ascending, then reverse
    total = sum(descending)
    running = 0
    for count, value in enumerate(descending, start=1):
        running += value
        if running >= total * percentage:
            return count

def pu(k, d):     # the function p(U)
    """Evaluate p(U), the uniform prior mass of a k-dimensional subspace
    basis in d dimensions (product of inverse Stiefel-manifold areas).

    k : number of retained principal components.
    d : ambient dimensionality.

    Bug fix: the original body ignored the parameter `d` and read the
    module-level global `dimensions` instead; since every caller passed
    `dimensions` as `d`, using `d` here is behavior-preserving while
    making the function correct on its own.
    """
    multi = 1.0
    for i in range(1, k + 1):
        multi *= gamma((d - i + 1) / 2.0) * pi ** (-(d - i + 1) / 2.0)
    return multi * 2 ** (-k)

def az(k, d, lahatlist, lalist, n=None):   # |Az|
    """Compute |A_z| * n, the determinant factor of the Laplace
    approximation to the PPCA evidence.

    k         : number of retained components.
    d         : ambient dimensionality.
    lahatlist : model eigenvalue estimates (leading k kept, rest = noise var).
    lalist    : observed eigenvalues.
    n         : sample count; defaults to the module-level `numbers` for
                backward compatibility with existing callers.

    Bug fix: the original body ignored the parameter `d` and read the
    global `dimensions`; callers always passed `dimensions`, so using `d`
    preserves behavior. The hard-coded global `numbers` is now the
    default of the new keyword parameter `n`.
    """
    if n is None:
        n = numbers
    Az = 1.0
    for i in range(1, k + 1):
        mul = 1.0
        for j in range(i + 1, d + 1):
            mul *= (lahatlist[j - 1] ** (-1) - lahatlist[i - 1] ** (-1)) * (lalist[i - 1] - lalist[j - 1])
        Az *= mul
    return Az * n

# Experiment configuration: 10-dimensional data, 100 samples.
dimensions = 10
numbers = 100

# Zero-mean Gaussian with a diagonal covariance whose eigenvalues are
# (10, 8, 6, 4, 2, 1, 1, 1, 1, 1): five "signal" directions of decreasing
# variance plus five unit-variance "noise" directions.
mean = np.zeros(dimensions)
cov = np.array([[10,0,0,0,0,0,0,0,0,0],
[ 0,8,0,0,0,0,0,0,0,0],
[ 0,0,6,0,0,0,0,0,0,0],
[ 0,0,0,4,0,0,0,0,0,0],
[ 0,0,0,0,2,0,0,0,0,0],
[ 0,0,0,0,0,1,0,0,0,0],
[ 0,0,0,0,0,0,1,0,0,0],
[ 0,0,0,0,0,0,0,1,0,0],
[ 0,0,0,0,0,0,0,0,1,0],
[ 0,0,0,0,0,0,0,0,0,1]])

# Draw the samples; .T unpacks one coordinate array per dimension, so each
# xi holds the i-th coordinate of all `numbers` samples.
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10 = np.random.multivariate_normal(mean,cov,numbers).T
x = np.array([x1,x2,x3,x4,x5,x6,x7,x8,x9,x10])

# Build the (numbers x dimensions) data matrix, one row per sample.
# The original double loop copied x element-by-element into nested lists;
# it is exactly a transpose, so use .T directly.
rawData = x.T

newData, meanVal = zeroMean(rawData)
covMat = np.cov(newData, rowvar=0)              # sample covariance; columns are features
eigVals, eigVects = np.linalg.eig(np.mat(covMat))
k = percentage2n(eigVals, 0.95)                 # smallest k explaining 95% of the variance
eigValIndice = np.argsort(eigVals)              # eigenvalue indices, ascending
n_eigValIndice = eigValIndice[-1:-(k+1):-1]     # indices of the k largest eigenvalues
n_eigVect = eigVects[:, n_eigValIndice]         # corresponding eigenvectors
# `*` is matrix multiplication here because n_eigVect is an np.mat.
lowDDataMat = newData * n_eigVect               # project into the k-dim principal subspace
reconMat = (lowDDataMat * n_eigVect.T) + meanVal  # reconstruct in the original space

# Eigenvalues of the sample covariance of the raw data (np.cov centers
# the data internally, so this matches the centered decomposition).
covMat_true = np.cov(rawData, rowvar=0)
eigVals_true, eigVects_true = np.linalg.eig(np.mat(covMat_true))

# Number of retained components as a function of the variance threshold.
percents = np.linspace(0, 1, 1000)
numK = [percentage2n(eigVals, percent) for percent in percents]

# Biased (divide-by-N) scatter matrix S = (1/N) * sum_i x_i x_i^T over the
# centered samples. The original accumulated the outer products one sample
# at a time; the matrix product below is the same sum done in native code.
S = newData.T.dot(newData) / numbers
lambd, eigVects_S = np.linalg.eig(S)

# Plain-list copy of the eigenvalues for the evidence formulas below.
lambdlist = list(lambd)

# Maximum likelihood estimation: peak likelihood for each candidate k.
# NOTE(review): np.linalg.eig does not sort its eigenvalues; everything
# below treats lambd as if it were in descending order — confirm the
# ordering actually produced upstream.
maxlike = []
nueHatlist = []
mullalist = []
for k in range(1, dimensions):
    # nueHat: mean of the trailing (dimensions - k) eigenvalues, i.e. the
    # ML estimate of the discarded/noise variance.
    nueHat = sum(lambd[k:dimensions]) / (dimensions - k)
    # mulla: product of the leading k eigenvalues.
    mulla = lambd[0]
    for ev in lambd[1:k]:
        mulla *= ev
    maxlike.append(mulla ** (-numbers / 2.0) * nueHat ** (-numbers * (dimensions - k) / 2.0))
    nueHatlist.append(nueHat)
    mullalist.append(mulla)

# PPCA: Laplace approximation to the model evidence.
ppca = []
for k in range(1, dimensions):
    # Model spectrum: leading k eigenvalues kept, the remaining
    # (dimensions - k) replaced by the estimated noise variance.
    biglambd = lambdlist[:k] + [nueHatlist[k - 1]] * (dimensions - k)
    m = dimensions * (dimensions - 1) / 2.0 - (dimensions - k) * (dimensions - k - 1) / 2.0
    evidence = (pu(k, dimensions)
                * mullalist[k - 1] ** (-numbers / 2.0)
                * nueHatlist[k - 1] ** (-numbers * (dimensions - k) / 2.0)
                * (2 * pi) ** ((m + k) / 2.0)
                * az(k, dimensions, biglambd, lambdlist) ** (-0.5)
                * numbers ** (-4 * k / 2.0))
    ppca.append(np.log(evidence))

# BIC approximation to the same evidence.
bic = []
for k in range(1, dimensions):
    m = dimensions * (dimensions - 1) / 2.0 - (dimensions - k) * (dimensions - k - 1) / 2.0
    bic.append(np.log(mullalist[k - 1] ** (-numbers / 2.0)
                      * nueHatlist[k - 1] ** (-numbers * (dimensions - k) / 2.0)
                      * numbers ** (-(m + k) / 2.0)))

# RR-N: likelihood with the leading k eigenvalues replaced by their mean.
rrn = []
for k in range(1, dimensions):
    sumlak = sum(lambd[:k]) / k
    rrn.append(np.log(sumlak ** (-numbers * k / 2.0)
                      * nueHatlist[k - 1] ** (-numbers * (dimensions - k) / 2.0)))

# Eigenvalue thresholding
# Figure 1: retained dimensionality k as a function of the variance
# percentage threshold.
plt.title("Eigenvalue thresholding")
plt.plot(percents,numK)
plt.xlim(0,1)
plt.ylim(1,10)
plt.ylabel("k")
plt.xlabel("Percentage")
plt.show()

# Choice of k: one figure per selection criterion.

# Figure 2: eigenvalues of the covariance of the raw data.
plt.title("True eigenvalues")
plt.plot(range(1,dimensions+1),eigVals_true,marker='.')
plt.xlabel("k")
plt.ylabel("eigenvalues")
plt.show()

# Figure 3: eigenvalues of the divide-by-N scatter matrix S.
plt.title("Observed eigenvalues")
plt.plot(range(1,dimensions+1),lambd,marker='.')
plt.xlabel("k")
plt.ylabel("eigenvalues")
plt.show()

# Figure 4: log of the maximized likelihood per candidate k.
plt.title("Maximized likelihood")
plt.plot(range(1,dimensions),np.log(maxlike),marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

# Figure 5: Laplace-approximated log evidence per candidate k.
plt.title("Laplace evidence")
plt.plot(range(1,dimensions),ppca,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

# Figure 6: BIC-approximated log evidence per candidate k.
plt.title("BIC evidence")
plt.plot(range(1,dimensions),bic,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

# Figure 7: RR-N log likelihood per candidate k.
plt.title("RR-N likelihood")
plt.plot(range(1,dimensions),rrn,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()
```

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.