Demo entry 6687689

py

   

Submitted by Wu Shichao on Dec 30, 2017 at 09:28
Language: Python 3. Code size: 5.0 kB.

from math import pi,exp,gamma
import numpy as np
import matplotlib.pyplot as plt

def zeroMean(dataMat):
    """Center the data by removing the per-column (per-feature) mean.

    Returns the centered data together with the mean that was subtracted,
    so callers can undo the shift (e.g. for reconstruction) later.
    """
    meanVal = np.mean(dataMat, axis=0)  # one mean per feature column
    centered = dataMat - meanVal
    return centered, meanVal

def percentage2n(eigVals, percentage):
    """Return the smallest count of (largest) eigenvalues whose sum reaches
    `percentage` of the total eigenvalue mass.

    eigVals: 1-D array of eigenvalues, any order.
    percentage: fraction of total variance to retain, normally in [0, 1].

    Bug fix: the original fell off the end of the loop and returned None
    when the threshold was never reached (percentage > 1, or floating-point
    round-off right at 1.0); we now return all components in that case.
    """
    sortArray = np.sort(eigVals)[::-1]  # descending
    arraySum = sum(sortArray)
    tmpSum = 0
    for num, val in enumerate(sortArray, start=1):
        tmpSum += val
        if tmpSum >= arraySum * percentage:
            return num
    # Threshold unreachable: keep every component.
    return len(sortArray)

def pu(k, d):     # p(U)
    """p(U): the normalizing constant of the uniform prior over
    k-dimensional subspaces of R^d (Minka's Laplace evidence for PCA).

    Bug fix: the original ignored the `d` parameter and read the
    module-level `dimensions` instead; the two coincide at every call
    site in this script, but the function now honors its own argument.
    """
    multi = 1.0
    for i in range(1, k + 1):
        multi *= gamma((d - i + 1) / 2.0) * pi ** (-(d - i + 1) / 2.0)
    return multi * 2 ** (-k)

def az(k, d, lahatlist, lalist, n=None):   # |Az|
    """|A_Z| determinant term of Minka's Laplace evidence for PCA.

    lahatlist: model eigenvalues (k leading values then the noise estimate).
    lalist: observed eigenvalues of the scatter matrix.
    n: sample count; defaults to the module-level `numbers` so existing
       call sites keep their behavior.

    Bug fixes vs. the original:
      * `Az *= mul` was indented outside the loop over i, so every
        iteration's product except the last was silently discarded;
      * the inner loop bound read the module-level `dimensions` instead
        of the `d` parameter.
    NOTE(review): Minka's paper attaches a factor N to each (i, j) pair;
    this code multiplies by N once overall — confirm which is intended.
    """
    if n is None:
        n = numbers
    Az = 1.0
    for i in range(1, k + 1):
        mul = 1.0
        for j in range(i + 1, d + 1):
            mul *= ((lahatlist[j-1] ** (-1) - lahatlist[i-1] ** (-1)) * (lalist[i-1] - lalist[j-1]))
        Az *= mul  # accumulate every i (was applied only once, after the loop)
    return Az * n

# ---- Synthetic data -------------------------------------------------------
# 10-D zero-mean Gaussian: five axes with variances 10, 8, 6, 4, 2 and
# five isotropic noise axes with unit variance.
dimensions = 10
numbers = 100  # sample size N

mean = np.zeros(dimensions)
cov = np.array([[10,0,0,0,0,0,0,0,0,0],
                [ 0,8,0,0,0,0,0,0,0,0],
                [ 0,0,6,0,0,0,0,0,0,0],
                [ 0,0,0,4,0,0,0,0,0,0],
                [ 0,0,0,0,2,0,0,0,0,0],
                [ 0,0,0,0,0,1,0,0,0,0],
                [ 0,0,0,0,0,0,1,0,0,0],
                [ 0,0,0,0,0,0,0,1,0,0],
                [ 0,0,0,0,0,0,0,0,1,0],
                [ 0,0,0,0,0,0,0,0,0,1]])

# Draw N samples; transposing splits the result into one 1-D array per
# coordinate, so x has shape (dimensions, numbers).
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10 = np.random.multivariate_normal(mean,cov,numbers).T
x = np.array([x1,x2,x3,x4,x5,x6,x7,x8,x9,x10])

# Re-pack the per-coordinate rows of `x` into one observation per row,
# giving rawData shape (numbers, dimensions).
mat = []
for i in range(0,numbers):
    data = []
    for j in range(0,len(x)):
        data.append(x[j][i])
    mat.append(data)

rawData = np.array(mat)
newData,meanVal = zeroMean(rawData)  
covMat = np.cov(newData,rowvar=0)  # sample covariance, features along columns
eigVals,eigVects = np.linalg.eig(np.mat(covMat))
k = percentage2n(eigVals, 0.95)  # components needed to keep 95% of variance
eigValIndice = np.argsort(eigVals)            # sort eigenvalues ascending
n_eigValIndice = eigValIndice[-1:-(k+1):-1]   # indices of the k largest eigenvalues
n_eigVect = eigVects[:,n_eigValIndice]        # eigenvectors of the k largest eigenvalues
lowDDataMat = newData*n_eigVect               # data projected into the low-dim space
reconMat = (lowDDataMat*n_eigVect.T)+meanVal  # reconstruction in the original space

# Eigenvalues of the covariance of the raw (uncentered) data, for reference.
covMat_true = np.cov(rawData,rowvar=0)  
eigVals_true,eigVects_true = np.linalg.eig(np.mat(covMat_true))

# Sweep the retained-variance threshold over [0, 1] and record how many
# components percentage2n keeps at each setting.
percents = np.linspace(0,1,1000)
numK = []

for percent in percents:
    numK.append(percentage2n(eigVals,percent))

# Scatter matrix S = (1/N) * sum_i x_i x_i^T over the centered rows;
# (d,) row times its (d, 1) reshape broadcasts to the (d, d) outer product.
S = newData[0]*newData[0].reshape((dimensions,1))
for i in range(1,numbers):
    S += newData[i]*newData[i].reshape((dimensions,1))

S /= numbers
lambd,eigVects_S = np.linalg.eig(S)

# Plain-list copy of the observed eigenvalues for the loops below.
# NOTE(review): np.linalg.eig does not sort its output; the model-selection
# formulas below appear to assume lambd is in descending order — confirm.
lambdlist = []
for i in range(0,len(lambd)):
    lambdlist.append(lambd[i])

# Maximum-likelihood estimation (PPCA): evaluate the maximized likelihood
# for each candidate latent dimensionality k = 1 .. dimensions-1.
maxlike = []
nueHatlist = []  # noise-variance estimate for each k
mullalist = []   # product of the k leading eigenvalues for each k
for k in range(1,dimensions):
    # Noise variance: mean of the d-k discarded eigenvalues.
    nueHat = 0
    for j in range(k+1,dimensions+1):
        nueHat += lambd[j-1]
    nueHat /= (dimensions-k)
    # Product of the k leading eigenvalues.
    mulla = lambd[0]
    for j in range(2,k+1):
        mulla *= lambd[j-1]
    # Maximized likelihood up to k-independent constants.
    p = mulla**(-numbers/2.0)*nueHat**(-numbers*(dimensions-k)/2.0)
    maxlike.append(p)
    nueHatlist.append(nueHat)
    mullalist.append(mulla)

# PPCA: Laplace approximation to the model evidence (Minka, "Automatic
# choice of dimensionality for PCA"). For each k, biglambd holds the k
# leading observed eigenvalues followed by the noise estimate repeated for
# each of the d-k discarded directions.
ppca = []
for k in range(1,dimensions):
    biglambd = lambdlist[:k]
    for i in range(k+1,dimensions+1):
        biglambd.append(nueHatlist[k-1])
    # m: dimensionality of the Stiefel manifold of k-subspaces in R^d,
    # i.e. the parameter count entering the Laplace/BIC penalty.
    m = dimensions*(dimensions-1)/2.0-(dimensions-k)*(dimensions-k-1)/2.0
    ppca.append(np.log(pu(k,dimensions)*mullalist[k-1]**(-numbers/2.0)*nueHatlist[k-1]**(-numbers*(dimensions-k)/2.0)*(2*pi)**((m+k)/2.0)*az(k,dimensions,biglambd,lambdlist)**(-0.5)*numbers**(-4*k/2.0)))

# BIC: the maximized likelihood penalized by N^(-(m + k)/2), where m + k
# counts the free parameters of the k-dimensional model.
bic = []
for k in range(1,dimensions):
    m = dimensions*(dimensions-1)/2.0-(dimensions-k)*(dimensions-k-1)/2.0
    bic.append(np.log(mullalist[k-1]**(-numbers/2.0)*nueHatlist[k-1]**(-numbers*(dimensions-k)/2.0)*numbers**(-(m+k)/2.0)))

# RR-N: likelihood with the k leading eigenvalues replaced by their mean
# (sumlak) and the rest by the usual noise estimate.
rrn = []
for k in range(1,dimensions):
    sumlak = sum(lambd[:k])/k  # average of the k leading eigenvalues
    rrn.append(np.log(sumlak**(-numbers*k/2.0)*nueHatlist[k-1]**(-numbers*(dimensions-k)/2.0)))

# ---- Plots ----------------------------------------------------------------
# Number of retained components as a function of the variance threshold.
plt.title("Eigenvalue thresholding")
plt.plot(percents,numK)
plt.xlim(0,1)
plt.ylim(1,10)
plt.ylabel("k")
plt.xlabel("Percentage")
plt.show()

# Choice of k: compare the eigenvalue spectra and the four model-selection
# criteria computed above (each shown in its own figure).

plt.title("True eigenvalues")
plt.plot(range(1,dimensions+1),eigVals_true,marker='.')
plt.xlabel("k")
plt.ylabel("eigenvalues")
plt.show()

plt.title("Observed eigenvalues")
plt.plot(range(1,dimensions+1),lambd,marker='.')
plt.xlabel("k")
plt.ylabel("eigenvalues")
plt.show()

plt.title("Maximized likelihood")
plt.plot(range(1,dimensions),np.log(maxlike),marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

plt.title("Laplace evidence")
plt.plot(range(1,dimensions),ppca,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

plt.title("BIC evidence")
plt.plot(range(1,dimensions),bic,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

plt.title("RR-N likelihood")
plt.plot(range(1,dimensions),rrn,marker='.')
plt.xlabel("dimensionality k")
plt.ylabel("p")
plt.show()

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).