
##############################################################################
## LDA Analysis
##############################################################################

import numpy as np

# Assumes X_train_orig (N x d feature matrix), Y_train (integer class labels
# starting at 1), and label_dict (class-label mapping) are defined upstream.

##### Step 1: Computing the d-dimensional mean vectors

mean_vectors = []
for clase in range(1,len(label_dict)+1):
    mean_vectors.append(np.mean(X_train_orig[Y_train==clase], axis=0))
    #print('Mean Vector class %s: %s\n' %(clase, mean_vectors[clase-1]))
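# A quick sanity check (a minimal sketch, assuming the X_train_orig/Y_train
# shapes described above): one mean vector per class, each of length d.
assert len(mean_vectors) == len(label_dict)
assert all(mv.shape == (X_train_orig.shape[1],) for mv in mean_vectors)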


##### Step 2: Computing the Scatter Matrices
    
####### Within-class scatter matrix S_w
    
nComp = 13  # number of features (dimensionality d of X_train_orig)
S_W = np.zeros((nComp, nComp))

for cl, mv in zip(range(1, len(mean_vectors) + 1), mean_vectors):
    class_sc_mat = np.zeros((nComp, nComp))             # scatter matrix for this class
    mv = mv.reshape(nComp, 1)                           # class mean as a column vector
    for row in X_train_orig[Y_train == cl]:
        row = row.reshape(nComp, 1)                     # sample as a column vector
        class_sc_mat += (row - mv).dot((row - mv).T)
    S_W += class_sc_mat                                 # sum class scatter matrices

#print('within-class Scatter Matrix:\n', S_W)
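# Equivalent formulation (a sketch, not part of the original run): each class
# scatter matrix equals (N_i - 1) times the unbiased sample covariance, so
# S_W can also be assembled from np.cov.
S_W_alt = np.zeros((nComp, nComp))
for cl in range(1, len(mean_vectors) + 1):
    X_cl = X_train_orig[Y_train == cl]
    S_W_alt += (X_cl.shape[0] - 1) * np.cov(X_cl, rowvar=False)
#print('S_W matches covariance form:', np.allclose(S_W, S_W_alt))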

####### Between-class scatter matrix S_B

overall_mean = np.mean(X_train_orig, axis=0).reshape(nComp, 1)  # column vector

S_B = np.zeros((nComp, nComp))
for i, mean_vec in enumerate(mean_vectors):
    n = X_train_orig[Y_train == i + 1, :].shape[0]      # class sample count
    mean_vec = mean_vec.reshape(nComp, 1)               # class mean as a column vector
    S_B += n * (mean_vec - overall_mean).dot((mean_vec - overall_mean).T)

#print('\nbetween-class Scatter Matrix:\n', S_B)
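# Consistency check (sketch): S_B is a sum of c rank-one terms constrained by
# the overall mean, so its rank is at most c-1.
#print('rank(S_B):', np.linalg.matrix_rank(S_B))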


##### Step 3: Solving the generalized eigenvalue problem for the matrix (S_W)^-1(S_B)

eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
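# S_W^-1 S_B is not symmetric, so np.linalg.eig may return eigenvalues with
# tiny imaginary parts; the np.abs/.real calls below account for this. A
# numerically steadier variant (a sketch, equivalent in exact arithmetic)
# avoids forming the explicit inverse:
#eig_vals, eig_vecs = np.linalg.eig(np.linalg.solve(S_W, S_B))

# Quick verification (sketch): each pair should satisfy A v = lambda v.
A = np.linalg.inv(S_W).dot(S_B)
for i in range(len(eig_vals)):
    np.testing.assert_array_almost_equal(A.dot(eig_vecs[:, i]),
                                         eig_vals[i] * eig_vecs[:, i],
                                         decimal=6)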

##### Step 4: Selecting linear discriminants for the new feature subspace

####### Make a list of (eigenvalue, eigenvector) tuples
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:,i]) for i in range(len(eig_vals))]

####### Sort the (eigenvalue, eigenvector) tuples from high to low
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)

eigv_sum = sum(eig_vals)

print('\nVariance explained for LDA:\n')
variance_explained_LDA = []
for i,j in enumerate(eig_pairs):
    variance_explained_LDA.append((j[0]/eigv_sum).real)
    print('eigenvalue {0:}: {1:.2%}'.format(i+1, (j[0]/eigv_sum).real))
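# Cumulative view (sketch): with c classes, the first c-1 ratios should
# account for essentially all of the discriminative variance.
cumulative_variance = np.cumsum(variance_explained_LDA)
#print('cumulative variance explained:', cumulative_variance)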


# Looking at the eigenvalues, nComp-(c-1) of them (where c is the number of
# class labels) are zero up to numerical precision. This is expected rather
# than a numerical accident: S_B has rank at most c-1, so LDA yields at most
# c-1 informative linear discriminants.
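
##### Step 5: Projecting onto the new feature subspace

# Not part of the original snippet; a minimal sketch that keeps the c-1
# informative discriminants found above. W stacks the top eigenvectors as
# columns, and the dot product projects the data into the LDA subspace.
c = len(label_dict)
W = np.hstack([eig_pairs[i][1].reshape(nComp, 1) for i in range(c - 1)])
X_train_lda = X_train_orig.dot(W).real  # .real drops tiny imaginary residue
#print('Projected data shape:', X_train_lda.shape)  # (N, c-1)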
