# Demo entry 3973258

cogn-distance

Submitted by anonymous on Mar 10, 2016 at 12:27
Language: Python 3. Code size: 3.5 kB.

```python
from __future__ import division

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

def ensure_symmetric(M):
    """Validate that M is a square, symmetric matrix

    Arguments
    ---------
    M : a 2-dimensional array-like (e.g. numpy.ndarray)
        Matrix to validate

    Raises
    ------
    ValueError
        If M is not 2-dimensional and square, or if it differs from its
        transpose by more than floating point tolerance.

    """
    A = np.asarray(M)
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        raise ValueError("M is not square!")
    # Tolerance-based comparison so that round-off in a computed similarity
    # matrix is not rejected; NaNs in mirrored positions count as equal.
    if not np.allclose(A, A.T, equal_nan=True):
        raise ValueError("M is not symmetric!")

def barycenter(counts, coords):
    """Calculate the barycenter for the given counts and coordinates

    The barycenter is the count-weighted mean of the rows of `coords`.

    Arguments
    ---------
    counts : an iterable
        Counts for each point (this should have length m). Any iterable is
        accepted, including generators.
    coords : an m * n numpy.ndarray
        Array containing coordinates for m points in n dimensions

    Returns
    -------
    barycenter_coords : numpy.ndarray of length n
        Coordinates of the barycenter (n dimensions)

    Raises
    ------
    ValueError
        If the number of counts differs from the number of rows of `coords`.

    """
    # Iterators/generators have no len(); materialise them first so the
    # documented "an iterable" contract actually holds.
    if not hasattr(counts, "__len__"):
        counts = list(counts)
    weights = np.asarray(counts)

    m, n = coords.shape

    if len(weights) != m:
        raise ValueError("'counts' should have the same number of items "
                         "(now: {}) as rows of 'coords' (now: {})".format(
                             len(weights), m))

    # Transposing twice because of broadcasting rules: the weights must be
    # applied per row (point), not per column (dimension).
    weighted = (coords.T * weights).T
    return weighted.sum(axis=0) / weights.sum()

def sa_vector(counts, S, normalize=True):
    """Calculate the similarity adapted vector for the given counts and
    similarity matrix

    Entry j of the result is the sum over i of counts[i] * S[i, j].

    Arguments
    ---------
    counts : an iterable
        Counts for each point (this should have length m). Any iterable is
        accepted, including generators.
    S : a symmetric m * m numpy.ndarray
        Similarity matrix
    normalize : True|False
        Whether to normalize the result so that its entries sum to 1

    Returns
    -------
    sa : numpy.ndarray of length m
        The similarity adapted vector, divided by its own sum when
        `normalize` is true.

    Raises
    ------
    ValueError
        If S fails the `ensure_symmetric` check, or if the number of counts
        differs from the number of rows of S.

    """
    ensure_symmetric(S)

    # Iterators/generators have no len(); materialise them first so the
    # documented "an iterable" contract actually holds.
    if not hasattr(counts, "__len__"):
        counts = list(counts)
    weights = np.asarray(counts)

    if len(weights) != len(S):
        raise ValueError("'counts' should have the same number of items "
                         "(now: {}) as rows of similarity matrix (now: {})"
                         .format(len(weights), len(S)))

    # Transposing twice because of broadcasting rules: row i of S is scaled
    # by counts[i] before the rows are summed.
    raw_sa_vector = (S.T * weights).T.sum(axis=0)
    if normalize:
        return raw_sa_vector / raw_sa_vector.sum()
    return raw_sa_vector

def weighted_cosine(u, v, S):
    """Calculate the cosine between u and v weighted by a similarity matrix

    Both vectors are first divided by their own sum; the result is
    u.S.v / sqrt((u.S.u) * (v.S.v)).

    Arguments
    ---------
    u, v : numpy.ndarray of length m
        Vectors to compare
    S : a symmetric m * m numpy.ndarray
        Similarity matrix

    Returns
    -------
    cosine : float
        The similarity-weighted cosine of u and v

    Raises
    ------
    ValueError
        If S fails the `ensure_symmetric` check, or if u, v and S do not all
        have the same length.

    """
    ensure_symmetric(S)
    # A chained `a != b != c` only means `a != b and b != c`, which misses
    # mismatches in a single pair; require all three lengths to agree.
    if not (len(u) == len(v) == len(S)):
        raise ValueError("Vectors or similarity matrix of different length.")

    u = u / np.sum(u)
    v = v / np.sum(v)

    # u.S and v.S are each needed twice (u.S once for the numerator and
    # once for the norm), so compute them a single time.
    uS = u.dot(S)
    vS = v.dot(S)

    return uS.dot(v) / np.sqrt(uS.dot(u) * vS.dot(v))

def as_square_matrix(M, compare_by, *args, **kwargs):
    """Calculate pairwise distances or similarities

    Arguments
    ---------
    M : an m * n numpy.ndarray or pandas.DataFrame
        m observations in n-dimensional space
    compare_by : a function to compare row vectors or a string
        The function takes at least two vectors. Extra arguments are supplied
        as args or kwargs. If string, this is passed to
        `scipy.spatial.distance.pdist` (e.g., 'euclidean', 'minkowski' etc.);
        in that case args and kwargs are not used.

    Returns
    -------
    S : an m * m numpy.ndarray or pandas.DataFrame
        Pairwise comparison values. When M is a DataFrame the result is a
        DataFrame whose index and columns are both M's index.

    """
    if isinstance(M, pd.DataFrame):
        labels = M.index
        # DataFrame.as_matrix() was removed in pandas 1.0; `.values` gives
        # the same ndarray and exists in every pandas version.
        M = M.values
    else:
        labels = None

    if callable(compare_by):
        n = len(M)
        S = np.empty((n, n))
        # Also calculate diagonal, since we don't know if this is a distance
        # or similarity measure. Only the upper triangle is evaluated and
        # mirrored, so compare_by is assumed to be symmetric.
        for i in range(n):
            for j in range(i, n):
                S[i, j] = S[j, i] = compare_by(M[i], M[j], *args, **kwargs)
    else:
        # Assume compare_by is a label like 'euclidean'
        S = squareform(pdist(M, compare_by))

    if labels is not None:
        S = pd.DataFrame(S, index=labels, columns=labels)

    return S
```

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.