#!/usr/bin/python3
import sklearn.metrics as metrics
import numpy as np
from scipy.spatial.distance import pdist, euclidean
from sklearn.utils import safe_indexing
import sklearn.metrics as metrics
def davies_bouldin_score(X, labels):
    """Compute the Davies-Bouldin index for a clustering (lower is better).

    Adapted from https://github.com/scikit-learn/scikit-learn/pull/12760
    to avoid errors with scikit-learn versions where this metric (or
    ``sklearn.utils.safe_indexing``) is unavailable.

    :param X: array-like of shape (n_samples, n_features), the data points.
    :param labels: array-like of shape (n_samples,), integer cluster label
        per point; labels are assumed to be contiguous in 0..n_clusters-1.
    :return: the Davies-Bouldin score as a float; 0.0 for degenerate
        clusterings where all intra- or inter-cluster distances vanish.
    """
    # Coerce inputs so boolean masking below works even for plain lists.
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    n_labels = int(labels.max()) + 1
    intra_dists = np.zeros(n_labels)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent.
    centroids = np.zeros((n_labels, X.shape[1]), dtype=float)
    for k in range(n_labels):
        # Boolean-mask indexing replaces sklearn.utils.safe_indexing,
        # which was removed in scikit-learn 0.24.
        cluster_k = X[labels == k]
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        # Mean Euclidean distance of the cluster's points to its centroid
        # (same as metrics.pairwise_distances(cluster_k, [centroid])).
        intra_dists[k] = np.average(np.linalg.norm(cluster_k - centroid, axis=1))
    # Full pairwise Euclidean distance matrix between centroids
    # (zero diagonal, like metrics.pairwise_distances(centroids)).
    centroid_distances = np.linalg.norm(
        centroids[:, None, :] - centroids[None, :, :], axis=-1)
    if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
        return 0.0
    # Mask the zero diagonal (and any coincident centroids) so the
    # division below never produces inf/nan from a 0 denominator.
    centroid_distances[centroid_distances == 0] = np.inf
    combined_intra_dists = intra_dists[:, None] + intra_dists
    scores = np.amax(combined_intra_dists / centroid_distances, axis=1)
    return np.mean(scores)
def dunn_index(X, labels):
    """Compute the Dunn index for the provided clustering (higher is better).

    Defined here as the minimum inter-centroid distance divided by the
    maximum mean point-to-centroid distance over all clusters.

    :param X: array-like of shape (n_samples, n_features), the data points.
    :param labels: array-like of shape (n_samples,), integer cluster label
        per point; labels are assumed to be contiguous in 0..n_clusters-1.
    :return: the Dunn index as a float.
    """
    # Coerce inputs so boolean masking below works even for plain lists.
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    n_labels = int(labels.max()) + 1
    intra_dists = np.zeros(n_labels)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent.
    centroids = np.zeros((n_labels, X.shape[1]), dtype=float)
    for k in range(n_labels):
        # Boolean-mask indexing replaces sklearn.utils.safe_indexing,
        # which was removed in scikit-learn 0.24.
        cluster_k = X[labels == k]
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        # Mean Euclidean distance of the cluster's points to its centroid.
        intra_dists[k] = np.average(np.linalg.norm(cluster_k - centroid, axis=1))
    # Pairwise Euclidean distances between centroids.
    centroid_distances = np.linalg.norm(
        centroids[:, None, :] - centroids[None, :, :], axis=-1)
    # Exclude self-distances (the zero diagonal) from the minimum.
    np.fill_diagonal(centroid_distances, np.inf)
    return centroid_distances.min() / intra_dists.max()
if __name__ == '__main__':
    # Module is intended to be imported for its metric functions;
    # there is deliberately no standalone CLI behavior.
    pass