Phase transitions, a measure of learning
https://rumble.com/vcj8fk-1.65-phase-transitions-a-measure-of-learning.html
import numpy as np
import pandas as pd
# Build a third blob from X (X and X1 are defined outside this excerpt,
# presumably as DataFrames with columns 0 and 1 -- confirm): scale X down to
# a quarter, shift it by +2 along the first column, then keep a random tenth
# of its rows.
X2 = (X / 4) + np.array([2, 0])
X2 = X2.sample(len(X2) // 10)

# Stack the three blobs row-wise. DataFrame.append was removed in pandas 2.0;
# pd.concat produces the same rows and keeps the original index labels.
data = pd.concat([X, X1, X2])

# Quick look at the combined data set.
plt.close()
plt.scatter(data[0], data[1])
from sklearn.cluster import KMeans
# Sweep the cluster count, fitting FADDC and KMeans on the same data, saving
# one scatter plot per method and recording each method's total error.
# Each row of `dists` is [k, faddc_error, kmeans_inertia].
dists = []
for k in range(2, 10):
    # FADDC gets k centroids while KMeans gets k - 1; see the note at the
    # savefig call below for why the two are compared off by one.
    faddc = Faddc(n_cluster=k, feature_count=2)
    faddc.fit(data.values)
    centroids_df = pd.DataFrame(faddc.m_centroids)

    # FADDC plot: the three blobs plus the centroids, sized by faddc.m_count.
    plt.close()
    for blob in (X, X1, X2):
        plt.scatter(blob[0], blob[1])
    plt.scatter(centroids_df[0], centroids_df[1], s=faddc.m_count)
    # k - 1, since the last centroid is always just the last data point
    plt.savefig(f'faddc_k{k - 1}_2scales.png', dpi=300)

    kmeans = KMeans(n_clusters=k - 1, random_state=0).fit(data)
    centroids_df = pd.DataFrame(kmeans.cluster_centers_)

    # KMeans plot: same layout, centroids sized by their member count.
    plt.close()
    for blob in (X, X1, X2):
        plt.scatter(blob[0], blob[1])
    labels_df = pd.DataFrame(kmeans.labels_)
    # Points per cluster, ordered by cluster label so that entry i lines up
    # with row i of cluster_centers_. sort_index() does this without relying
    # on the column names reset_index() produces, which changed in pandas 2.0.
    count_df = labels_df[0].value_counts().sort_index()
    plt.scatter(centroids_df[0], centroids_df[1], s=count_df)
    plt.savefig(f'kmeans_k{k - 1}_2scales.png', dpi=300)

    # Sum over points of the smallest entry in each row of y_err
    # (presumably the distance to the nearest centroid -- confirm against
    # Faddc.predict), recorded next to the KMeans inertia.
    y, y_err = faddc.predict(data.values)
    dists.append([k, np.min(y_err, axis=1).sum(), kmeans.inertia_])
# Turn the per-k errors into normalised "error drop" curves for both methods.
dists_df = pd.DataFrame(dists, columns=['k', 'faddc', 'kmeans'])

for method in ('faddc', 'kmeans'):
    # Error at this k minus the error at the next k; the last row has no
    # successor and is therefore NaN.
    drop = dists_df[method] - dists_df[method].shift(-1)
    # Scale by the largest drop so the two methods share one axis.
    dists_df[method + '_diff'] = drop / drop.max()

# The sweep stored the FADDC centroid count; shift it down by one so the
# x axis shows the k - 1 used for KMeans and for the saved file names.
dists_df['k'] = dists_df['k'] - 1

plt.close()
plt.plot(dists_df['k'], dists_df['faddc_diff'], label='faddc')
plt.plot(dists_df['k'], dists_df['kmeans_diff'], label='kmeans')
# Without this call the labels passed to plt.plot are never drawn.
plt.legend()
Comments
Post a Comment