Skip to content

Commit

Permalink
Merge pull request #19 from longyangking/main
Browse files Browse the repository at this point in the history
Add interactive PCA and Kmeans
  • Loading branch information
longyangking authored Jun 28, 2024
2 parents 5fd6533 + e91f3fa commit d51842b
Show file tree
Hide file tree
Showing 9 changed files with 525 additions and 41 deletions.
18 changes: 18 additions & 0 deletions examples/interactive_Kmeans.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Example: run vidar's ``interactive_kmeans`` on synthetic 2-D Gaussian blobs."""
import sys
from pathlib import Path

# Make the in-repo ``vidar`` package importable no matter which directory the
# script is launched from. A bare '..' is resolved against the current working
# directory, so it only works when the script is started from inside examples/.
try:
    _repo_root = str(Path(__file__).resolve().parent.parent)
except NameError:  # no __file__, e.g. the code was pasted into a REPL
    _repo_root = '..'
sys.path.append(_repo_root)

import numpy as np
from vidar import interactive_kmeans

# Fixed seed so the generated samples are reproducible between runs.
rng = np.random.RandomState(0)
n_samples = 1000  # samples drawn per blob
cov = [[0.4, 0], [0, 0.4]]  # same diagonal covariance for every blob
blob_means = [[-2, 0], [2, 0], [0.3, 1]]

# Draw the blobs in the listed order so the random stream (and therefore X)
# is the same as when each blob was sampled by a separate explicit call.
X = np.concatenate([
    rng.multivariate_normal(mean=mean, cov=cov, size=n_samples)
    for mean in blob_means
])

# NOTE(review): three blobs are generated but only two clusters are requested;
# presumably intentional for the interactive demo -- confirm.
n_clusters = 2
app = interactive_kmeans(X, n_clusters)
app.show()
17 changes: 17 additions & 0 deletions examples/interactive_PCA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Example: run vidar's ``interactive_PCA`` on two synthetic 2-D Gaussian blobs."""
import sys
from pathlib import Path

# Make the in-repo ``vidar`` package importable no matter which directory the
# script is launched from. A bare '..' is resolved against the current working
# directory, so it only works when the script is started from inside examples/.
try:
    _repo_root = str(Path(__file__).resolve().parent.parent)
except NameError:  # no __file__, e.g. the code was pasted into a REPL
    _repo_root = '..'
sys.path.append(_repo_root)

import numpy as np
from vidar import interactive_PCA

# Fixed seed so the generated samples are reproducible between runs.
rng = np.random.RandomState(0)
n_samples = 1000  # samples drawn per blob
cov = [[1, 0], [0, 1]]  # identity covariance for both blobs
blob_means = [[-2, 0], [2, 0]]

# Draw the blobs in the listed order so the random stream (and therefore X)
# is the same as when each blob was sampled by a separate explicit call.
X = np.concatenate([
    rng.multivariate_normal(mean=mean, cov=cov, size=n_samples)
    for mean in blob_means
])

n_components = 1
app = interactive_PCA(X, n_components)
app.show()
130 changes: 130 additions & 0 deletions notebooks/yang/Kmeans interactive _ more clusters.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-means interactive\n",
"\n",
"> Yang"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib qt\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from matplotlib.widgets import Button\n",
"from matplotlib.widgets import PolygonSelector\n",
"from sklearn.cluster import KMeans\n",
"\n",
"def colors_from_lbs(lbs, colors=None):\n",
" mpl_20 = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',\n",
" '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',\n",
" '#3397dc', '#ff993e', '#3fca3f', '#df5152', '#a985ca',\n",
" '#ad7165', '#e992ce', '#999999', '#dbdc3c', '#35d8e9']\n",
" \n",
" if colors is None:\n",
" colors = np.array(mpl_20)\n",
" else:\n",
" colors = np.array(colors)\n",
" lbs = np.array(lbs) % len(colors)\n",
" return colors[lbs]\n",
"\n",
"rng = np.random.RandomState(0)\n",
"n_samples = 1000\n",
"cov = [[0.4, 0], [0, 0.4]]\n",
"X = np.concatenate([\n",
" rng.multivariate_normal(mean=[-2, 0], cov=cov, size=n_samples), \n",
" rng.multivariate_normal(mean=[2, 0], cov=cov, size=n_samples),\n",
" rng.multivariate_normal(mean=[0.5, 1], cov=cov, size=n_samples)\n",
" ])\n",
"\n",
"n_clusters=3\n",
"kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=\"auto\")\n",
"labels = kmeans.fit_predict(X)\n",
"\n",
"centers = kmeans.cluster_centers_\n",
"\n",
"fig, (ax_orig, ax_redim) = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"def plot_figure(axe_list, X, centers):\n",
" ax_orig, ax_redim = axe_list\n",
"\n",
" kmeans.cluster_centers_ = np.array(centers, dtype=np.float64)\n",
" labels = kmeans.predict(X) \n",
"\n",
" ax_orig.clear()\n",
" ax_orig.scatter(X[:, 0], X[:, 1], alpha=0.3, label=\"samples\", c=colors_from_lbs(labels))\n",
" ax_orig.scatter(centers[:,0], centers[:,1], s=50, c='black', edgecolors='r')\n",
" ax_orig.set(\n",
" aspect=\"auto\", \n",
" title=\"Interactive K-means\",\n",
" xlabel=\"first feature\",\n",
" ylabel=\"second feature\",\n",
" )\n",
"\n",
" ax_redim.clear()\n",
" class_name = ['class {0}'.format(i+1) for i in range(len(centers))]\n",
"\n",
" # update labels\n",
" counts = [np.sum(labels==i) for i in range(len(centers))]\n",
" \n",
"\n",
" ax_redim.bar(class_name, counts, label=class_name, color=colors_from_lbs(range(len(centers))))\n",
" ax_redim.set(\n",
" aspect=\"auto\",\n",
" title=\"Clustering results\",\n",
" xlabel=\"Main feature\",\n",
" ylabel=\"Number of samples\",\n",
" )\n",
" fig.canvas.draw_idle()\n",
"\n",
"plot_figure((ax_orig, ax_redim), X, centers)\n",
"\n",
"def onselect(verts):\n",
" centers = np.array(verts)\n",
" plot_figure((ax_orig, ax_redim), X, centers)\n",
"\n",
"selector = PolygonSelector(ax_orig, onselect=onselect, \n",
" props=dict(color='r', linestyle='', linewidth=3, alpha=0.6, label=f\"Component\"))\n",
"selector.verts = centers\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
124 changes: 124 additions & 0 deletions notebooks/yang/Kmeans interactive _ more.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-means interactive\n",
"\n",
"> Yang"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib qt\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from matplotlib.widgets import Button\n",
"from matplotlib.widgets import PolygonSelector\n",
"from sklearn.cluster import KMeans\n",
"\n",
"def colors_from_lbs(lbs, colors=None):\n",
" mpl_20 = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',\n",
" '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',\n",
" '#3397dc', '#ff993e', '#3fca3f', '#df5152', '#a985ca',\n",
" '#ad7165', '#e992ce', '#999999', '#dbdc3c', '#35d8e9']\n",
" \n",
" if colors is None:\n",
" colors = np.array(mpl_20)\n",
" else:\n",
" colors = np.array(colors)\n",
" lbs = np.array(lbs) % len(colors)\n",
" return colors[lbs]\n",
"\n",
"rng = np.random.RandomState(0)\n",
"n_samples = 1000\n",
"cov = [[0.4, 0], [0, 0.4]]\n",
"X = np.concatenate([\n",
" rng.multivariate_normal(mean=[-2, 0], cov=cov, size=n_samples), \n",
" rng.multivariate_normal(mean=[2, 0], cov=cov, size=n_samples),\n",
" rng.multivariate_normal(mean=[0.3, 1], cov=cov, size=n_samples)\n",
" ])\n",
"\n",
"kmeans = KMeans(n_clusters=2, random_state=0, n_init=\"auto\")\n",
"labels = kmeans.fit_predict(X)\n",
"\n",
"centers = kmeans.cluster_centers_\n",
"\n",
"fig, (ax_orig, ax_redim) = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"def plot_figure(axe_list, X, centers):\n",
" ax_orig, ax_redim = axe_list\n",
"\n",
" kmeans.cluster_centers_ = np.array(centers, dtype=np.float64)\n",
" labels = kmeans.predict(X) \n",
"\n",
" ax_orig.clear()\n",
" ax_orig.scatter(X[:, 0], X[:, 1], alpha=0.3, label=\"samples\", c=colors_from_lbs(labels))\n",
" ax_orig.scatter(centers[:,0], centers[:,1], s=50, c='black', edgecolors='r')\n",
" ax_orig.set(\n",
" aspect=\"auto\", \n",
" title=\"Interactive K-means\",\n",
" xlabel=\"first feature\",\n",
" ylabel=\"second feature\",\n",
" )\n",
"\n",
" ax_redim.clear()\n",
" class_name = ['class {0}'.format(i+1) for i in range(len(centers))]\n",
"\n",
" # update labels\n",
" counts = [np.sum(labels==i) for i in range(len(centers))]\n",
" \n",
"\n",
" ax_redim.bar(class_name, counts, \n",
" label=class_name,\n",
" color=colors_from_lbs(range(len(centers))))\n",
" ax_redim.set(\n",
" aspect=\"auto\",\n",
" title=\"Clustering results\",\n",
" xlabel=\"Main feature\",\n",
" ylabel=\"Number of samples\",\n",
" )\n",
" fig.canvas.draw_idle()\n",
"\n",
"plot_figure((ax_orig, ax_redim), X, centers)\n",
"\n",
"def onselect(verts):\n",
" centers = np.array(verts)\n",
" plot_figure((ax_orig, ax_redim), X, centers)\n",
"\n",
"selector = PolygonSelector(ax_orig, onselect=onselect, \n",
" props=dict(color='r', linestyle='', linewidth=3, alpha=0.6, label=f\"Component\"))\n",
"selector.verts = centers\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
43 changes: 4 additions & 39 deletions notebooks/yang/Kmeans interactive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -63,13 +63,13 @@
" ax_orig.scatter(centers[:,0], centers[:,1], s=50, c='black', edgecolors='r')\n",
" ax_orig.set(\n",
" aspect=\"auto\", \n",
" title=\"K-means\",\n",
" title=\"Interactive K-means\",\n",
" xlabel=\"first feature\",\n",
" ylabel=\"second feature\",\n",
" )\n",
"\n",
" ax_redim.clear()\n",
" class_name = ['class {0}'.format(i) for i in range(len(centers))]\n",
" class_name = ['class {0}'.format(i+1) for i in range(len(centers))]\n",
"\n",
" # update labels\n",
" counts = [np.sum(labels==i) for i in range(len(centers))]\n",
Expand All @@ -95,17 +95,6 @@
" props=dict(color='r', linestyle='', linewidth=3, alpha=0.6, label=f\"Component\"))\n",
"selector.verts = centers\n",
"\n",
"\n",
"# ax_redim.hist((X @ component.T - x_center @ component.T),50)\n",
"# ax_redim.set(\n",
"# aspect=\"auto\",\n",
"# title=\"1-dimensional dataset after dimension reduction\",\n",
"# xlabel=\"Main feature\",\n",
"# ylabel=\"Number of samples\",\n",
"# )\n",
"#_asp = np.diff(ax_orig.get_ylim())[0] / np.diff(ax_orig.get_xlim())[0]\n",
"#ax_redim.set_aspect(_asp)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
Expand All @@ -115,31 +104,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"# pca = PCA(n_components=1).fit(X)\n",
"# component = pca.components_.reshape(-1)\n",
"\n",
"# # print(pca.components_)\n",
"# # print(pca.explained_variance_)\n",
"# # print(list(zip(pca.components_, pca.explained_variance_)))\n",
"\n",
"# # fig, (ax_orig, ax_redim) = plt.subplots(1, 2, figsize=(12, 6))\n",
"# # ax_orig.scatter(X[:, 0], X[:, 1], alpha=0.3, label=\"samples\")\n",
"# # x_center = np.mean(X, axis=0)\n",
"\n",
"# comp_vector = [component, x_center]\n",
"\n",
"# ax_orig.set(\n",
"# aspect=\"auto\", \n",
"# title=\"2-dimensional dataset with principal components\",\n",
"# xlabel=\"first feature\",\n",
"# ylabel=\"second feature\",\n",
"# )\n",
"\n",
"\n"
]
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit d51842b

Please sign in to comment.