0% found this document useful (0 votes)
10 views9 pages

From Import Import As Import As From Import From Import From Import From Import

Uploaded by

Mr Sonu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
10 views9 pages

From Import Import As Import As From Import From Import From Import From Import

Uploaded by

Mr Sonu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 9

import matplotlib.pyplot as plt
import numpy as np
from scipy.cluster.hierarchy import dendrogram
from sklearn import metrics
from sklearn.cluster import (
    AgglomerativeClustering,
    BisectingKMeans,
    DBSCAN,
    KMeans,
)
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Data points: six 2-D samples (p1..p6) used by the small clustering demos.
dataset1 = np.array([
    [0.4005, 0.5306],  # p1
    [0.2148, 0.3854],  # p2
    [0.3457, 0.3156],  # p3
    [0.2652, 0.1875],  # p4
    [0.0789, 0.4139],  # p5
    [0.4548, 0.3022],  # p6
])

# Number of clusters
k = 2

# Initialize KMeans with the number of clusters; a fixed random_state keeps
# the run reproducible.
kmeans = KMeans(n_clusters=k, random_state=0)

# Fit the KMeans algorithm to the data
kmeans.fit(dataset1)
# Notebook output: KMeans(n_clusters=2, random_state=0)

# Pull the fitted cluster centres and the per-point cluster assignment
# out of the K-Means model in one step.
centroids, labels = kmeans.cluster_centers_, kmeans.labels_

# Samples, coloured by the cluster each one was assigned to.
plt.scatter(
    x=dataset1[:, 0],
    y=dataset1[:, 1],
    c=labels,
    cmap='viridis',
    marker='o',
)

# Cluster centres, drawn as large red crosses on top of the samples.
plt.scatter(
    x=centroids[:, 0],
    y=centroids[:, 1],
    c='red',
    marker='x',
    s=200,
    label='Centroids',
)

# Axis labels and title are set on the current axes in a single call.
plt.gca().set(
    xlabel='X Coordinate',
    ylabel='Y Coordinate',
    title='K-Means Clustering',
)
plt.legend()
plt.show()
# Synthetic data set: 750 points drawn around three blob centres with a
# fixed seed, then standardised feature-wise before clustering.
centers = [[1, 1], [-1, -1], [1, -1]]
dataset2, labels_true = make_blobs(
    random_state=0,
    cluster_std=0.4,
    centers=centers,
    n_samples=750,
)
dataset2 = StandardScaler().fit(dataset2).transform(dataset2)

# Quick look at the raw (unlabelled) point cloud.
plt.scatter(x=dataset2[:, 0], y=dataset2[:, 1])
plt.show()
# K-Means on the blob data set, asking for three clusters with a fixed seed.
kmeans2 = KMeans(n_clusters=3, random_state=42)
kmeans2.fit(dataset2)
# Notebook output: KMeans(n_clusters=3, random_state=42)

# Get the cluster centers (centroids)
centroids = kmeans2.cluster_centers_

# Get the labels for each point
labels = kmeans2.labels_

# Plot the data points, coloured by assigned cluster
plt.scatter(dataset2[:, 0], dataset2[:, 1], c=labels, cmap='viridis',
            marker='o')

# Plot the centroids
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='x',
            s=200, label='Centroids')

# Add labels and title
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.title('K-Means Clustering')
plt.legend()
plt.show()
# linkage = {'ward', 'complete', 'average', 'single'}, default='ward'
# n_clusters is not passed, so the estimator's default is used.
agg = AgglomerativeClustering(linkage='single')
# Fit the algorithm to the data
agg.fit(dataset1)

n_clusters = agg.n_clusters_

labels = agg.labels_

# Plot the data points, coloured by assigned cluster
cluster_points = plt.scatter(dataset1[:, 0], dataset1[:, 1], c=labels,
                             cmap='viridis', marker='o')

# Add labels and title
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.title('Agglomerative Clustering')
# Build the legend from the scatter's own colour mapping. A bare plt.legend()
# finds no labelled artists here and only emits "No artists with labels found
# to put in legend" without drawing anything.
plt.legend(*cluster_points.legend_elements(), title='Cluster')
plt.show()
def plot_dendrogram(model, **kwargs):
    """Plot the merge tree of a fitted hierarchical model as a dendrogram.

    Builds a linkage matrix with one row per merge, laid out as
    ``[child_a, child_b, distance, sample_count]``, and hands it to
    ``scipy.cluster.hierarchy.dendrogram``.

    Args:
        model: Fitted estimator exposing ``children_`` (one pair of child
            node ids per merge), ``distances_`` (one distance per merge) and
            ``labels_`` (one entry per sample). ``distances_`` is read
            directly, so the model must have been fitted in a way that
            populates it.
        **kwargs: Forwarded unchanged to ``dendrogram`` (for example
            ``truncate_mode``, ``p`` or ``no_plot``).

    Returns:
        The dictionary returned by ``dendrogram``.
    """
    children = np.asarray(model.children_)
    n_samples = len(model.labels_)

    # Number of original samples under each merge node. Child ids below
    # n_samples are leaves; larger ids refer to the merge created at row
    # (id - n_samples), whose count has already been filled in.
    counts = np.zeros(children.shape[0])
    for i, merge in enumerate(children):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack(
        [children, model.distances_, counts]
    ).astype(float)

    # Plot the corresponding dendrogram
    return dendrogram(linkage_matrix, **kwargs)
# setting distance_threshold=0 ensures we compute the full tree.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None,
                                linkage='single')

model = model.fit(dataset1)

plt.title("Hierarchical Clustering Dendrogram")

# plot the top three levels of the dendrogram
plot_dendrogram(model, truncate_mode="level", p=3)
plt.xlabel(
    "Number of points in node (or index of point if no parenthesis)."
)
plt.show()

# Ward-linkage agglomerative clustering on the blob data set.
# NOTE(review): n_clusters is not passed, so the estimator default applies,
# while dataset2 is generated from three centres. Confirm this is intended.
agg2 = AgglomerativeClustering(linkage='ward')
agg2.fit(dataset2)

# Get the labels for each point
labels = agg2.labels_

# Plot the data points, coloured by assigned cluster
cluster_points = plt.scatter(dataset2[:, 0], dataset2[:, 1], c=labels,
                             cmap='viridis', marker='o')
# Add labels and title
plt.xlabel('X Coordinate')
plt.ylabel('Y Coordinate')
plt.title('Agglomerative Clustering')
# Build the legend from the scatter's own colour mapping. A bare plt.legend()
# finds no labelled artists here and only emits "No artists with labels found
# to put in legend" without drawing anything.
plt.legend(*cluster_points.legend_elements(), title='Cluster')
plt.show()

# Density-based clustering of the blob data set.
db = DBSCAN(eps=0.3, min_samples=10).fit(dataset2)
labels = db.labels_

# Number of clusters in labels, ignoring noise if present
# (noise points carry the label -1).
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = int(np.count_nonzero(labels == -1))

print(f"Estimated number of clusters: {n_clusters_}")
print(f"Estimated number of noise points: {n_noise_}")
# Notebook output:
#   Estimated number of clusters: 3
#   Estimated number of noise points: 18

# Score the DBSCAN labelling. The first five metrics compare against the
# ground-truth labels; the silhouette score uses only the data and the
# predicted labels.
print(
    f"Homogeneity: {metrics.homogeneity_score(labels_true, labels):.3f}"
)
print(
    f"Completeness: {metrics.completeness_score(labels_true, labels):.3f}"
)
print(
    f"V-measure: {metrics.v_measure_score(labels_true, labels):.3f}"
)
print(
    "Adjusted Rand Index:"
    f" {metrics.adjusted_rand_score(labels_true, labels):.3f}"
)
print(
    "Adjusted Mutual Information:"
    f" {metrics.adjusted_mutual_info_score(labels_true, labels):.3f}"
)
print(
    "Silhouette Coefficient:"
    f" {metrics.silhouette_score(dataset2, labels):.3f}"
)
# Notebook output:
#   Homogeneity: 0.953
#   Completeness: 0.883
#   V-measure: 0.917
#   Adjusted Rand Index: 0.952
#   Adjusted Mutual Information: 0.916
#   Silhouette Coefficient: 0.626

# Plot the DBSCAN result: one colour per cluster, black for noise, with
# core samples drawn large and non-core samples drawn small.
unique_labels = set(labels)
# Boolean mask that is True for the samples DBSCAN marked as core samples.
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

colors = [
    plt.cm.Spectral(each)
    for each in np.linspace(0, 1, len(unique_labels))
]
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = labels == k

    # Core samples of this cluster: large markers.
    xy = dataset2[class_member_mask & core_samples_mask]
    plt.plot(
        xy[:, 0],
        xy[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
        markersize=14,
    )

    # Remaining (non-core) samples with this label: small markers.
    xy = dataset2[class_member_mask & ~core_samples_mask]
    plt.plot(
        xy[:, 0],
        xy[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
        markersize=6,
    )

plt.title(f"Estimated number of clusters: {n_clusters_}")
plt.show()

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy