JustPaste
HomeCategoriesAboutDonateContactTerms of UsePrivacy Policy
JustPaste

Free online notepad — write and share instantly

Navigate

  • Home
  • Timeline
  • Categories

Info

  • About
  • Donate
  • Contact

Legal

  • Terms of Use
  • Privacy Policy

© 2026 JustPaste.app. All rights reserved.

Made with ♥ by JustPaste

Untitled Page | JustPaste.app
8 months ago120 views
vbh
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

# Your Kmeans class goes here
class Kmeans:
    '''Implementing Kmeans algorithm.'''

    def __init__(self, n_clusters, max_iter=100, random_state=123):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    def initializ_centroids(self, X):
        np.random.RandomState(self.random_state)
        random_idx = np.random.permutation(X.shape[0])
        centroids = X[random_idx[:self.n_clusters]]
        return centroids

    def compute_centroids(self, X, labels):
        centroids = np.zeros((self.n_clusters, X.shape[1]))
        for k in range(self.n_clusters):
            centroids[k, :] = np.mean(X[labels == k, :], axis=0)
        return centroids

    def compute_distance(self, X, centroids):
        distance = np.zeros((X.shape[0], self.n_clusters))
        for k in range(self.n_clusters):
            row_norm = norm(X - centroids[k, :], axis=1)
            distance[:, k] = np.square(row_norm)
        return distance

    def find_closest_cluster(self, distance):
        return np.argmin(distance, axis=1)

    def compute_sse(self, X, labels, centroids):
        distance = np.zeros(X.shape[0])
        for k in range(self.n_clusters):
            distance[labels == k] = norm(X[labels == k] - centroids[k], axis=1)
        return np.sum(np.square(distance))
    
    def fit(self, X):
        self.centroids = self.initializ_centroids(X)
        for i in range(self.max_iter):
            old_centroids = self.centroids.copy()
            distance = self.compute_distance(X, old_centroids)
            self.labels = self.find_closest_cluster(distance)
            self.centroids = self.compute_centroids(X, self.labels)
            if np.all(old_centroids == self.centroids):
                break
        self.error = self.compute_sse(X, self.labels, self.centroids)
    
    def predict(self, X):
        distance = self.compute_distance(X, self.centroids)
        return self.find_closest_cluster(distance)

# Load dataset (unsupervised, so no labels used)
iris = load_iris()
X = iris.data  # shape (150, 4)

# Instantiate and fit Kmeans
kmeans = Kmeans(n_clusters=3, max_iter=100, random_state=42)
kmeans.fit(X)

print("Cluster centers:\n", kmeans.centroids)
print("Cluster labels:", kmeans.labels)
print("SSE:", kmeans.error)

# Optional: visualize in 2D with first two features
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels, cmap='viridis', marker='o', alpha=0.6)
plt.scatter(kmeans.centroids[:, 0], kmeans.centroids[:, 1], s=200, c='red', marker='X')
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.title("K-means clustering on Iris dataset (unsupervised)")
plt.show()

⚠️Content was pasted as plain text and auto-formatted as a code block. Use the Code Block button in the editor for proper formatting.

← Back to timeline