KMeans
- implemented KMeans - fixed unseeded RNG - fixed display exception with NoTarget - added basic test cases to app
This commit is contained in:
@@ -83,7 +83,8 @@ class Dataset:
|
||||
splitted = [data[ data[:,0] == k ] for k in classes ]
|
||||
total_each = np.average([len(x) for x in splitted]).astype(int)
|
||||
|
||||
rng = np.random.default_rng()
|
||||
seed = np.random.randint(0, 4294967295)
|
||||
rng = np.random.default_rng(seed)
|
||||
data = []
|
||||
for x in splitted:
|
||||
samples = rng.choice(x, size=total_each, replace=True, shuffle=False)
|
||||
|
||||
@@ -79,7 +79,7 @@ class MLAlgorithm(ABC):
|
||||
print(f"Loss test : {self.test_loss():0.5f}")
|
||||
if self._target_type == TargetType.Regression:
|
||||
print(f"R^2 : {self.test_r_squared():0.5f}")
|
||||
else:
|
||||
elif self._target_type != TargetType.NoTarget:
|
||||
conf = self.test_confusion_matrix()
|
||||
print(f"Accuracy : {conf.accuracy():0.5f} - classes {conf.accuracy_per_class()}")
|
||||
print(f"Precision : {conf.precision():0.5f} - classes {conf.precision_per_class()}")
|
||||
|
||||
51
src/learning/unsupervised.py
Normal file
51
src/learning/unsupervised.py
Normal file
@@ -0,0 +1,51 @@
|
||||
import math as math
|
||||
import numpy as np
|
||||
|
||||
from abc import abstractmethod
|
||||
from learning.ml import MLAlgorithm
|
||||
from learning.data import Dataset, Data
|
||||
# Tiny positive constant. It is not referenced by any code visible in this
# file; presumably intended as a guard against division by zero or log(0)
# -- TODO confirm against callers before removing.
NOT_ZERO = 1e-15
|
||||
|
||||
class KMeans(MLAlgorithm):
    """K-means clustering plugged into the ``MLAlgorithm`` interface.

    State:
        total: number of centroids currently held. It is kept equal to
            ``len(self.centroids)`` whenever the centroids change.
        centroids: array with one row per cluster and one column per
            feature of ``self._learnset.x``.
    """

    def __init__(self, dataset: Dataset, clusters: int) -> None:
        """Create ``clusters`` centroids drawn uniformly from ``[0, 1)``.

        Args:
            dataset: data handed to ``MLAlgorithm.__init__``.
            clusters: initial number of centroids.
        """
        super().__init__(dataset)
        # Features are read from axis 1 of the learning-set matrix.
        dimensions = self._learnset.x.shape[1]
        self.total = clusters
        self.centroids = np.random.rand(clusters, dimensions)

    def _h0(self, x: np.ndarray) -> np.ndarray:
        """Return, for every row of ``x``, the index of the nearest centroid.

        Distances are Euclidean norms computed against all rows of
        ``self.centroids`` via broadcasting.
        """
        diff = x[:, np.newaxis] - self.centroids
        distances = np.linalg.norm(diff, axis=2)
        return np.argmin(distances, axis=1)

    def _learning_step(self) -> float:
        """Move each centroid to the mean of its assigned points.

        Centroids that receive no points are dropped, so the number of
        clusters can shrink. The cluster ids are taken from the actual
        assignments rather than from ``self.total``, so a stale counter
        can never cause a populated cluster to be skipped.

        Returns:
            The loss on the learning set after the update.
        """
        points = self._learnset.x
        assignments = self._h0(points)

        # np.unique yields, in ascending order, exactly the cluster ids
        # that own at least one point; empty clusters are left out.
        centroids = [
            np.mean(points[assignments == k], axis=0)
            for k in np.unique(assignments)
        ]

        # With no points at all there is nothing to average: keep the
        # current centroids instead of replacing them with an empty array,
        # which would make every later nearest-centroid lookup fail.
        if centroids:
            self.centroids = np.array(centroids)
            self.total = len(centroids)

        return self._predict_loss(self._learnset)

    def _predict_loss(self, dataset: Data) -> float:
        """Return the sum of squared distances to the nearest centroid.

        Every row of ``dataset.x`` contributes the squared Euclidean
        distance to the centroid it is assigned to by ``_h0``.
        """
        assignments = self._h0(dataset.x)
        # Indexing by the assignment vector pairs each point with its own
        # centroid, so all clusters are covered regardless of self.total.
        residuals = dataset.x - self.centroids[assignments]
        return float(np.sum(residuals ** 2))

    def _get_parameters(self):
        """Return a copy of the current centroids."""
        return self.centroids.copy()

    def _set_parameters(self, parameters):
        """Replace the centroids and re-sync the cluster counter.

        ``_learning_step`` may have reduced ``self.total`` since the
        parameters were saved, so the counter is reset to the number of
        centroids being restored.
        """
        self.centroids = parameters
        self.total = len(parameters)
|
||||
Reference in New Issue
Block a user