- implemented KMeans
- fixed non-seeded RNG in dataset sampling (now seeded from the global NumPy seed)
- fixed display exception with TargetType.NoTarget
- added basic test cases to app
This commit is contained in:
2024-08-12 22:09:41 +02:00
parent 7739878a2c
commit 8e8e0b2d51
4 changed files with 83 additions and 5 deletions

View File

@@ -2,12 +2,14 @@ import random
from typing import Any from typing import Any
import numpy as np import numpy as np
import sklearn import sklearn
import sklearn.cluster
import sklearn.linear_model import sklearn.linear_model
import sklearn.model_selection import sklearn.model_selection
import sklearn.neural_network import sklearn.neural_network
from learning.data import Dataset, TargetType from learning.data import Dataset, TargetType
from learning.supervised import LinearRegression, LogisticRegression, MultiLayerPerceptron from learning.supervised import LinearRegression, LogisticRegression, MultiLayerPerceptron
from learning.ml import MLAlgorithm from learning.ml import MLAlgorithm
from learning.unsupervised import KMeans
DATASET = "datasets/" DATASET = "datasets/"
REGRESSION = DATASET + "regression/" REGRESSION = DATASET + "regression/"
@@ -75,6 +77,23 @@ def iris() -> tuple[Dataset, MLAlgorithm, Any]:
size = [4, 3] size = [4, 3]
return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu')) return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu'))
# ********************
# MultiLayerPerceptron
# ********************
def frogs_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.NoTarget)
ds.remove(["Family", "Genus", "RecordID", "Species"])
clusters = 10
return (ds, KMeans(ds, clusters), sklearn.cluster.KMeans(clusters))
def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.NoTarget)
ds.remove(["Class"])
ds.normalize()
clusters = 3
return (ds, KMeans(ds, clusters), sklearn.cluster.KMeans(clusters))
# ******************** # ********************
# Main & random # Main & random
# ******************** # ********************
@@ -82,17 +101,24 @@ def iris() -> tuple[Dataset, MLAlgorithm, Any]:
if __name__ == "__main__": if __name__ == "__main__":
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format}) np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
rand = random.randint(0, 4294967295) rand = random.randint(0, 4294967295)
#rand = 1997847910 # LiR for power_plant
#rand = 347617386 # LoR for electrical_grid
#rand = 1793295160 # MLP for iris
#rand = 885416001 # KMe for frogs_no_target
np.random.seed(rand) np.random.seed(rand)
print(f"Using seed: {rand}") print(f"Using seed: {rand}")
ds, ml, sk = electrical_grid() ds, ml, sk = iris()
ml.learn(10000, verbose=True)
epochs, _, _ = ml.learn(1000, verbose=True)
ml.display_results() ml.display_results()
np.random.seed(rand) np.random.seed(rand)
learn, test, valid = ds.get_dataset() learn, test, valid = ds.get_dataset()
sk.set_params(max_iter=epochs)
sk.fit(learn.x, learn.y) sk.fit(learn.x, learn.y)
print(f"Sklearn : {sk.score(test.x, test.y):0.5f}") print(f"Sklearn : {abs(sk.score(test.x, test.y)):0.5f}")
print("========================") print("========================")
ml.plot() ml.plot()

View File

@@ -83,7 +83,8 @@ class Dataset:
splitted = [data[ data[:,0] == k ] for k in classes ] splitted = [data[ data[:,0] == k ] for k in classes ]
total_each = np.average([len(x) for x in splitted]).astype(int) total_each = np.average([len(x) for x in splitted]).astype(int)
rng = np.random.default_rng() seed = np.random.randint(0, 4294967295)
rng = np.random.default_rng(seed)
data = [] data = []
for x in splitted: for x in splitted:
samples = rng.choice(x, size=total_each, replace=True, shuffle=False) samples = rng.choice(x, size=total_each, replace=True, shuffle=False)

View File

@@ -79,7 +79,7 @@ class MLAlgorithm(ABC):
print(f"Loss test : {self.test_loss():0.5f}") print(f"Loss test : {self.test_loss():0.5f}")
if self._target_type == TargetType.Regression: if self._target_type == TargetType.Regression:
print(f"R^2 : {self.test_r_squared():0.5f}") print(f"R^2 : {self.test_r_squared():0.5f}")
else: elif self._target_type != TargetType.NoTarget:
conf = self.test_confusion_matrix() conf = self.test_confusion_matrix()
print(f"Accuracy : {conf.accuracy():0.5f} - classes {conf.accuracy_per_class()}") print(f"Accuracy : {conf.accuracy():0.5f} - classes {conf.accuracy_per_class()}")
print(f"Precision : {conf.precision():0.5f} - classes {conf.precision_per_class()}") print(f"Precision : {conf.precision():0.5f} - classes {conf.precision_per_class()}")

View File

@@ -0,0 +1,51 @@
import math as math
import numpy as np
from abc import abstractmethod
from learning.ml import MLAlgorithm
from learning.data import Dataset, Data
NOT_ZERO = 1e-15
class KMeans(MLAlgorithm):
    """K-means clustering (Lloyd's algorithm): alternately assigns each
    point to its nearest centroid and moves each centroid to the mean of
    its assigned points. Clusters that receive no points are dropped, so
    the effective cluster count can shrink below the requested one.
    """

    def __init__(self, dataset: Dataset, clusters: int) -> None:
        """Create `clusters` centroids drawn uniformly from [0, 1) in the
        feature space.

        NOTE(review): uniform [0, 1) init assumes normalized features —
        callers should normalize the dataset first (see iris_no_target).
        """
        super().__init__(dataset)
        dimensions = self._learnset.x.shape[1]
        # Number of currently live clusters; kept in sync with
        # len(self.centroids) whenever centroids are rebuilt.
        self.total = clusters
        self.centroids = np.random.rand(clusters, dimensions)

    def _h0(self, x: np.ndarray) -> np.ndarray:
        """Return, for each row of x, the index of its nearest centroid."""
        diff = x[:, np.newaxis] - self.centroids  # (samples, clusters, dims)
        distances = np.linalg.norm(diff, axis=2)
        return np.argmin(distances, axis=1)

    def _learning_step(self) -> float:
        """One Lloyd iteration: reassign points, recompute each centroid as
        the mean of its cluster, drop empty clusters, and return the loss."""
        assignments = self._h0(self._learnset.x)
        centroids = []
        for k in range(self.total):
            assigned_points = self._learnset.x[assignments == k]
            if len(assigned_points) > 0:
                centroids.append(np.mean(assigned_points, axis=0))
            # else: cluster k is empty and simply not re-created (dropped)
        self.centroids = np.array(centroids)
        # Re-derive the live count instead of decrementing it in the loop —
        # single source of truth, same result.
        self.total = len(self.centroids)
        return self._predict_loss(self._learnset)

    def _predict_loss(self, dataset: Data) -> float:
        """Sum of squared distances from each point to its nearest centroid
        (k-means inertia). Vectorized so every point contributes, independent
        of the `self.total` bookkeeping."""
        if len(dataset.x) == 0:
            return 0.0
        assignments = self._h0(dataset.x)
        diff = dataset.x - self.centroids[assignments]
        return float(np.sum(diff ** 2))

    def _get_parameters(self):
        """Snapshot of the centroids (copied so later steps can't mutate it)."""
        return self.centroids.copy()

    def _set_parameters(self, parameters):
        """Restore a centroid snapshot.

        Bug fix: `self.total` is re-synced to the restored snapshot. It was
        previously left stale, so restoring a snapshot taken before empty
        clusters were dropped made the per-cluster loss loop skip points
        assigned to clusters with index >= self.total.
        """
        self.centroids = np.array(parameters)
        self.total = len(self.centroids)