# Changelog: implemented KMeans; fixed un-seeded RNG; fixed the display
# exception raised for NoTarget datasets; added basic test cases to the app.
from abc import ABC, abstractmethod
from plot import Plot
from tqdm import tqdm
from learning.data import ConfusionMatrix, Dataset, Data, TargetType

import numpy as np


class MLAlgorithm(ABC):
    """Generic base class for Machine Learning algorithms.

    Subclasses provide the hypothesis function (`_h0`), a single training
    epoch (`_learning_step`), the loss over a data split (`_predict_loss`)
    and parameter snapshot/restore (`_get_parameters` / `_set_parameters`);
    this class supplies the training loop with early stopping, the loss
    accessors, plotting and the result report.
    """

    _target_type: TargetType    # task kind: regression / (multi)classification / no target
    _learnset: Data             # training split
    _validset: Data             # validation split, drives early stopping
    _testset: Data              # held-out test split
    _learn_loss: list[float]    # per-epoch training loss history (filled by learn())
    _valid_loss: list[float]    # per-epoch validation loss history (filled by learn())

    def __init__(self, dataset:Dataset) -> None:
        """Store the three splits of *dataset* and its target type."""
        learn, test, valid = dataset.get_dataset()
        self._target_type = dataset.target_type
        self._learnset = learn
        self._validset = valid
        self._testset = test
        # Initialize empty histories so plot() is safe even before learn().
        self._learn_loss = []
        self._valid_loss = []

    def with_bias(self, x:np.ndarray) -> np.ndarray:
        """Return *x* with a constant bias column of ones appended on the right."""
        return np.hstack([x, np.ones(shape=(x.shape[0], 1))])

    def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]:
        """Train for up to *epochs* epochs with patience-based early stopping.

        Training stops when the validation loss has improved by less than
        *early_stop* for more than *max_patience* consecutive epochs; the
        parameters saved at the last improving epoch are then restored.
        A KeyboardInterrupt ends the loop gracefully. With *verbose* a tqdm
        progress bar with live losses is shown.

        Returns a tuple ``(epochs_run, learn_losses, valid_losses)``.
        """
        learn: list[float] = []
        valid: list[float] = []
        count = 0
        patience = 0
        # Take an initial snapshot so `backup` is always bound, even if the
        # loop body never reaches the else-branch below.
        backup = self._get_parameters()
        trange = range(epochs)
        if verbose: trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}")

        try:
            for _ in trange:
                # Early stopping needs at least two validation points.
                if count > 1 and valid[-2] - valid[-1] < early_stop:
                    if patience >= max_patience:
                        # Out of patience: roll back to the best parameters seen.
                        self._set_parameters(backup)
                        break
                    patience += 1
                else:
                    # Validation still improving: snapshot and reset patience.
                    backup = self._get_parameters()
                    patience = 0

                count += 1
                learn.append(self._learning_step())
                valid.append(self.validation_loss())

                if verbose: trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"})
        except KeyboardInterrupt: pass  # allow manual interruption of long runs
        if verbose: print(f"Loop ended after {count} epochs")

        self._learn_loss = learn
        self._valid_loss = valid
        return (count, learn, valid)

    def learning_loss(self) -> float:
        """Current loss on the training split."""
        return self._predict_loss(self._learnset)

    def validation_loss(self) -> float:
        """Current loss on the validation split."""
        return self._predict_loss(self._validset)

    def test_loss(self) -> float:
        """Current loss on the test split."""
        return self._predict_loss(self._testset)

    def plot(self, skip:int=1000) -> None:
        """Plot the training/validation loss curves recorded by learn().

        The first *skip* epochs are omitted (only when the history is long
        enough) so the initial steep descent does not flatten the plot.
        """
        skip = skip if len(self._learn_loss) > skip else 0
        plot = Plot("Loss", "Time", "Mean Loss")
        plot.line("training", "blue", data=self._learn_loss[skip:])
        plot.line("validation", "red", data=self._valid_loss[skip:])
        plot.wait()

    def display_results(self) -> None:
        """Print the three losses plus task-specific metrics (R^2 or confusion-matrix stats)."""
        print("======== RESULT ========")
        print(f"Loss learn : {self.learning_loss():0.5f}")
        print(f"Loss valid : {self.validation_loss():0.5f}")
        print(f"Loss test : {self.test_loss():0.5f}")
        if self._target_type == TargetType.Regression:
            print(f"R^2 : {self.test_r_squared():0.5f}")
        elif self._target_type != TargetType.NoTarget:
            # Classification or multi-classification: report confusion-matrix metrics.
            conf = self.test_confusion_matrix()
            print(f"Accuracy : {conf.accuracy():0.5f} - classes {conf.accuracy_per_class()}")
            print(f"Precision : {conf.precision():0.5f} - classes {conf.precision_per_class()}")
            print(f"Recall : {conf.recall():0.5f} - classes {conf.recall_per_class()}")
            print(f"F1 score : {conf.f1_score():0.5f} - classes {conf.f1_score_per_class()}")
            print(f"Specificity: {conf.specificity():0.5f} - classes {conf.specificity_per_class()}")

    def test_confusion_matrix(self) -> "ConfusionMatrix | None":
        """Confusion matrix of the model on the test split.

        Returns None for tasks that are neither Classification nor
        MultiClassification.
        """
        if self._target_type != TargetType.Classification\
        and self._target_type != TargetType.MultiClassification:
            return None

        h0 = self._h0(self._testset.x)
        y = self._testset.y
        if h0.ndim == 1:
            # Binary case: threshold the single score at 0.5.
            h0 = np.where(h0 > 0.5, 1, 0)
        else:
            # Multi-class case: collapse one-hot/score rows to class indices.
            h0 = np.argmax(h0, axis=1)
            y = np.argmax(y, axis=1)
        return ConfusionMatrix(y, h0)

    def test_r_squared(self) -> float:
        """Coefficient of determination R^2 on the test split (0 for non-regression tasks)."""
        if self._target_type != TargetType.Regression:
            return 0

        h0 = self._h0(self._testset.x)
        y_mean = np.mean(self._testset.y)
        # NOTE(review): ss_total is 0 for a constant target, which would
        # divide by zero here — assumed not to happen for regression data.
        ss_total = np.sum((self._testset.y - y_mean) ** 2)
        ss_resid = np.sum((self._testset.y - h0) ** 2)
        return 1 - (ss_resid / ss_total)

    @abstractmethod
    def _h0(self, x:np.ndarray) -> np.ndarray:
        """Hypothesis: return the model predictions for input *x*."""

    @abstractmethod
    def _learning_step(self) -> float:
        """Run one training epoch and return the training loss."""

    @abstractmethod
    def _predict_loss(self, dataset:Data) -> float:
        """Return the model loss over *dataset*."""

    @abstractmethod
    def _get_parameters(self):
        """Return a snapshot of the model parameters (for early-stopping rollback)."""

    @abstractmethod
    def _set_parameters(self, parameters):
        """Restore a parameter snapshot produced by _get_parameters()."""
|