diff --git a/src/app.py b/src/app.py
index 3fac393..9e3c932 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,14 +1,14 @@
-import random
-from typing import Any
 import numpy as np
 import sklearn
 import sklearn.cluster
 import sklearn.linear_model
 import sklearn.model_selection
 import sklearn.neural_network
+
+from typing import Any
+from learning.ml import MLAlgorithm
 from learning.data import Dataset, TargetType
 from learning.supervised import LinearRegression, LogisticRegression, MultiLayerPerceptron
-from learning.ml import MLAlgorithm
 from learning.unsupervised import KMeans
 
 DATASET = "datasets/"
@@ -67,8 +67,8 @@ def frogs() -> tuple[Dataset, MLAlgorithm, Any]:
     ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification)
     ds.remove(["Family", "Genus", "RecordID"])
     ds.factorize(["Species"])
-    size = [8, 5]
-    return (ds, MultiLayerPerceptron(ds, size, 0.1), sklearn.neural_network.MLPClassifier(size, 'relu'))
+    size = [18, 15, 12, 10, 8]
+    return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu'))
 
 def iris() -> tuple[Dataset, MLAlgorithm, Any]:
     ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification)
@@ -100,16 +100,17 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
 
 if __name__ == "__main__":
     np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
 
-    rand = random.randint(0, 4294967295)
+    rand = np.random.randint(0, 4294967295)
     #rand = 1997847910 # LiR for power_plant
     #rand = 347617386 # LoR for electrical_grid
     #rand = 1793295160 # MLP for iris
+    #rand = 2914000170 # MLP for frogs
     #rand = 885416001 # KMe for frogs_no_target
 
     np.random.seed(rand)
     print(f"Using seed: {rand}")
 
-    ds, ml, sk = iris()
+    ds, ml, sk = frogs()
     epochs, _, _ = ml.learn(1000, verbose=True)
     ml.display_results()
@@ -122,7 +123,3 @@ if __name__ == "__main__":
     print("========================")
 
     ml.plot()
-
-# best parameters found for electrical_grid
-# temp = np.array([-48.28601, 0.00429, 0.07933, 0.02144, -0.04225, 0.36898, 0.24723, 0.36445, 0.21437, 0.29666, 0.22532, 0.38619, 0.24171, -113.65430])
-# ml._set_parameters(temp)
diff --git a/src/learning/data.py b/src/learning/data.py
index 7678b21..03b3fd8 100644
--- a/src/learning/data.py
+++ b/src/learning/data.py
@@ -1,5 +1,5 @@
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 from enum import Enum
 from typing_extensions import Self
@@ -184,16 +184,3 @@ class ConfusionMatrix:
         specificity_per_class = self.specificity_per_class()
         support = np.sum(self.matrix, axis=1)
         return np.average(specificity_per_class, weights=support)
-
-
-if __name__ == "__main__":
-    ds = Dataset("datasets\\classification\\frogs.csv", "Species", TargetType.MultiClassification)
-    ds.remove(["Family", "Genus", "RecordID"])
-    ds.factorize(["Species"])
-
-    np.random.seed(0)
-    learn, test, valid = ds.get_dataset()
-    print(learn)
-    print(test)
-    print(valid)
-
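
A note on the seeding change in app.py above: numpy's randint excludes the upper bound, so the draw covers 0 through 4294967294, all of which np.random.seed accepts (its maximum is 2**32 - 1). The pattern itself, draw a seed, print it, then reseed, is what makes the commented-out seeds replayable. A standalone illustrative snippet of the same idea (not part of the patch):

    import numpy as np

    rand = np.random.randint(0, 4294967295)  # upper bound is exclusive in numpy
    np.random.seed(rand)                     # seed accepts 0 .. 2**32 - 1
    print(f"Using seed: {rand}")             # paste this value back in to replay a run
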
diff --git a/src/learning/functions.py b/src/learning/functions.py
new file mode 100644
index 0000000..d5b6068
--- /dev/null
+++ b/src/learning/functions.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+NOT_ZERO = 1e-15
+LEAKY_RELU = 0.2
+
+
+# **********
+# For NN
+# **********
+
+def relu(x:np.ndarray) -> np.ndarray:
+    return np.where(x < 0, 0, x)
+def relu_derivative(x:np.ndarray) -> np.ndarray:
+    return np.where(x < 0, 0, 1)
+
+def lrelu(x:np.ndarray) -> np.ndarray:
+    return np.where(x < 0, LEAKY_RELU * x, x)
+def lrelu_derivative(x:np.ndarray) -> np.ndarray:
+    return np.where(x < 0, LEAKY_RELU, 1)
+
+def softmax(x:np.ndarray) -> np.ndarray:
+    x = x - np.max(x, axis=1, keepdims=True) # subtract the row max to avoid overflow
+    exp_x = np.exp(x)
+    sum_x = np.sum(exp_x, axis=1, keepdims=True)
+    return exp_x / sum_x
+def softmax_derivative(h0:np.ndarray, y:np.ndarray) -> np.ndarray:
+    return h0 - y
+
+# **********
+# For loss
+# **********
+
+def square_loss(h0:np.ndarray, y:np.ndarray) -> float:
+    return np.mean((h0 - y) ** 2) / 2
+
+def log_loss(h0:np.ndarray, y:np.ndarray) -> float:
+    return np.mean(- y*np.log(h0 + NOT_ZERO) - (1-y)*np.log(1-h0 + NOT_ZERO))
+
+def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
+    return -np.mean(np.sum(y*np.log(h0 + NOT_ZERO), axis=1)) # mean instead of sum is not the textbook definition, but keeps losses comparable across dataset sizes
+
+
+# **********
+# Other helpers
+# **********
+
+def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
+    y_mean = np.mean(y)
+    ss_resid = np.sum((y - h0) ** 2)
+    ss_total = np.sum((y - y_mean) ** 2)
+    return 1 - (ss_resid / ss_total)
+
+def with_bias(x:np.ndarray) -> np.ndarray:
+    ones = np.ones(shape=(x.shape[0], 1))
+    return np.hstack([x, ones])
diff --git a/src/learning/ml.py b/src/learning/ml.py
index e1a4cb0..a72f1d4 100644
--- a/src/learning/ml.py
+++ b/src/learning/ml.py
@@ -1,10 +1,11 @@
+import sys
+import numpy as np
+
 from abc import ABC, abstractmethod
 from plot import Plot
 from tqdm import tqdm
 from learning.data import ConfusionMatrix, Dataset, Data, TargetType
-
-import numpy as np
-
+from learning.functions import r_squared
 
 class MLAlgorithm(ABC):
     """ Generic base class for Machine Learning algorithms """
@@ -22,14 +23,12 @@ class MLAlgorithm(ABC):
         self._validset = valid
         self._testset = test
 
-    def with_bias(self, x:np.ndarray) -> np.ndarray:
-        return np.hstack([x, np.ones(shape=(x.shape[0], 1))])
-
     def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]:
         learn = []
         valid = []
         count = 0
         patience = 0
+        best = (sys.float_info.max, self._get_parameters()) # seed with the current parameters so the restore below is always valid
 
         trange = range(epochs)
         if verbose: trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}")
@@ -45,13 +44,19 @@
                     patience = 0
                 count += 1
 
-                learn.append(self._learning_step())
-                valid.append(self.validation_loss())
+                learn_loss = self._learning_step()
+                valid_loss = self.validation_loss()
+                if valid_loss < best[0]:
+                    best = (valid_loss, self._get_parameters())
+
+                learn.append(learn_loss)
+                valid.append(valid_loss)
                 if verbose:
                     trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"})
         except KeyboardInterrupt: pass
 
         if verbose: print(f"Loop ended after {count} epochs")
+        self._set_parameters(best[1])
         self._learn_loss = learn
         self._valid_loss = valid
         return (count, learn, valid)
@@ -104,12 +109,7 @@ class MLAlgorithm(ABC):
 
     def test_r_squared(self) -> float:
         if self._target_type != TargetType.Regression: return 0
-
-        h0 = self._h0(self._testset.x)
-        y_mean = np.mean(self._testset.y)
-        ss_total = np.sum((self._testset.y - y_mean) ** 2)
-        ss_resid = np.sum((self._testset.y - h0) ** 2)
-        return 1 - (ss_resid / ss_total)
+        return r_squared(self._h0(self._testset.x), self._testset.y)
 
     @abstractmethod
     def _h0(self, x:np.ndarray) -> np.ndarray: pass
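
The refactor concentrates the softmax and cross-entropy math in one module, which makes it easy to sanity-check the identity the backpropagation below relies on: softmax_derivative(h0, y) = h0 - y, divided by the batch size, is the exact gradient of cross_entropy_loss(softmax(z), y) with respect to z. A minimal finite-difference check (a standalone sketch, assuming src/ is on PYTHONPATH so learning.functions resolves):

    import numpy as np
    from learning.functions import softmax, softmax_derivative, cross_entropy_loss

    np.random.seed(0)
    z = np.random.randn(4, 3)                        # 4 samples, 3 classes of logits
    y = np.eye(3)[np.random.randint(0, 3, size=4)]   # one-hot targets

    analytic = softmax_derivative(softmax(z), y) / z.shape[0]

    numeric = np.zeros_like(z)
    eps = 1e-6
    for i in range(z.shape[0]):
        for j in range(z.shape[1]):
            zp, zm = z.copy(), z.copy()
            zp[i, j] += eps
            zm[i, j] -= eps
            numeric[i, j] = (cross_entropy_loss(softmax(zp), y)
                             - cross_entropy_loss(softmax(zm), y)) / (2 * eps)

    print(np.max(np.abs(analytic - numeric)))        # tiny, around 1e-8 or less
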
diff --git a/src/learning/supervised.py b/src/learning/supervised.py
index edbab38..aac983f 100644
--- a/src/learning/supervised.py
+++ b/src/learning/supervised.py
@@ -4,7 +4,7 @@ import numpy as np
 from abc import abstractmethod
 from learning.ml import MLAlgorithm
 from learning.data import Dataset, Data
-NOT_ZERO = 1e-15
+from learning.functions import cross_entropy_loss, log_loss, lrelu, lrelu_derivative, softmax, softmax_derivative, square_loss, with_bias
 
 class GradientDescent(MLAlgorithm):
     theta:np.ndarray
@@ -22,12 +22,12 @@ class GradientDescent(MLAlgorithm):
         regularization = (self.lambd / m) * self.theta
         regularization[0] = 0
 
-        derivative = self.alpha * (1/m) * np.sum((self._h0(x) - y) * self.with_bias(x).T, axis=1)
+        derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
         self.theta -= derivative + regularization
-        return self._loss(x, y, m)
+        return self._loss(x, y)
 
     def _predict_loss(self, dataset:Data) -> float:
-        return self._loss(dataset.x, dataset.y, dataset.size)
+        return self._loss(dataset.x, dataset.y)
 
     def _get_parameters(self):
         return self.theta.copy()
@@ -36,31 +36,30 @@
         self.theta = parameters
 
     @abstractmethod
-    def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: pass
-
+    def _loss(self, x:np.ndarray, y:np.ndarray) -> float: pass
 
 class LinearRegression(GradientDescent):
     def _h0(self, x: np.ndarray) -> np.ndarray:
-        return self.theta.dot(self.with_bias(x).T)
+        return self.theta.dot(with_bias(x).T)
 
-    def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
-        diff = (self._h0(x) - y)
-        return 1/(2*m) * np.sum(diff ** 2)
+    def _loss(self, x:np.ndarray, y:np.ndarray) -> float:
+        return square_loss(self._h0(x), y)
 
 class LogisticRegression(GradientDescent):
     def _h0(self, x: np.ndarray) -> np.ndarray:
-        return 1 / (1 + np.exp(-self.theta.dot(self.with_bias(x).T)))
+        return 1 / (1 + np.exp(-self.theta.dot(with_bias(x).T)))
 
-    def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
-        h0 = self._h0(x)
-        diff = - y*np.log(h0 + NOT_ZERO) - (1-y)*np.log(1-h0 + NOT_ZERO)
-        return 1/m * np.sum(diff)
+    def _loss(self, x:np.ndarray, y:np.ndarray) -> float:
+        return log_loss(self._h0(x), y)
 
 class MultiLayerPerceptron(MLAlgorithm):
     layers: list[np.ndarray]
     activations: list[np.ndarray]
+    previous_delta: list[np.ndarray]
+    momentum: float
+    learning_rate: float
 
-    def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1) -> None:
+    def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1, momentum:float=0.9) -> None:
         super().__init__(dataset)
         input = self._learnset.x.shape[1]
         output = self._learnset.y.shape[1]
@@ -71,54 +70,52 @@
 
         self.layers = []
         self.activations = []
+        self.previous_delta = []
+        self.momentum = momentum
         self.learning_rate = learning_rate
 
         for next in layers:
             current = np.random.rand(input + 1, next) * np.sqrt(2 / input) # +1 bias, sqrt is He init
             self.layers.append(current)
+            self.previous_delta.append(np.zeros(current.shape))
             input = next
 
     def _h0(self, x:np.ndarray) -> np.ndarray:
         self.activations = [x]
         for layer in self.layers:
-            x = self.with_bias(x)
-            x = x.dot(layer)
-            x = x * (x > 0) # activation function ReLU
+            x = lrelu(with_bias(x).dot(layer))
             self.activations.append(x) # saving activation result
-        return self.softmax(x)
+        return softmax(x)
 
     def _learning_step(self) -> float:
         x, y, m, _ = self._learnset.as_tuple()
-        delta = self._h0(x) - y # first term is derivative of softmax
+        delta = softmax_derivative(self._h0(x), y)
 
         for l in reversed(range(len(self.layers))):
             activation = self.activations[l]
-            deltaW = np.dot(self.with_bias(activation).T, delta) / m
+            deltaW = np.dot(with_bias(activation).T, delta) / m
+            deltaW *= self.learning_rate
+            deltaW += self.momentum * self.previous_delta[l] # classical momentum: carry over a fraction of the previous update
 
-            if l > 0:
-                delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias
-                delta[activation <= 0] = 0 # derivative ReLU
-                self.layers[l] -= deltaW * self.learning_rate
+            delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias
+            delta *= lrelu_derivative(activation)
+
+            self.layers[l] -= deltaW
+            self.previous_delta[l] = deltaW
 
         return self._predict_loss(self._learnset)
 
-    def softmax(self, input:np.ndarray) -> np.ndarray:
-        input = input - np.max(input, axis=1, keepdims=True) # for overflow
-        exp_input = np.exp(input)
-        total_sum = np.sum(exp_input, axis=1, keepdims=True)
-        return exp_input / total_sum
-
-    def _predict_loss(self, dataset:Data) -> float: # cross-entropy
-        diff = dataset.y * np.log(self._h0(dataset.x) + NOT_ZERO)
-        return -np.mean(np.sum(diff, axis=1))
-
+    def _predict_loss(self, dataset:Data) -> float:
+        return cross_entropy_loss(self._h0(dataset.x), dataset.y)
 
     def _get_parameters(self):
-        parameters = []
-        for x in self.layers:
-            parameters.append(x.copy())
+        parameters = { 'layers': [], 'previous_delta': [] } # snapshot weights and momentum buffers together
+        for layer, prev_delta in zip(self.layers, self.previous_delta):
+            parameters['layers'].append(layer.copy()) # copy so later training steps cannot mutate the snapshot
+            parameters['previous_delta'].append(prev_delta.copy())
         return parameters
 
     def _set_parameters(self, parameters):
-        self.layers = parameters
+        self.layers = parameters['layers']
+        self.previous_delta = parameters['previous_delta']
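
The update rule introduced above is classical momentum: each step reuses a fraction (momentum, default 0.9) of the previous step, which damps oscillation and accelerates progress along consistent gradient directions. Stripped of the network, the rule is just the following (a standalone sketch minimizing f(w) = w**2, illustrative only):

    import numpy as np

    w, velocity = np.array([5.0]), np.array([0.0])
    learning_rate, momentum = 0.1, 0.9

    for _ in range(100):
        grad = 2 * w                                   # f'(w) for f(w) = w**2
        velocity = learning_rate * grad + momentum * velocity
        w -= velocity                                  # same shape as layers[l] -= deltaW

    print(w)  # oscillates but decays toward 0
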
diff --git a/src/learning/unsupervised.py b/src/learning/unsupervised.py
index e9320e1..0e7aee1 100644
--- a/src/learning/unsupervised.py
+++ b/src/learning/unsupervised.py
@@ -1,10 +1,8 @@
 import math as math
 import numpy as np
 
-from abc import abstractmethod
 from learning.ml import MLAlgorithm
 from learning.data import Dataset, Data
-NOT_ZERO = 1e-15
 
 class KMeans(MLAlgorithm):
     def __init__(self, dataset: Dataset, clusters:int) -> None:
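
Taken together, the pieces wire up the same way app.py's __main__ does. A condensed end-to-end sketch (module layout and dataset paths as in this diff, run from src/ with the datasets/ folder in place):

    import numpy as np
    from learning.data import Dataset, TargetType
    from learning.supervised import MultiLayerPerceptron

    np.random.seed(2914000170)  # the "MLP for frogs" seed noted in app.py

    ds = Dataset("datasets/classification/frogs.csv", "Species", TargetType.MultiClassification)
    ds.remove(["Family", "Genus", "RecordID"])
    ds.factorize(["Species"])

    ml = MultiLayerPerceptron(ds, [18, 15, 12, 10, 8])  # learning_rate=0.1, momentum=0.9 defaults
    epochs, learn_curve, valid_curve = ml.learn(1000, verbose=True)
    ml.display_results()
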