- moved common functions to separate file
- removed unused values and imports
- added momentum to NN
This commit is contained in:
2024-08-14 08:17:38 +02:00
parent 8e8e0b2d51
commit 142fe5ccdf
6 changed files with 115 additions and 81 deletions

View File

@@ -1,14 +1,14 @@
import random
from typing import Any
import numpy as np import numpy as np
import sklearn import sklearn
import sklearn.cluster import sklearn.cluster
import sklearn.linear_model import sklearn.linear_model
import sklearn.model_selection import sklearn.model_selection
import sklearn.neural_network import sklearn.neural_network
from typing import Any
from learning.ml import MLAlgorithm
from learning.data import Dataset, TargetType from learning.data import Dataset, TargetType
from learning.supervised import LinearRegression, LogisticRegression, MultiLayerPerceptron from learning.supervised import LinearRegression, LogisticRegression, MultiLayerPerceptron
from learning.ml import MLAlgorithm
from learning.unsupervised import KMeans from learning.unsupervised import KMeans
DATASET = "datasets/" DATASET = "datasets/"
@@ -67,8 +67,8 @@ def frogs() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification) ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification)
ds.remove(["Family", "Genus", "RecordID"]) ds.remove(["Family", "Genus", "RecordID"])
ds.factorize(["Species"]) ds.factorize(["Species"])
size = [8, 5] size = [18, 15, 12, 10, 8]
return (ds, MultiLayerPerceptron(ds, size, 0.1), sklearn.neural_network.MLPClassifier(size, 'relu')) return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu'))
def iris() -> tuple[Dataset, MLAlgorithm, Any]: def iris() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification) ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification)
@@ -100,16 +100,17 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
if __name__ == "__main__": if __name__ == "__main__":
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format}) np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
rand = random.randint(0, 4294967295) rand = np.random.randint(0, 4294967295)
#rand = 1997847910 # LiR for power_plant #rand = 1997847910 # LiR for power_plant
#rand = 347617386 # LoR for electrical_grid #rand = 347617386 # LoR for electrical_grid
#rand = 1793295160 # MLP for iris #rand = 1793295160 # MLP for iris
#rand = 2914000170 # MLP for frogs
#rand = 885416001 # KMe for frogs_no_target #rand = 885416001 # KMe for frogs_no_target
np.random.seed(rand) np.random.seed(rand)
print(f"Using seed: {rand}") print(f"Using seed: {rand}")
ds, ml, sk = iris() ds, ml, sk = frogs()
epochs, _, _ = ml.learn(1000, verbose=True) epochs, _, _ = ml.learn(1000, verbose=True)
ml.display_results() ml.display_results()
@@ -122,7 +123,3 @@ if __name__ == "__main__":
print("========================") print("========================")
ml.plot() ml.plot()
# migliori parametri trovati per electrical_grid
# temp = np.array([-48.28601, 0.00429, 0.07933, 0.02144, -0.04225, 0.36898, 0.24723, 0.36445, 0.21437, 0.29666, 0.22532, 0.38619, 0.24171, -113.65430])
# ml._set_parameters(temp)

View File

@@ -1,5 +1,5 @@
import pandas as pd
import numpy as np import numpy as np
import pandas as pd
from enum import Enum from enum import Enum
from typing_extensions import Self from typing_extensions import Self
@@ -184,16 +184,3 @@ class ConfusionMatrix:
specificity_per_class = self.specificity_per_class() specificity_per_class = self.specificity_per_class()
support = np.sum(self.matrix, axis=1) support = np.sum(self.matrix, axis=1)
return np.average(specificity_per_class, weights=support) return np.average(specificity_per_class, weights=support)
if __name__ == "__main__":
ds = Dataset("datasets\\classification\\frogs.csv", "Species", TargetType.MultiClassification)
ds.remove(["Family", "Genus", "RecordID"])
ds.factorize(["Species"])
np.random.seed(0)
learn, test, valid = ds.get_dataset()
print(learn)
print(test)
print(valid)

55
src/learning/functions.py Normal file
View File

@@ -0,0 +1,55 @@
import numpy as np
# Small epsilon added inside log() calls to avoid log(0) -> -inf in the losses below.
NOT_ZERO = 1e-15
# Negative-side slope coefficient used by the leaky ReLU activation.
LEAKY_RELU = 0.2
# **********
# For NN
# **********
def relu(x:np.ndarray) -> np.ndarray:
    """Rectified linear unit: element-wise max(0, x)."""
    return np.maximum(x, 0)
def relu_derivative(x:np.ndarray) -> np.ndarray:
    """Derivative of ReLU: 0 on the negative side, 1 elsewhere."""
    negative = x < 0
    return np.where(negative, 0, 1)
def lrelu(x:np.ndarray) -> np.ndarray:
    """Leaky ReLU: x on the positive side, LEAKY_RELU * x on the negative side."""
    leaked = LEAKY_RELU * x
    return np.where(x < 0, leaked, x)
def lrelu_derivative(x:np.ndarray) -> np.ndarray:
    """Derivative of leaky ReLU: LEAKY_RELU on the negative side, 1 elsewhere."""
    negative = x < 0
    return np.where(negative, LEAKY_RELU, 1)
def softmax(x:np.ndarray) -> np.ndarray:
    """Row-wise softmax; inputs are shifted by the row max for numerical stability."""
    shifted = x - x.max(axis=1, keepdims=True)  # prevents exp() overflow
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)
def softmax_derivative(h0:np.ndarray, y:np.ndarray) -> np.ndarray:
    """Gradient of softmax + cross-entropy w.r.t. the logits: prediction minus target."""
    return np.subtract(h0, y)
# **********
# For loss
# **********
def square_loss(h0:np.ndarray, y:np.ndarray) -> float:
return np.mean((h0 - y) ** 2) / 2
def log_loss(h0:np.ndarray, y:np.ndarray) -> float:
    """Binary cross-entropy; NOT_ZERO keeps log() away from zero."""
    pos = np.log(h0 + NOT_ZERO)        # log-likelihood of the positive class
    neg = np.log(1 - h0 + NOT_ZERO)    # log-likelihood of the negative class
    return np.mean(-y * pos - (1 - y) * neg)
def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
    """Multi-class cross-entropy averaged over samples.

    The mean is not the textbook sum, but it makes losses comparable across models.
    """
    per_sample = np.sum(y * np.log(h0 + NOT_ZERO), axis=1)
    return -np.mean(per_sample)
# **********
# Randoms
# **********
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
    """Coefficient of determination (R^2) of predictions h0 against targets y.

    Returns 1 for a perfect fit, 0 for a fit no better than the mean; can be
    negative for a fit worse than the mean.
    """
    y_mean = np.mean(y)
    ss_resid = np.sum((y - h0) ** 2)
    ss_total = np.sum((y - y_mean) ** 2)
    if ss_total == 0:
        # Constant target: the ratio is undefined (division by zero -> nan).
        # Follow the sklearn r2_score convention: perfect prediction scores 1,
        # anything else scores 0.
        return 1.0 if ss_resid == 0 else 0.0
    return 1 - (ss_resid / ss_total)
def with_bias(x:np.ndarray) -> np.ndarray:
    """Append a bias column of ones to the right of the (n_samples, n_features) matrix x."""
    bias_column = np.ones((x.shape[0], 1))
    return np.concatenate([x, bias_column], axis=1)

View File

@@ -1,10 +1,11 @@
import sys
import numpy as np
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from plot import Plot from plot import Plot
from tqdm import tqdm from tqdm import tqdm
from learning.data import ConfusionMatrix, Dataset, Data, TargetType from learning.data import ConfusionMatrix, Dataset, Data, TargetType
from learning.functions import r_squared
import numpy as np
class MLAlgorithm(ABC): class MLAlgorithm(ABC):
""" Classe generica per gli algoritmi di Machine Learning """ """ Classe generica per gli algoritmi di Machine Learning """
@@ -22,14 +23,12 @@ class MLAlgorithm(ABC):
self._validset = valid self._validset = valid
self._testset = test self._testset = test
def with_bias(self, x:np.ndarray) -> np.ndarray:
return np.hstack([x, np.ones(shape=(x.shape[0], 1))])
def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]: def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]:
learn = [] learn = []
valid = [] valid = []
count = 0 count = 0
patience = 0 patience = 0
best = (sys.float_info.max, [])
trange = range(epochs) trange = range(epochs)
if verbose: trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}") if verbose: trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}")
@@ -45,13 +44,19 @@ class MLAlgorithm(ABC):
patience = 0 patience = 0
count += 1 count += 1
learn.append(self._learning_step())
valid.append(self.validation_loss())
learn_loss = self._learning_step()
valid_loss = self.validation_loss()
if valid_loss < best[0]:
best = (valid_loss, self._get_parameters())
learn.append(learn_loss)
valid.append(valid_loss)
if verbose: trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"}) if verbose: trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"})
except KeyboardInterrupt: pass except KeyboardInterrupt: pass
if verbose: print(f"Loop ended after {count} epochs") if verbose: print(f"Loop ended after {count} epochs")
self._set_parameters(best[1])
self._learn_loss = learn self._learn_loss = learn
self._valid_loss = valid self._valid_loss = valid
return (count, learn, valid) return (count, learn, valid)
@@ -104,12 +109,7 @@ class MLAlgorithm(ABC):
def test_r_squared(self) -> float: def test_r_squared(self) -> float:
if self._target_type != TargetType.Regression: if self._target_type != TargetType.Regression:
return 0 return 0
return r_squared(self._h0(self._testset.x), self._testset.y)
h0 = self._h0(self._testset.x)
y_mean = np.mean(self._testset.y)
ss_total = np.sum((self._testset.y - y_mean) ** 2)
ss_resid = np.sum((self._testset.y - h0) ** 2)
return 1 - (ss_resid / ss_total)
@abstractmethod @abstractmethod
def _h0(self, x:np.ndarray) -> np.ndarray: pass def _h0(self, x:np.ndarray) -> np.ndarray: pass

View File

@@ -4,7 +4,7 @@ import numpy as np
from abc import abstractmethod from abc import abstractmethod
from learning.ml import MLAlgorithm from learning.ml import MLAlgorithm
from learning.data import Dataset, Data from learning.data import Dataset, Data
NOT_ZERO = 1e-15 from learning.functions import cross_entropy_loss, log_loss, lrelu, lrelu_derivative, softmax, softmax_derivative, square_loss, with_bias
class GradientDescent(MLAlgorithm): class GradientDescent(MLAlgorithm):
theta:np.ndarray theta:np.ndarray
@@ -22,12 +22,12 @@ class GradientDescent(MLAlgorithm):
regularization = (self.lambd / m) * self.theta regularization = (self.lambd / m) * self.theta
regularization[0] = 0 regularization[0] = 0
derivative = self.alpha * (1/m) * np.sum((self._h0(x) - y) * self.with_bias(x).T, axis=1) derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
self.theta -= derivative + regularization self.theta -= derivative + regularization
return self._loss(x, y, m) return self._loss(x, y)
def _predict_loss(self, dataset:Data) -> float: def _predict_loss(self, dataset:Data) -> float:
return self._loss(dataset.x, dataset.y, dataset.size) return self._loss(dataset.x, dataset.y)
def _get_parameters(self): def _get_parameters(self):
return self.theta.copy() return self.theta.copy()
@@ -36,31 +36,30 @@ class GradientDescent(MLAlgorithm):
self.theta = parameters self.theta = parameters
@abstractmethod @abstractmethod
def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: pass def _loss(self, x:np.ndarray, y:np.ndarray) -> float: pass
class LinearRegression(GradientDescent): class LinearRegression(GradientDescent):
def _h0(self, x: np.ndarray) -> np.ndarray: def _h0(self, x: np.ndarray) -> np.ndarray:
return self.theta.dot(self.with_bias(x).T) return self.theta.dot(with_bias(x).T)
def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: def _loss(self, x:np.ndarray, y:np.ndarray) -> float:
diff = (self._h0(x) - y) return square_loss(self._h0(x), y)
return 1/(2*m) * np.sum(diff ** 2)
class LogisticRegression(GradientDescent): class LogisticRegression(GradientDescent):
def _h0(self, x: np.ndarray) -> np.ndarray: def _h0(self, x: np.ndarray) -> np.ndarray:
return 1 / (1 + np.exp(-self.theta.dot(self.with_bias(x).T))) return 1 / (1 + np.exp(-self.theta.dot(with_bias(x).T)))
def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: def _loss(self, x:np.ndarray, y:np.ndarray) -> float:
h0 = self._h0(x) return log_loss(self._h0(x), y)
diff = - y*np.log(h0 + NOT_ZERO) - (1-y)*np.log(1-h0 + NOT_ZERO)
return 1/m * np.sum(diff)
class MultiLayerPerceptron(MLAlgorithm): class MultiLayerPerceptron(MLAlgorithm):
layers: list[np.ndarray] layers: list[np.ndarray]
activations: list[np.ndarray] activations: list[np.ndarray]
previous_delta: list[np.ndarray]
momentum: float
learning_rate: float
def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1) -> None: def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1, momentum:float=0.9) -> None:
super().__init__(dataset) super().__init__(dataset)
input = self._learnset.x.shape[1] input = self._learnset.x.shape[1]
output = self._learnset.y.shape[1] output = self._learnset.y.shape[1]
@@ -71,54 +70,52 @@ class MultiLayerPerceptron(MLAlgorithm):
self.layers = [] self.layers = []
self.activations = [] self.activations = []
self.previous_delta = []
self.momentum = momentum
self.learning_rate = learning_rate self.learning_rate = learning_rate
for next in layers: for next in layers:
current = np.random.rand(input + 1, next) * np.sqrt(2 / input) # +1 bias, sqrt is He init current = np.random.rand(input + 1, next) * np.sqrt(2 / input) # +1 bias, sqrt is He init
self.layers.append(current) self.layers.append(current)
self.previous_delta.append(np.zeros(current.shape))
input = next input = next
def _h0(self, x:np.ndarray) -> np.ndarray: def _h0(self, x:np.ndarray) -> np.ndarray:
self.activations = [x] self.activations = [x]
for layer in self.layers: for layer in self.layers:
x = self.with_bias(x) x = lrelu(with_bias(x).dot(layer))
x = x.dot(layer)
x = x * (x > 0) # activation function ReLU
self.activations.append(x) # saving activation result self.activations.append(x) # saving activation result
return self.softmax(x) return softmax(x)
def _learning_step(self) -> float: def _learning_step(self) -> float:
x, y, m, _ = self._learnset.as_tuple() x, y, m, _ = self._learnset.as_tuple()
delta = self._h0(x) - y # first term is derivative of softmax delta = softmax_derivative(self._h0(x), y)
for l in reversed(range(len(self.layers))): for l in reversed(range(len(self.layers))):
activation = self.activations[l] activation = self.activations[l]
deltaW = np.dot(self.with_bias(activation).T, delta) / m deltaW = np.dot(with_bias(activation).T, delta) / m
deltaW *= self.learning_rate
deltaW += self.momentum * self.previous_delta[l]
if l > 0: delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias
delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias delta *= lrelu_derivative(activation)
delta[activation <= 0] = 0 # derivative ReLU
self.layers[l] -= deltaW * self.learning_rate self.layers[l] -= deltaW
self.previous_delta[l] = deltaW
return self._predict_loss(self._learnset) return self._predict_loss(self._learnset)
def softmax(self, input:np.ndarray) -> np.ndarray: def _predict_loss(self, dataset:Data) -> float:
input = input - np.max(input, axis=1, keepdims=True) # for overflow return cross_entropy_loss(self._h0(dataset.x), dataset.y)
exp_input = np.exp(input)
total_sum = np.sum(exp_input, axis=1, keepdims=True)
return exp_input / total_sum
def _predict_loss(self, dataset:Data) -> float: # cross-entropy
diff = dataset.y * np.log(self._h0(dataset.x) + NOT_ZERO)
return -np.mean(np.sum(diff, axis=1))
def _get_parameters(self): def _get_parameters(self):
parameters = [] parameters = { 'layers': [], 'previous_delta': [] }
for x in self.layers: for x in range(len(self.layers)):
parameters.append(x.copy()) parameters['layers'].append(self.layers[x].copy())
parameters['previous_delta'].append(self.previous_delta[x].copy())
return parameters return parameters
def _set_parameters(self, parameters): def _set_parameters(self, parameters):
self.layers = parameters self.layers = parameters['layers']
self.previous_delta = parameters['previous_delta']

View File

@@ -1,10 +1,8 @@
import math as math import math as math
import numpy as np import numpy as np
from abc import abstractmethod
from learning.ml import MLAlgorithm from learning.ml import MLAlgorithm
from learning.data import Dataset, Data from learning.data import Dataset, Data
NOT_ZERO = 1e-15
class KMeans(MLAlgorithm): class KMeans(MLAlgorithm):
def __init__(self, dataset: Dataset, clusters:int) -> None: def __init__(self, dataset: Dataset, clusters:int) -> None: