Fixes for Presentation

This commit is contained in:
2024-08-19 20:58:24 +02:00
parent 142fe5ccdf
commit 8b1c149535
7 changed files with 30 additions and 15 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
__pycache__ __pycache__
.venv

View File

@@ -1,3 +1,4 @@
matplotlib matplotlib
pandas pandas
tqdm tqdm
scikit-learn

View File

@@ -25,7 +25,7 @@ def auto_mpg() -> tuple[Dataset, MLAlgorithm, Any]:
ds.numbers(["HP"]) ds.numbers(["HP"])
ds.handle_na() ds.handle_na()
ds.normalize(excepts=["Cylinders","Year","Origin"]) ds.normalize(excepts=["Cylinders","Year","Origin"])
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.LinearRegression()) return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.SGDRegressor())
def automobile() -> tuple[Dataset, MLAlgorithm, Any]: def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression) ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
@@ -35,12 +35,12 @@ def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"]) ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
ds.handle_na() ds.handle_na()
ds.normalize(excepts=attributes_to_modify) ds.normalize(excepts=attributes_to_modify)
return (ds, LinearRegression(ds, learning_rate=0.004), sklearn.linear_model.LinearRegression()) return (ds, LinearRegression(ds, learning_rate=0.003), sklearn.linear_model.SGDRegressor())
def power_plant() -> tuple[Dataset, MLAlgorithm, Any]: def power_plant() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression) ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
ds.normalize() ds.normalize(excepts=None)
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.LinearRegression()) return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.SGDRegressor())
# ******************** # ********************
# Logistic Regression # Logistic Regression
@@ -101,7 +101,7 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
if __name__ == "__main__": if __name__ == "__main__":
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format}) np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
rand = np.random.randint(0, 4294967295) rand = np.random.randint(0, 4294967295)
#rand = 1997847910 # LiR for power_plant #rand = 2205910060 # LiR for power_plant
#rand = 347617386 # LoR for electrical_grid #rand = 347617386 # LoR for electrical_grid
#rand = 1793295160 # MLP for iris #rand = 1793295160 # MLP for iris
#rand = 2914000170 # MLP for frogs #rand = 2914000170 # MLP for frogs
@@ -110,7 +110,7 @@ if __name__ == "__main__":
np.random.seed(rand) np.random.seed(rand)
print(f"Using seed: {rand}") print(f"Using seed: {rand}")
ds, ml, sk = frogs() ds, ml, sk = power_plant()
epochs, _, _ = ml.learn(1000, verbose=True) epochs, _, _ = ml.learn(1000, verbose=True)
ml.display_results() ml.display_results()

View File

@@ -52,10 +52,8 @@ class Dataset:
for col in self.data: for col in self.data:
if col not in excepts: if col not in excepts:
index = self.data.columns.get_loc(col) datacol = self.data[col]
datacol = self.data.pop(col) self.data[col] = (datacol - datacol.mean()) / datacol.std()
datacol = (datacol - datacol.mean()) / datacol.std()
self.data.insert(index, col, datacol)
return self return self
def factorize(self, columns:list[str]=[]) -> Self: def factorize(self, columns:list[str]=[]) -> Self:

View File

@@ -44,6 +44,13 @@ def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
# Randoms # Randoms
# ********** # **********
def pearson(h0:np.ndarray, y:np.ndarray) -> float:
    """Return the Pearson correlation coefficient between predictions and targets.

    Parameters
    ----------
    h0 : np.ndarray
        Model predictions.
    y : np.ndarray
        Ground-truth target values; must be broadcast-compatible with h0.

    Returns
    -------
    float
        Correlation in [-1, 1]; 0.0 when either input has zero variance
        (the coefficient is mathematically undefined there — the original
        formula would emit NaN plus a runtime warning).
    """
    diff1 = h0 - h0.mean()
    diff2 = y - y.mean()
    num = np.sum(diff1 * diff2)
    den = np.sqrt(np.sum(diff1**2)) * np.sqrt(np.sum(diff2**2))
    if den == 0:
        # Constant h0 or y: avoid 0/0 -> NaN; report "no correlation".
        return 0.0
    return float(num / den)
def r_squared(h0:np.ndarray, y:np.ndarray) -> float: def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
y_mean = np.mean(y) y_mean = np.mean(y)
ss_resid = np.sum((y - h0) ** 2) ss_resid = np.sum((y - h0) ** 2)

View File

@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
from plot import Plot from plot import Plot
from tqdm import tqdm from tqdm import tqdm
from learning.data import ConfusionMatrix, Dataset, Data, TargetType from learning.data import ConfusionMatrix, Dataset, Data, TargetType
from learning.functions import r_squared from learning.functions import pearson, r_squared
class MLAlgorithm(ABC): class MLAlgorithm(ABC):
""" Classe generica per gli algoritmi di Machine Learning """ """ Classe generica per gli algoritmi di Machine Learning """
@@ -83,6 +83,7 @@ class MLAlgorithm(ABC):
print(f"Loss valid : {self.validation_loss():0.5f}") print(f"Loss valid : {self.validation_loss():0.5f}")
print(f"Loss test : {self.test_loss():0.5f}") print(f"Loss test : {self.test_loss():0.5f}")
if self._target_type == TargetType.Regression: if self._target_type == TargetType.Regression:
print(f"Pearson : {self.test_pearson():0.5f}")
print(f"R^2 : {self.test_r_squared():0.5f}") print(f"R^2 : {self.test_r_squared():0.5f}")
elif self._target_type != TargetType.NoTarget: elif self._target_type != TargetType.NoTarget:
conf = self.test_confusion_matrix() conf = self.test_confusion_matrix()
@@ -106,6 +107,11 @@ class MLAlgorithm(ABC):
y = np.argmax(y, axis=1) y = np.argmax(y, axis=1)
return ConfusionMatrix(y, h0) return ConfusionMatrix(y, h0)
def test_pearson(self) -> float:
    """Pearson correlation between the model's test-set predictions and targets.

    Only meaningful for regression tasks; any other target type yields 0.
    """
    if self._target_type == TargetType.Regression:
        return pearson(self._h0(self._testset.x), self._testset.y)
    return 0
def test_r_squared(self) -> float: def test_r_squared(self) -> float:
if self._target_type != TargetType.Regression: if self._target_type != TargetType.Regression:
return 0 return 0

View File

@@ -19,11 +19,13 @@ class GradientDescent(MLAlgorithm):
def _learning_step(self) -> float: def _learning_step(self) -> float:
x, y, m, _ = self._learnset.as_tuple() x, y, m, _ = self._learnset.as_tuple()
h0 = self._h0(x)
regularization = (self.lambd / m) * self.theta regularization = (self.lambd / m) * self.theta
regularization[0] = 0 regularization[0] = 0
derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
self.theta -= derivative + regularization derivative = np.mean((h0 - y) * with_bias(x).T, axis=1)
self.theta -= self.alpha * derivative + regularization
return self._loss(x, y) return self._loss(x, y)
def _predict_loss(self, dataset:Data) -> float: def _predict_loss(self, dataset:Data) -> float: