Fixes for Presentation

This commit is contained in:
2024-08-19 20:58:24 +02:00
parent 142fe5ccdf
commit 8b1c149535
7 changed files with 30 additions and 15 deletions

3
.gitignore vendored
View File

@@ -1 +1,2 @@
__pycache__
__pycache__
.venv

View File

@@ -1,3 +1,4 @@
matplotlib
pandas
tqdm
tqdm
scikit-learn

View File

@@ -25,7 +25,7 @@ def auto_mpg() -> tuple[Dataset, MLAlgorithm, Any]:
ds.numbers(["HP"])
ds.handle_na()
ds.normalize(excepts=["Cylinders","Year","Origin"])
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.LinearRegression())
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.SGDRegressor())
def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
@@ -35,12 +35,12 @@ def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
ds.handle_na()
ds.normalize(excepts=attributes_to_modify)
return (ds, LinearRegression(ds, learning_rate=0.004), sklearn.linear_model.LinearRegression())
return (ds, LinearRegression(ds, learning_rate=0.003), sklearn.linear_model.SGDRegressor())
def power_plant() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
ds.normalize()
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.LinearRegression())
ds.normalize(excepts=None)
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.SGDRegressor())
# ********************
# Logistic Regression
@@ -101,7 +101,7 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
if __name__ == "__main__":
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
rand = np.random.randint(0, 4294967295)
#rand = 1997847910 # LiR for power_plant
#rand = 2205910060 # LiR for power_plant
#rand = 347617386 # LoR for electrical_grid
#rand = 1793295160 # MLP for iris
#rand = 2914000170 # MLP for frogs
@@ -110,7 +110,7 @@ if __name__ == "__main__":
np.random.seed(rand)
print(f"Using seed: {rand}")
ds, ml, sk = frogs()
ds, ml, sk = power_plant()
epochs, _, _ = ml.learn(1000, verbose=True)
ml.display_results()

View File

@@ -52,10 +52,8 @@ class Dataset:
for col in self.data:
if col not in excepts:
index = self.data.columns.get_loc(col)
datacol = self.data.pop(col)
datacol = (datacol - datacol.mean()) / datacol.std()
self.data.insert(index, col, datacol)
datacol = self.data[col]
self.data[col] = (datacol - datacol.mean()) / datacol.std()
return self
def factorize(self, columns:list[str]=[]) -> Self:

View File

@@ -44,6 +44,13 @@ def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
# Randoms
# **********
def pearson(h0:np.ndarray, y:np.ndarray) -> float:
    """Pearson correlation coefficient between predictions h0 and targets y.

    Computed as cov(h0, y) / (||h0 - mean|| * ||y - mean||); returns a value
    in [-1, 1] (NaN when either input has zero variance).
    """
    centered_h0 = h0 - np.mean(h0)
    centered_y = y - np.mean(y)
    covariance = (centered_h0 * centered_y).sum()
    scale = np.sqrt((centered_h0 ** 2).sum()) * np.sqrt((centered_y ** 2).sum())
    return covariance / scale
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
y_mean = np.mean(y)
ss_resid = np.sum((y - h0) ** 2)

View File

@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
from plot import Plot
from tqdm import tqdm
from learning.data import ConfusionMatrix, Dataset, Data, TargetType
from learning.functions import r_squared
from learning.functions import pearson, r_squared
class MLAlgorithm(ABC):
""" Classe generica per gli algoritmi di Machine Learning """
@@ -83,6 +83,7 @@ class MLAlgorithm(ABC):
print(f"Loss valid : {self.validation_loss():0.5f}")
print(f"Loss test : {self.test_loss():0.5f}")
if self._target_type == TargetType.Regression:
print(f"Pearson : {self.test_pearson():0.5f}")
print(f"R^2 : {self.test_r_squared():0.5f}")
elif self._target_type != TargetType.NoTarget:
conf = self.test_confusion_matrix()
@@ -106,6 +107,11 @@ class MLAlgorithm(ABC):
y = np.argmax(y, axis=1)
return ConfusionMatrix(y, h0)
def test_pearson(self) -> float:
    """Pearson correlation on the test set.

    Returns 0 for any non-regression target type, since correlation
    between predictions and targets is only meaningful for regression.
    """
    if self._target_type == TargetType.Regression:
        return pearson(self._h0(self._testset.x), self._testset.y)
    return 0
def test_r_squared(self) -> float:
if self._target_type != TargetType.Regression:
return 0

View File

@@ -19,11 +19,13 @@ class GradientDescent(MLAlgorithm):
def _learning_step(self) -> float:
x, y, m, _ = self._learnset.as_tuple()
h0 = self._h0(x)
regularization = (self.lambd / m) * self.theta
regularization[0] = 0
derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
self.theta -= derivative + regularization
derivative = np.mean((h0 - y) * with_bias(x).T, axis=1)
self.theta -= self.alpha * derivative + regularization
return self._loss(x, y)
def _predict_loss(self, dataset:Data) -> float: