Fixes for Presentation
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
__pycache__
|
__pycache__
|
||||||
|
.venv
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
matplotlib
|
matplotlib
|
||||||
pandas
|
pandas
|
||||||
tqdm
|
tqdm
|
||||||
|
scikit-learn
|
||||||
|
|||||||
12
src/app.py
12
src/app.py
@@ -25,7 +25,7 @@ def auto_mpg() -> tuple[Dataset, MLAlgorithm, Any]:
|
|||||||
ds.numbers(["HP"])
|
ds.numbers(["HP"])
|
||||||
ds.handle_na()
|
ds.handle_na()
|
||||||
ds.normalize(excepts=["Cylinders","Year","Origin"])
|
ds.normalize(excepts=["Cylinders","Year","Origin"])
|
||||||
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.LinearRegression())
|
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.SGDRegressor())
|
||||||
|
|
||||||
def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
|
def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||||
ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
|
ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
|
||||||
@@ -35,12 +35,12 @@ def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
|
|||||||
ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||||
ds.handle_na()
|
ds.handle_na()
|
||||||
ds.normalize(excepts=attributes_to_modify)
|
ds.normalize(excepts=attributes_to_modify)
|
||||||
return (ds, LinearRegression(ds, learning_rate=0.004), sklearn.linear_model.LinearRegression())
|
return (ds, LinearRegression(ds, learning_rate=0.003), sklearn.linear_model.SGDRegressor())
|
||||||
|
|
||||||
def power_plant() -> tuple[Dataset, MLAlgorithm, Any]:
|
def power_plant() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||||
ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
|
ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
|
||||||
ds.normalize()
|
ds.normalize(excepts=None)
|
||||||
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.LinearRegression())
|
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.SGDRegressor())
|
||||||
|
|
||||||
# ********************
|
# ********************
|
||||||
# Logistic Regression
|
# Logistic Regression
|
||||||
@@ -101,7 +101,7 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
|
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
|
||||||
rand = np.random.randint(0, 4294967295)
|
rand = np.random.randint(0, 4294967295)
|
||||||
#rand = 1997847910 # LiR for power_plant
|
#rand = 2205910060 # LiR for power_plant
|
||||||
#rand = 347617386 # LoR for electrical_grid
|
#rand = 347617386 # LoR for electrical_grid
|
||||||
#rand = 1793295160 # MLP for iris
|
#rand = 1793295160 # MLP for iris
|
||||||
#rand = 2914000170 # MLP for frogs
|
#rand = 2914000170 # MLP for frogs
|
||||||
@@ -110,7 +110,7 @@ if __name__ == "__main__":
|
|||||||
np.random.seed(rand)
|
np.random.seed(rand)
|
||||||
print(f"Using seed: {rand}")
|
print(f"Using seed: {rand}")
|
||||||
|
|
||||||
ds, ml, sk = frogs()
|
ds, ml, sk = power_plant()
|
||||||
|
|
||||||
epochs, _, _ = ml.learn(1000, verbose=True)
|
epochs, _, _ = ml.learn(1000, verbose=True)
|
||||||
ml.display_results()
|
ml.display_results()
|
||||||
|
|||||||
@@ -52,10 +52,8 @@ class Dataset:
|
|||||||
|
|
||||||
for col in self.data:
|
for col in self.data:
|
||||||
if col not in excepts:
|
if col not in excepts:
|
||||||
index = self.data.columns.get_loc(col)
|
datacol = self.data[col]
|
||||||
datacol = self.data.pop(col)
|
self.data[col] = (datacol - datacol.mean()) / datacol.std()
|
||||||
datacol = (datacol - datacol.mean()) / datacol.std()
|
|
||||||
self.data.insert(index, col, datacol)
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def factorize(self, columns:list[str]=[]) -> Self:
|
def factorize(self, columns:list[str]=[]) -> Self:
|
||||||
|
|||||||
@@ -44,6 +44,13 @@ def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
|
|||||||
# Randoms
|
# Randoms
|
||||||
# **********
|
# **********
|
||||||
|
|
||||||
|
def pearson(h0:np.ndarray, y:np.ndarray) -> float:
    """Compute the Pearson correlation coefficient between two arrays.

    Args:
        h0: Predicted values.
        y: Observed values; must broadcast against ``h0``.

    Returns:
        The sum of the products of the mean-centered inputs divided by the
        product of their Euclidean norms. NaN when either input is constant,
        because the coefficient is undefined for zero variance.
    """
    diff1 = h0 - h0.mean()
    diff2 = y - y.mean()
    num = np.sum(diff1 * diff2)
    den = np.sqrt(np.sum(diff1**2)) * np.sqrt(np.sum(diff2**2))
    if den == 0:
        # A constant input makes this 0/0. Return NaN explicitly instead of
        # letting NumPy emit a RuntimeWarning for the invalid division.
        return float("nan")
    return float(num / den)
|
||||||
|
|
||||||
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
|
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
|
||||||
y_mean = np.mean(y)
|
y_mean = np.mean(y)
|
||||||
ss_resid = np.sum((y - h0) ** 2)
|
ss_resid = np.sum((y - h0) ** 2)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
|
|||||||
from plot import Plot
|
from plot import Plot
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from learning.data import ConfusionMatrix, Dataset, Data, TargetType
|
from learning.data import ConfusionMatrix, Dataset, Data, TargetType
|
||||||
from learning.functions import r_squared
|
from learning.functions import pearson, r_squared
|
||||||
|
|
||||||
class MLAlgorithm(ABC):
|
class MLAlgorithm(ABC):
|
||||||
""" Classe generica per gli algoritmi di Machine Learning """
|
""" Classe generica per gli algoritmi di Machine Learning """
|
||||||
@@ -83,6 +83,7 @@ class MLAlgorithm(ABC):
|
|||||||
print(f"Loss valid : {self.validation_loss():0.5f}")
|
print(f"Loss valid : {self.validation_loss():0.5f}")
|
||||||
print(f"Loss test : {self.test_loss():0.5f}")
|
print(f"Loss test : {self.test_loss():0.5f}")
|
||||||
if self._target_type == TargetType.Regression:
|
if self._target_type == TargetType.Regression:
|
||||||
|
print(f"Pearson : {self.test_pearson():0.5f}")
|
||||||
print(f"R^2 : {self.test_r_squared():0.5f}")
|
print(f"R^2 : {self.test_r_squared():0.5f}")
|
||||||
elif self._target_type != TargetType.NoTarget:
|
elif self._target_type != TargetType.NoTarget:
|
||||||
conf = self.test_confusion_matrix()
|
conf = self.test_confusion_matrix()
|
||||||
@@ -106,6 +107,11 @@ class MLAlgorithm(ABC):
|
|||||||
y = np.argmax(y, axis=1)
|
y = np.argmax(y, axis=1)
|
||||||
return ConfusionMatrix(y, h0)
|
return ConfusionMatrix(y, h0)
|
||||||
|
|
||||||
|
def test_pearson(self) -> float:
    """Return the Pearson correlation of the model's test-set predictions.

    The predictions come from ``self._h0`` applied to the test-set inputs and
    are compared with the test-set targets. Returns 0 when the target type is
    anything other than ``TargetType.Regression``.
    """
    if self._target_type == TargetType.Regression:
        predictions = self._h0(self._testset.x)
        return pearson(predictions, self._testset.y)
    return 0
|
||||||
|
|
||||||
def test_r_squared(self) -> float:
|
def test_r_squared(self) -> float:
|
||||||
if self._target_type != TargetType.Regression:
|
if self._target_type != TargetType.Regression:
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
@@ -19,11 +19,13 @@ class GradientDescent(MLAlgorithm):
|
|||||||
|
|
||||||
def _learning_step(self) -> float:
|
def _learning_step(self) -> float:
|
||||||
x, y, m, _ = self._learnset.as_tuple()
|
x, y, m, _ = self._learnset.as_tuple()
|
||||||
|
h0 = self._h0(x)
|
||||||
|
|
||||||
regularization = (self.lambd / m) * self.theta
|
regularization = (self.lambd / m) * self.theta
|
||||||
regularization[0] = 0
|
regularization[0] = 0
|
||||||
derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
|
|
||||||
self.theta -= derivative + regularization
|
derivative = np.mean((h0 - y) * with_bias(x).T, axis=1)
|
||||||
|
self.theta -= self.alpha * derivative + regularization
|
||||||
return self._loss(x, y)
|
return self._loss(x, y)
|
||||||
|
|
||||||
def _predict_loss(self, dataset:Data) -> float:
|
def _predict_loss(self, dataset:Data) -> float:
|
||||||
|
|||||||
Reference in New Issue
Block a user