Fixes for Presentation
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1 +1,2 @@
|
||||
__pycache__
|
||||
__pycache__
|
||||
.venv
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
matplotlib
|
||||
pandas
|
||||
tqdm
|
||||
tqdm
|
||||
scikit-learn
|
||||
|
||||
12
src/app.py
12
src/app.py
@@ -25,7 +25,7 @@ def auto_mpg() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||
ds.numbers(["HP"])
|
||||
ds.handle_na()
|
||||
ds.normalize(excepts=["Cylinders","Year","Origin"])
|
||||
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.LinearRegression())
|
||||
return (ds, LinearRegression(ds, learning_rate=0.0001), sklearn.linear_model.SGDRegressor())
|
||||
|
||||
def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||
ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
|
||||
@@ -35,12 +35,12 @@ def automobile() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||
ds.numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||
ds.handle_na()
|
||||
ds.normalize(excepts=attributes_to_modify)
|
||||
return (ds, LinearRegression(ds, learning_rate=0.004), sklearn.linear_model.LinearRegression())
|
||||
return (ds, LinearRegression(ds, learning_rate=0.003), sklearn.linear_model.SGDRegressor())
|
||||
|
||||
def power_plant() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||
ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
|
||||
ds.normalize()
|
||||
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.LinearRegression())
|
||||
ds.normalize(excepts=None)
|
||||
return (ds, LinearRegression(ds, learning_rate=0.1), sklearn.linear_model.SGDRegressor())
|
||||
|
||||
# ********************
|
||||
# Logistic Regression
|
||||
@@ -101,7 +101,7 @@ def iris_no_target() -> tuple[Dataset, MLAlgorithm, Any]:
|
||||
if __name__ == "__main__":
|
||||
np.set_printoptions(linewidth=np.inf, formatter={'float': '{:>10.5f}'.format})
|
||||
rand = np.random.randint(0, 4294967295)
|
||||
#rand = 1997847910 # LiR for power_plant
|
||||
#rand = 2205910060 # LiR for power_plant
|
||||
#rand = 347617386 # LoR for electrical_grid
|
||||
#rand = 1793295160 # MLP for iris
|
||||
#rand = 2914000170 # MLP for frogs
|
||||
@@ -110,7 +110,7 @@ if __name__ == "__main__":
|
||||
np.random.seed(rand)
|
||||
print(f"Using seed: {rand}")
|
||||
|
||||
ds, ml, sk = frogs()
|
||||
ds, ml, sk = power_plant()
|
||||
|
||||
epochs, _, _ = ml.learn(1000, verbose=True)
|
||||
ml.display_results()
|
||||
|
||||
@@ -52,10 +52,8 @@ class Dataset:
|
||||
|
||||
for col in self.data:
|
||||
if col not in excepts:
|
||||
index = self.data.columns.get_loc(col)
|
||||
datacol = self.data.pop(col)
|
||||
datacol = (datacol - datacol.mean()) / datacol.std()
|
||||
self.data.insert(index, col, datacol)
|
||||
datacol = self.data[col]
|
||||
self.data[col] = (datacol - datacol.mean()) / datacol.std()
|
||||
return self
|
||||
|
||||
def factorize(self, columns:list[str]=[]) -> Self:
|
||||
|
||||
@@ -44,6 +44,13 @@ def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
|
||||
# Randoms
|
||||
# **********
|
||||
|
||||
def pearson(h0:np.ndarray, y:np.ndarray) -> float:
    """Return the Pearson correlation coefficient between h0 and y.

    The coefficient is the sum of the products of the two vectors'
    deviations from their means, divided by the product of the square
    roots of their summed squared deviations.

    Args:
        h0: predicted values.
        y: observed values, broadcast-compatible with ``h0``.

    Returns:
        The correlation as a plain ``float``. When the coefficient is
        undefined (empty input, or either vector is constant so the
        denominator is zero) 0.0 is returned instead of dividing by zero.
    """
    predicted = np.asarray(h0, dtype=float)
    observed = np.asarray(y, dtype=float)

    # mean() of an empty array warns and yields NaN; nothing to correlate.
    if predicted.size == 0 or observed.size == 0:
        return 0.0

    dev_predicted = predicted - predicted.mean()
    dev_observed = observed - observed.mean()

    num = np.sum(dev_predicted * dev_observed)
    den = np.sqrt(np.sum(dev_predicted ** 2)) * np.sqrt(np.sum(dev_observed ** 2))

    # A constant vector has zero spread: the ratio would be 0/0 (NaN plus a
    # RuntimeWarning), so report "no linear correlation" explicitly.
    if den == 0:
        return 0.0
    return float(num / den)
|
||||
|
||||
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
|
||||
y_mean = np.mean(y)
|
||||
ss_resid = np.sum((y - h0) ** 2)
|
||||
|
||||
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
|
||||
from plot import Plot
|
||||
from tqdm import tqdm
|
||||
from learning.data import ConfusionMatrix, Dataset, Data, TargetType
|
||||
from learning.functions import r_squared
|
||||
from learning.functions import pearson, r_squared
|
||||
|
||||
class MLAlgorithm(ABC):
|
||||
""" Classe generica per gli algoritmi di Machine Learning """
|
||||
@@ -83,6 +83,7 @@ class MLAlgorithm(ABC):
|
||||
print(f"Loss valid : {self.validation_loss():0.5f}")
|
||||
print(f"Loss test : {self.test_loss():0.5f}")
|
||||
if self._target_type == TargetType.Regression:
|
||||
print(f"Pearson : {self.test_pearson():0.5f}")
|
||||
print(f"R^2 : {self.test_r_squared():0.5f}")
|
||||
elif self._target_type != TargetType.NoTarget:
|
||||
conf = self.test_confusion_matrix()
|
||||
@@ -106,6 +107,11 @@ class MLAlgorithm(ABC):
|
||||
y = np.argmax(y, axis=1)
|
||||
return ConfusionMatrix(y, h0)
|
||||
|
||||
def test_pearson(self) -> float:
    """Return the Pearson correlation of the model on the test set.

    For any target type other than regression the method returns 0
    without evaluating the model.
    """
    if self._target_type != TargetType.Regression:
        return 0

    # Predictions for the test inputs, compared against the test targets.
    predictions = self._h0(self._testset.x)
    targets = self._testset.y
    return pearson(predictions, targets)
|
||||
|
||||
def test_r_squared(self) -> float:
|
||||
if self._target_type != TargetType.Regression:
|
||||
return 0
|
||||
|
||||
@@ -19,11 +19,13 @@ class GradientDescent(MLAlgorithm):
|
||||
|
||||
def _learning_step(self) -> float:
|
||||
x, y, m, _ = self._learnset.as_tuple()
|
||||
h0 = self._h0(x)
|
||||
|
||||
regularization = (self.lambd / m) * self.theta
|
||||
regularization[0] = 0
|
||||
derivative = self.alpha * np.mean((self._h0(x) - y) * with_bias(x).T, axis=1)
|
||||
self.theta -= derivative + regularization
|
||||
|
||||
derivative = np.mean((h0 - y) * with_bias(x).T, axis=1)
|
||||
self.theta -= self.alpha * derivative + regularization
|
||||
return self._loss(x, y)
|
||||
|
||||
def _predict_loss(self, dataset:Data) -> float:
|
||||
|
||||
Reference in New Issue
Block a user