diff --git a/src/app.py b/src/app.py index 3ce4475..598f3bc 100644 --- a/src/app.py +++ b/src/app.py @@ -1,9 +1,8 @@ from learning.data import Dataset from learning.supervised import LinearRegression -from learning.ml import MLAlgorithm -from plot import Plot +from learning.ml import MLRegression -def auto_mpg() -> MLAlgorithm: +def auto_mpg() -> MLRegression: df = Dataset("datasets\\auto-mpg.csv", "MPG") df.to_numbers(["HP"]) @@ -12,29 +11,22 @@ def auto_mpg() -> MLAlgorithm: return LinearRegression(df, learning_rate=0.0001) -def automobile() -> MLAlgorithm: +def automobile() -> MLRegression: df = Dataset("datasets\\regression\\automobile.csv", "symboling") attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"] df.factorize(attributes_to_modify) - df.to_numbers() + df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"]) df.handle_na() df.regularize(excepts=attributes_to_modify) return LinearRegression(df, learning_rate=0.001) - - -epoch = 50000 -skip = 1000 -lr = automobile() - -train, test = lr.learn(epoch) - -plot = Plot("Error", "Time", "Mean Error") -plot.line("training", "red", data=train[skip:]) -plot.line("test", "blue", data=test[skip:]) +epoch = 15000 +ml = automobile() +ml.learn(epoch) +ml.plot() """ for _ in range(0, epoch): @@ -45,7 +37,3 @@ for _ in range(0, epoch): plot.update("test", test_err) plot.update_limits() """ - -plot.wait() - - diff --git a/src/learning/data.py b/src/learning/data.py index 6b6afb9..944fbfa 100644 --- a/src/learning/data.py +++ b/src/learning/data.py @@ -17,24 +17,25 @@ class Dataset: self.target = target self.classification = (data[target].dtype == object) - def regularize(self, excepts:list=[]) -> Self: + def regularize(self, excepts:list[str]=[]) -> Self: excepts.append(self.target) excepts.append("Bias") for col in self.data: if col not in excepts: - dt = self.data[col] - self.data[col] = (dt - dt.mean()) / dt.std() + datacol = self.data[col] + datacol = (datacol - datacol.mean()) / datacol.std() + self.data[col] = datacol return self - def factorize(self, columns:list=[]) -> Self: + def factorize(self, columns:list[str]=[]) -> Self: data = self.data for col in columns: data[col] = pd.factorize(data[col])[0] return self - def to_numbers(self, columns:list=[]) -> Self: + def to_numbers(self, columns:list[str]=[]) -> Self: data = self.data - for col in self.data.columns: + for col in columns: if data[col].dtype == object: data[col] = pd.to_numeric(data[col], errors='coerce') return self @@ -64,3 +65,13 @@ class PrincipalComponentAnalisys: if threshold <= 0 or threshold > 1: threshold = 1 + + +if __name__ == "__main__": + df = Dataset("datasets\\regression\\automobile.csv", "symboling") + attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"] + df.factorize(attributes_to_modify) + df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"]) + df.handle_na() + df.regularize(excepts=attributes_to_modify) + print(df.data.dtypes) diff --git a/src/learning/ml.py b/src/learning/ml.py index f6f543c..38834d5 100644 --- a/src/learning/ml.py +++ b/src/learning/ml.py @@ -1,14 +1,18 @@ from abc import ABC, abstractmethod from learning.data import Dataset +from plot import Plot import numpy as np class MLAlgorithm(ABC): + """ Classe generica per gli algoritmi di Machine Learning """ dataset: Dataset testset: np.ndarray learnset: np.ndarray + test_error: list[float] + train_error: list[float] def _set_dataset(self, dataset:Dataset, split:float=0.2): ndarray = dataset.shuffle().as_ndarray() @@ -30,6 +34,9 @@ class MLAlgorithm(ABC): for _ in range(0, max(1, times)): train.append(self.learning_step()) test.append(self.test_error()) + + self.train_error = train + self.test_error = test return (train, test) @abstractmethod @@ -39,3 +46,15 @@ class MLAlgorithm(ABC): @abstractmethod def test_error(self) -> float: pass + + @abstractmethod + def plot(self, skip:int=1000) -> None: + pass + + +class MLRegression(MLAlgorithm): + def plot(self, skip:int=1000) -> None: + plot = Plot("Error", "Time", "Mean Error") + plot.line("training", "red", data=self.train_error[skip:]) + plot.line("test", "blue", data=self.test_error[skip:]) + plot.wait() diff --git a/src/learning/supervised.py b/src/learning/supervised.py index e451bce..e34c87f 100644 --- a/src/learning/supervised.py +++ b/src/learning/supervised.py @@ -1,10 +1,10 @@ import math as math import numpy as np -from ml import MLAlgorithm +from learning.ml import MLRegression from learning.data import Dataset -class LinearRegression(MLAlgorithm): +class LinearRegression(MLRegression): def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None: self._set_dataset(dataset)