This commit is contained in:
2024-04-20 21:21:45 +02:00
parent 18e390d34b
commit f525cdf280
4 changed files with 46 additions and 28 deletions

View File

@@ -1,9 +1,8 @@
from learning.data import Dataset from learning.data import Dataset
from learning.supervised import LinearRegression from learning.supervised import LinearRegression
from learning.ml import MLAlgorithm from learning.ml import MLRegression
from plot import Plot
def auto_mpg() -> MLAlgorithm: def auto_mpg() -> MLRegression:
df = Dataset("datasets\\auto-mpg.csv", "MPG") df = Dataset("datasets\\auto-mpg.csv", "MPG")
df.to_numbers(["HP"]) df.to_numbers(["HP"])
@@ -12,29 +11,22 @@ def auto_mpg() -> MLAlgorithm:
return LinearRegression(df, learning_rate=0.0001) return LinearRegression(df, learning_rate=0.0001)
def automobile() -> MLRegression:
    """Build a linear-regression learner for the automobile dataset.

    The CSV is loaded with ``symboling`` as the target column. Categorical
    columns are replaced by integer codes, the numeric columns that arrive
    as text are parsed to numbers, and every remaining feature column is
    standardized. The categorical columns are excluded from the
    standardization so their integer codes are left as they are.

    Returns:
        A ``LinearRegression`` built on the prepared dataset with a
        learning rate of 0.001.
    """
    # Columns holding labels rather than measurements.
    categorical = [
        "fuel-system", "engine-type", "drive-wheels", "body-style", "make",
        "engine-location", "aspiration", "fuel-type", "num-of-cylinders",
        "num-of-doors",
    ]
    # Numeric columns that are read as text and need explicit parsing.
    numeric_text = [
        "normalized-losses", "bore", "stroke", "horsepower", "peak-rpm",
        "price",
    ]

    dataset = Dataset("datasets\\regression\\automobile.csv", "symboling")
    dataset.factorize(categorical)
    dataset.to_numbers(numeric_text)
    # NOTE(review): handle_na is defined outside this view; presumably it
    # deals with the NaN values produced by the coercing parse above.
    dataset.handle_na()
    dataset.regularize(excepts=categorical)
    return LinearRegression(dataset, learning_rate=0.001)
epoch = 15000
ml = automobile()
epoch = 50000 ml.learn(epoch)
skip = 1000 ml.plot()
lr = automobile()
train, test = lr.learn(epoch)
plot = Plot("Error", "Time", "Mean Error")
plot.line("training", "red", data=train[skip:])
plot.line("test", "blue", data=test[skip:])
""" """
for _ in range(0, epoch): for _ in range(0, epoch):
@@ -45,7 +37,3 @@ for _ in range(0, epoch):
plot.update("test", test_err) plot.update("test", test_err)
plot.update_limits() plot.update_limits()
""" """
plot.wait()

View File

@@ -17,24 +17,25 @@ class Dataset:
self.target = target self.target = target
self.classification = (data[target].dtype == object) self.classification = (data[target].dtype == object)
def regularize(self, excepts:list=[]) -> Self: def regularize(self, excepts:list[str]=[]) -> Self:
excepts.append(self.target) excepts.append(self.target)
excepts.append("Bias") excepts.append("Bias")
for col in self.data: for col in self.data:
if col not in excepts: if col not in excepts:
dt = self.data[col] datacol = self.data[col]
self.data[col] = (dt - dt.mean()) / dt.std() datacol = (datacol - datacol.mean()) / datacol.std()
self.data[col] = datacol
return self return self
def factorize(self, columns:list=[]) -> Self: def factorize(self, columns:list[str]=[]) -> Self:
data = self.data data = self.data
for col in columns: for col in columns:
data[col] = pd.factorize(data[col])[0] data[col] = pd.factorize(data[col])[0]
return self return self
def to_numbers(self, columns:list=[]) -> Self: def to_numbers(self, columns:list[str]=[]) -> Self:
data = self.data data = self.data
for col in self.data.columns: for col in columns:
if data[col].dtype == object: if data[col].dtype == object:
data[col] = pd.to_numeric(data[col], errors='coerce') data[col] = pd.to_numeric(data[col], errors='coerce')
return self return self
@@ -64,3 +65,13 @@ class PrincipalComponentAnalisys:
if threshold <= 0 or threshold > 1: if threshold <= 0 or threshold > 1:
threshold = 1 threshold = 1
if __name__ == "__main__":
    # Manual smoke check: run the automobile preprocessing steps and print
    # the resulting column dtypes so the conversions can be inspected.
    categorical = [
        "fuel-system", "engine-type", "drive-wheels", "body-style", "make",
        "engine-location", "aspiration", "fuel-type", "num-of-cylinders",
        "num-of-doors",
    ]
    numeric_text = [
        "normalized-losses", "bore", "stroke", "horsepower", "peak-rpm",
        "price",
    ]

    dataset = Dataset("datasets\\regression\\automobile.csv", "symboling")
    # factorize and to_numbers both return the dataset, so they chain.
    dataset.factorize(categorical).to_numbers(numeric_text)
    dataset.handle_na()
    # Categorical codes are excluded from standardization.
    dataset.regularize(excepts=categorical)
    print(dataset.data.dtypes)

View File

@@ -1,14 +1,18 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from learning.data import Dataset from learning.data import Dataset
from plot import Plot
import numpy as np import numpy as np
class MLAlgorithm(ABC): class MLAlgorithm(ABC):
""" Classe generica per gli algoritmi di Machine Learning """
dataset: Dataset dataset: Dataset
testset: np.ndarray testset: np.ndarray
learnset: np.ndarray learnset: np.ndarray
test_error: list[float]
train_error: list[float]
def _set_dataset(self, dataset:Dataset, split:float=0.2): def _set_dataset(self, dataset:Dataset, split:float=0.2):
ndarray = dataset.shuffle().as_ndarray() ndarray = dataset.shuffle().as_ndarray()
@@ -30,6 +34,9 @@ class MLAlgorithm(ABC):
for _ in range(0, max(1, times)): for _ in range(0, max(1, times)):
train.append(self.learning_step()) train.append(self.learning_step())
test.append(self.test_error()) test.append(self.test_error())
self.train_error = train
self.test_error = test
return (train, test) return (train, test)
@abstractmethod @abstractmethod
@@ -39,3 +46,15 @@ class MLAlgorithm(ABC):
@abstractmethod @abstractmethod
def test_error(self) -> float: def test_error(self) -> float:
pass pass
@abstractmethod
def plot(self, skip:int=1000) -> None:
    """Display the results of training; implemented by concrete subclasses.

    Args:
        skip: In the ``MLRegression`` implementation, the number of leading
            error samples left out of the chart. Defaults to 1000.
    """
    pass
class MLRegression(MLAlgorithm):
    """Base class for regression learners; adds an error-history chart."""

    def plot(self, skip:int=1000) -> None:
        """Chart the recorded training and test errors.

        Draws the training history in red and the test history in blue on
        a chart titled "Error", with axes labelled "Time" and "Mean Error".

        Args:
            skip: Number of leading entries dropped from both histories
                before plotting. If a history has no more than ``skip``
                entries, its slice is empty.
        """
        # NOTE(review): train_error and test_error are read as the lists
        # stored on the instance by the training loop. MLAlgorithm also
        # declares an abstract method named test_error, so before training
        # this attribute is a method, not a list. Confirm the intended name.
        chart = Plot("Error", "Time", "Mean Error")

        training_tail = self.train_error[skip:]
        chart.line("training", "red", data=training_tail)

        testing_tail = self.test_error[skip:]
        chart.line("test", "blue", data=testing_tail)

        # Presumably keeps the window open until the user closes it; Plot
        # is defined outside this view.
        chart.wait()

View File

@@ -1,10 +1,10 @@
import math as math import math as math
import numpy as np import numpy as np
from ml import MLAlgorithm from learning.ml import MLRegression
from learning.data import Dataset from learning.data import Dataset
class LinearRegression(MLAlgorithm): class LinearRegression(MLRegression):
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None: def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
self._set_dataset(dataset) self._set_dataset(dataset)