Fix bugs
This commit is contained in:
28
src/app.py
28
src/app.py
@@ -1,9 +1,8 @@
|
|||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
from learning.supervised import LinearRegression
|
from learning.supervised import LinearRegression
|
||||||
from learning.ml import MLAlgorithm
|
from learning.ml import MLRegression
|
||||||
from plot import Plot
|
|
||||||
|
|
||||||
def auto_mpg() -> MLAlgorithm:
|
def auto_mpg() -> MLRegression:
|
||||||
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
||||||
|
|
||||||
df.to_numbers(["HP"])
|
df.to_numbers(["HP"])
|
||||||
@@ -12,29 +11,22 @@ def auto_mpg() -> MLAlgorithm:
|
|||||||
|
|
||||||
return LinearRegression(df, learning_rate=0.0001)
|
return LinearRegression(df, learning_rate=0.0001)
|
||||||
|
|
||||||
def automobile() -> MLAlgorithm:
|
def automobile() -> MLRegression:
|
||||||
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||||
|
|
||||||
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
||||||
df.factorize(attributes_to_modify)
|
df.factorize(attributes_to_modify)
|
||||||
df.to_numbers()
|
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||||
df.handle_na()
|
df.handle_na()
|
||||||
df.regularize(excepts=attributes_to_modify)
|
df.regularize(excepts=attributes_to_modify)
|
||||||
|
|
||||||
return LinearRegression(df, learning_rate=0.001)
|
return LinearRegression(df, learning_rate=0.001)
|
||||||
|
|
||||||
|
|
||||||
|
epoch = 15000
|
||||||
|
ml = automobile()
|
||||||
epoch = 50000
|
ml.learn(epoch)
|
||||||
skip = 1000
|
ml.plot()
|
||||||
lr = automobile()
|
|
||||||
|
|
||||||
train, test = lr.learn(epoch)
|
|
||||||
|
|
||||||
plot = Plot("Error", "Time", "Mean Error")
|
|
||||||
plot.line("training", "red", data=train[skip:])
|
|
||||||
plot.line("test", "blue", data=test[skip:])
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for _ in range(0, epoch):
|
for _ in range(0, epoch):
|
||||||
@@ -45,7 +37,3 @@ for _ in range(0, epoch):
|
|||||||
plot.update("test", test_err)
|
plot.update("test", test_err)
|
||||||
plot.update_limits()
|
plot.update_limits()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
plot.wait()
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,24 +17,25 @@ class Dataset:
|
|||||||
self.target = target
|
self.target = target
|
||||||
self.classification = (data[target].dtype == object)
|
self.classification = (data[target].dtype == object)
|
||||||
|
|
||||||
def regularize(self, excepts:list=[]) -> Self:
|
def regularize(self, excepts:list[str]=[]) -> Self:
|
||||||
excepts.append(self.target)
|
excepts.append(self.target)
|
||||||
excepts.append("Bias")
|
excepts.append("Bias")
|
||||||
for col in self.data:
|
for col in self.data:
|
||||||
if col not in excepts:
|
if col not in excepts:
|
||||||
dt = self.data[col]
|
datacol = self.data[col]
|
||||||
self.data[col] = (dt - dt.mean()) / dt.std()
|
datacol = (datacol - datacol.mean()) / datacol.std()
|
||||||
|
self.data[col] = datacol
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def factorize(self, columns:list=[]) -> Self:
|
def factorize(self, columns:list[str]=[]) -> Self:
|
||||||
data = self.data
|
data = self.data
|
||||||
for col in columns:
|
for col in columns:
|
||||||
data[col] = pd.factorize(data[col])[0]
|
data[col] = pd.factorize(data[col])[0]
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def to_numbers(self, columns:list=[]) -> Self:
|
def to_numbers(self, columns:list[str]=[]) -> Self:
|
||||||
data = self.data
|
data = self.data
|
||||||
for col in self.data.columns:
|
for col in columns:
|
||||||
if data[col].dtype == object:
|
if data[col].dtype == object:
|
||||||
data[col] = pd.to_numeric(data[col], errors='coerce')
|
data[col] = pd.to_numeric(data[col], errors='coerce')
|
||||||
return self
|
return self
|
||||||
@@ -64,3 +65,13 @@ class PrincipalComponentAnalisys:
|
|||||||
if threshold <= 0 or threshold > 1:
|
if threshold <= 0 or threshold > 1:
|
||||||
threshold = 1
|
threshold = 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||||
|
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
||||||
|
df.factorize(attributes_to_modify)
|
||||||
|
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||||
|
df.handle_na()
|
||||||
|
df.regularize(excepts=attributes_to_modify)
|
||||||
|
print(df.data.dtypes)
|
||||||
|
|||||||
@@ -1,14 +1,18 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
|
from plot import Plot
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class MLAlgorithm(ABC):
|
class MLAlgorithm(ABC):
|
||||||
|
""" Classe generica per gli algoritmi di Machine Learning """
|
||||||
|
|
||||||
dataset: Dataset
|
dataset: Dataset
|
||||||
testset: np.ndarray
|
testset: np.ndarray
|
||||||
learnset: np.ndarray
|
learnset: np.ndarray
|
||||||
|
test_error: list[float]
|
||||||
|
train_error: list[float]
|
||||||
|
|
||||||
def _set_dataset(self, dataset:Dataset, split:float=0.2):
|
def _set_dataset(self, dataset:Dataset, split:float=0.2):
|
||||||
ndarray = dataset.shuffle().as_ndarray()
|
ndarray = dataset.shuffle().as_ndarray()
|
||||||
@@ -30,6 +34,9 @@ class MLAlgorithm(ABC):
|
|||||||
for _ in range(0, max(1, times)):
|
for _ in range(0, max(1, times)):
|
||||||
train.append(self.learning_step())
|
train.append(self.learning_step())
|
||||||
test.append(self.test_error())
|
test.append(self.test_error())
|
||||||
|
|
||||||
|
self.train_error = train
|
||||||
|
self.test_error = test
|
||||||
return (train, test)
|
return (train, test)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@@ -39,3 +46,15 @@ class MLAlgorithm(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def test_error(self) -> float:
|
def test_error(self) -> float:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def plot(self, skip:int=1000) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class MLRegression(MLAlgorithm):
|
||||||
|
def plot(self, skip:int=1000) -> None:
|
||||||
|
plot = Plot("Error", "Time", "Mean Error")
|
||||||
|
plot.line("training", "red", data=self.train_error[skip:])
|
||||||
|
plot.line("test", "blue", data=self.test_error[skip:])
|
||||||
|
plot.wait()
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import math as math
|
import math as math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ml import MLAlgorithm
|
from learning.ml import MLRegression
|
||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
|
|
||||||
class LinearRegression(MLAlgorithm):
|
class LinearRegression(MLRegression):
|
||||||
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
|
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
|
||||||
self._set_dataset(dataset)
|
self._set_dataset(dataset)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user