Progress Bar

- added a progress bar
- split the dataset into validation, test, and learning sets
- added patience (early stopping) to the learning loop
This commit is contained in:
2024-04-22 15:41:13 +02:00
parent b4bd976a9d
commit 1fb277bc70
3 changed files with 88 additions and 50 deletions

View File

@@ -1,16 +1,17 @@
from learning.data import Dataset from learning.data import Dataset
from learning.supervised import LinearRegression from learning.supervised import LinearRegression
from learning.ml import MLRegression from learning.ml import MLRegression
from typing import Callable
def auto_mpg() -> tuple[int, MLRegression]:
    """Prepare the auto-mpg dataset and a linear-regression model for it.

    Returns:
        A ``(skip, model)`` pair: ``skip`` is 1000 and ``model`` is a
        ``LinearRegression`` built on the prepared dataset with a
        learning rate of 0.0001.
    """
    import os  # local import: keeps this block self-contained

    # os.path.join yields the same "datasets\\auto-mpg.csv" string on Windows
    # but also resolves on POSIX, where a backslash is not a path separator.
    # NOTE(review): "MPG" is presumably the target column -- confirm in Dataset.
    df = Dataset(os.path.join("datasets", "auto-mpg.csv"), "MPG")
    df.to_numbers(["HP"])
    df.handle_na()
    df.regularize(excepts=["Cylinders", "Year", "Origin"])
    return (1000, LinearRegression(df, learning_rate=0.0001))
def automobile() -> tuple[int, int, MLRegression]: def automobile() -> tuple[int, MLRegression]:
df = Dataset("datasets\\regression\\automobile.csv", "symboling") df = Dataset("datasets\\regression\\automobile.csv", "symboling")
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"] attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
@@ -18,23 +19,27 @@ def automobile() -> tuple[int, int, MLRegression]:
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"]) df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
df.handle_na() df.handle_na()
df.regularize(excepts=attributes_to_modify) df.regularize(excepts=attributes_to_modify)
return (5000, 1000, LinearRegression(df, learning_rate=0.002)) return (1000, LinearRegression(df, learning_rate=0.004))
def power_plant() -> tuple[int, MLRegression]:
    """Prepare the power-plant dataset and a linear-regression model for it.

    Returns:
        A ``(skip, model)`` pair: ``skip`` is 80 and ``model`` is a
        ``LinearRegression`` built on the regularized dataset with a
        learning rate of 0.1.
    """
    import os  # local import: keeps this block self-contained

    # os.path.join yields the same backslash-separated string on Windows
    # but also resolves on POSIX, where a backslash is not a path separator.
    # NOTE(review): "energy-output" is presumably the target column -- confirm.
    df = Dataset(os.path.join("datasets", "regression", "power-plant.csv"), "energy-output")
    df.regularize()
    return (80, LinearRegression(df, learning_rate=0.1))
def learn_dataset(function:Callable[..., tuple[int, MLRegression]], epochs:int=100000, verbose=True)-> None:
    """Train the model produced by ``function``, report its losses and plot it.

    Args:
        function: zero-argument factory returning ``(skip, model)``.
        epochs: forwarded to ``model.learn`` as the epoch count.
        verbose: forwarded to ``model.learn``.
    """
    skip, model = function()
    model.learn(epochs, verbose=verbose)

    # Evaluated left to right: test, validation, learning.
    err_tests, err_valid, err_learn = (
        model.test_loss(),
        model.validation_loss(),
        model.learning_loss(),
    )
    print(f"Loss value: tests={err_tests:1.5f}, valid={err_valid:1.5f}, learn={err_learn:1.5f}")

    # ``skip`` comes from the factory and is handed to the plot unchanged.
    model.plot(skip=skip)


# Script entry point: train on the auto-mpg dataset with default settings.
if __name__ == "__main__":
    learn_dataset(auto_mpg)

View File

@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from learning.data import Dataset from learning.data import Dataset
from plot import Plot from plot import Plot
from tqdm import tqdm
import numpy as np import numpy as np
@@ -11,16 +12,18 @@ class MLAlgorithm(ABC):
# Instance attributes. The four data attributes are assigned by
# _set_dataset(); the two loss histories are assigned at the end of learn().
dataset: Dataset          # the Dataset object the splits were taken from
validset: np.ndarray      # validation rows (was assigned but never declared)
testset: np.ndarray       # test rows
learnset: np.ndarray      # rows used for learning
_valid_loss: list[float]  # validation loss recorded after each learning step
_train_loss: list[float]  # value returned by each learning step
def _set_dataset(self, dataset:Dataset, split:float=0.2):
    """Shuffle ``dataset`` and slice it into validation, test and learning rows.

    The first ``int(rows * split)`` rows are held out; the first half of
    those (integer-truncated) becomes the validation set, the rest the test
    set. Every remaining row goes to the learning set.

    Args:
        dataset: object exposing ``shuffle().as_ndarray()``.
        split: fraction of rows held out for validation and test together.
    """
    rows = dataset.shuffle().as_ndarray()
    holdout_end = int(rows.shape[0] * split)
    valid_end = int(holdout_end / 2)

    self.dataset = dataset
    self.validset, self.testset, self.learnset = (
        rows[:valid_end],
        rows[valid_end:holdout_end],
        rows[holdout_end:],
    )
def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]: def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]:
x = np.delete(dset, 0, 1) x = np.delete(dset, 0, 1)
@@ -28,43 +31,64 @@ class MLAlgorithm(ABC):
m = dset.shape[0] m = dset.shape[0]
return (x, y, m) return (x, y, m)
def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]:
    """Run up to ``epochs`` learning steps with patience-based early stopping.

    At the start of each iteration (once two validation losses exist) the
    drop between the last two validation losses is compared to
    ``early_stop``. A smaller drop counts as a stalled epoch; when
    ``patience`` has already reached ``max_patience`` on a stalled epoch, the
    last saved parameters are restored and the loop ends. On any
    non-stalled iteration the current parameters are saved and patience is
    reset to zero.

    Args:
        epochs: maximum number of learning steps.
        early_stop: minimum validation-loss decrease that counts as progress.
        max_patience: stalled iterations tolerated before stopping.
        verbose: show a tqdm progress bar and print a final summary line.

    Returns:
        ``(count, learn, valid)``: the number of steps executed and the
        per-step learning and validation losses. The two lists are also
        stored on ``self._train_loss`` and ``self._valid_loss``.
    """
    learn = []
    valid = []
    count = 0
    patience = 0

    trange = range(epochs)
    if verbose:
        trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}")

    try:
        for _ in trange:
            if count > 1 and valid[-2] - valid[-1] < early_stop:
                if patience >= max_patience:
                    # ``backup`` is always bound here: the first iteration
                    # (count == 0) necessarily takes the else branch below.
                    self.set_parameters(backup)
                    break
                patience += 1
            else:
                backup = self.get_parameters()
                patience = 0

            count += 1
            learn.append(self.learning_step())
            valid.append(self.validation_loss())
            if verbose:
                trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"})
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C ends training and returns what was learned so far.
        pass
    finally:
        # The bar only closes itself when fully consumed; close it explicitly
        # so an early break or Ctrl-C does not leave the summary line printed
        # on top of an unfinished bar.
        if verbose:
            trange.close()

    if verbose:
        print(f"Loop ended after {count} epochs")

    self._train_loss = learn
    self._valid_loss = valid
    return (count, learn, valid)
def learning_loss(self) -> float:
    """Return ``predict_loss`` evaluated on ``self.learnset``."""
    rows = self.learnset
    return self.predict_loss(rows)
def validation_loss(self) -> float:
    """Return ``predict_loss`` evaluated on ``self.validset``."""
    rows = self.validset
    return self.predict_loss(rows)
def test_loss(self) -> float:
    """Return ``predict_loss`` evaluated on ``self.testset``."""
    rows = self.testset
    return self.predict_loss(rows)
self.train_error = train
self.test_error = test
return (count, train, test)
@abstractmethod
def learning_step(self) -> float:
    """Perform one learning step and return its loss (recorded by ``learn``)."""

@abstractmethod
def predict_loss(self, dataset:np.ndarray) -> float:
    """Return the loss of the current model on the rows of ``dataset``."""

@abstractmethod
def plot(self, skip:int=1000) -> None:
    """Plot the recorded loss histories; ``skip`` defaults to 1000."""

@abstractmethod
def get_parameters(self):
    """Return the current model parameters (``learn`` saves them as a backup)."""

@abstractmethod
def set_parameters(self, parameters):
    """Replace the model parameters (``learn`` uses this to restore a backup)."""
class MLRegression(MLAlgorithm):
    """Regression algorithms; supplies the loss-history plot."""

    def plot(self, skip:int=1000) -> None:
        """Plot the training and validation loss histories.

        Args:
            skip: number of leading entries to drop from both histories.
                Ignored (treated as 0) unless the training history holds
                more than ``skip`` entries.
        """
        if len(self._train_loss) <= skip:
            skip = 0

        chart = Plot("Error", "Time", "Mean Error")
        series = (
            ("training", "blue", self._train_loss),
            ("validation", "red", self._valid_loss),
        )
        for label, colour, history in series:
            chart.line(label, colour, data=history[skip:])
        chart.wait()

View File

@@ -5,6 +5,9 @@ from learning.ml import MLRegression
from learning.data import Dataset from learning.data import Dataset
class LinearRegression(MLRegression): class LinearRegression(MLRegression):
theta:np.ndarray
alpha:float
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None: def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
self._set_dataset(dataset) self._set_dataset(dataset)
@@ -20,10 +23,16 @@ class LinearRegression(MLRegression):
self.theta -= alpha * (1/m) * np.sum((x.dot(theta) - y) * x.T, axis=1) self.theta -= alpha * (1/m) * np.sum((x.dot(theta) - y) * x.T, axis=1)
return self._error(x, y, m) return self._error(x, y, m)
def predict_loss(self, dataset:np.ndarray) -> float:
    """Return ``_error`` for ``dataset`` after ``_split_data_target`` splits it."""
    features, targets, rows = self._split_data_target(dataset)
    return self._error(features, targets, rows)
def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
    """Return ``1/(2*m)`` times the sum of squared residuals ``x.dot(theta) - y``."""
    residual = x.dot(self.theta) - y
    scale = 1/(2*m)
    return scale * np.sum(residual ** 2)
def get_parameters(self):
    """Return a copy of ``theta``, so later updates do not alter the result."""
    snapshot = self.theta.copy()
    return snapshot
def set_parameters(self, parameters):
    """Replace ``theta`` with a private copy of ``parameters``.

    A copy is stored rather than the caller's array: ``theta`` is updated
    in place (``self.theta -= ...``) during learning, which would otherwise
    silently modify the array the caller passed in.

    Args:
        parameters: array of parameter values, e.g. one previously obtained
            from ``get_parameters``.
    """
    # np.array copies its input by default and keeps its dtype.
    self.theta = np.array(parameters)