Progress Bar
- added a progress bar - divided the dataset into validation, test, and learning sets - added patience-based early stopping for learning
This commit is contained in:
39
src/app.py
39
src/app.py
@@ -1,16 +1,17 @@
|
|||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
from learning.supervised import LinearRegression
|
from learning.supervised import LinearRegression
|
||||||
from learning.ml import MLRegression
|
from learning.ml import MLRegression
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
def auto_mpg() -> tuple[int, int, MLRegression]:
|
def auto_mpg() -> tuple[int, MLRegression]:
|
||||||
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
||||||
|
|
||||||
df.to_numbers(["HP"])
|
df.to_numbers(["HP"])
|
||||||
df.handle_na()
|
df.handle_na()
|
||||||
df.regularize(excepts=["Cylinders","Year","Origin"])
|
df.regularize(excepts=["Cylinders","Year","Origin"])
|
||||||
return (5000, 1000, LinearRegression(df, learning_rate=0.0001))
|
return (1000, LinearRegression(df, learning_rate=0.0001))
|
||||||
|
|
||||||
def automobile() -> tuple[int, int, MLRegression]:
|
def automobile() -> tuple[int, MLRegression]:
|
||||||
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||||
|
|
||||||
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
||||||
@@ -18,23 +19,27 @@ def automobile() -> tuple[int, int, MLRegression]:
|
|||||||
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||||
df.handle_na()
|
df.handle_na()
|
||||||
df.regularize(excepts=attributes_to_modify)
|
df.regularize(excepts=attributes_to_modify)
|
||||||
return (5000, 1000, LinearRegression(df, learning_rate=0.002))
|
return (1000, LinearRegression(df, learning_rate=0.004))
|
||||||
|
|
||||||
def power_plant() -> tuple[int, int, MLRegression]:
|
def power_plant() -> tuple[int, MLRegression]:
|
||||||
df = Dataset("datasets\\regression\\power-plant.csv", "energy-output")
|
df = Dataset("datasets\\regression\\power-plant.csv", "energy-output")
|
||||||
df.regularize()
|
df.regularize()
|
||||||
return (1000, 80, LinearRegression(df, learning_rate=0.1))
|
return (80, LinearRegression(df, learning_rate=0.1))
|
||||||
|
|
||||||
epoch, skip, ml = automobile()
|
|
||||||
ml.learn(epoch)
|
|
||||||
ml.plot(skip=skip)
|
|
||||||
|
|
||||||
"""
|
|
||||||
for _ in range(0, epoch):
|
|
||||||
train_err = lr.learning_step()
|
|
||||||
test_err = lr.test_error()
|
|
||||||
|
|
||||||
plot.update("training", train_err)
|
def learn_dataset(function:Callable[..., tuple[int, MLRegression]], epochs:int=100000, verbose=True)-> None:
|
||||||
plot.update("test", test_err)
|
skip, ml = function()
|
||||||
plot.update_limits()
|
ml.learn(epochs, verbose=verbose)
|
||||||
"""
|
|
||||||
|
err_tests = ml.test_loss()
|
||||||
|
err_valid = ml.validation_loss()
|
||||||
|
err_learn = ml.learning_loss()
|
||||||
|
print(f"Loss value: tests={err_tests:1.5f}, valid={err_valid:1.5f}, learn={err_learn:1.5f}")
|
||||||
|
|
||||||
|
ml.plot(skip=skip)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
learn_dataset(auto_mpg)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
from plot import Plot
|
from plot import Plot
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -11,16 +12,18 @@ class MLAlgorithm(ABC):
|
|||||||
dataset: Dataset
|
dataset: Dataset
|
||||||
testset: np.ndarray
|
testset: np.ndarray
|
||||||
learnset: np.ndarray
|
learnset: np.ndarray
|
||||||
test_error: list[float]
|
_valid_loss: list[float]
|
||||||
train_error: list[float]
|
_train_loss: list[float]
|
||||||
|
|
||||||
def _set_dataset(self, dataset:Dataset, split:float=0.2):
|
def _set_dataset(self, dataset:Dataset, split:float=0.2):
|
||||||
ndarray = dataset.shuffle().as_ndarray()
|
ndarray = dataset.shuffle().as_ndarray()
|
||||||
split = int(ndarray.shape[0] * split)
|
splitT = int(ndarray.shape[0] * split)
|
||||||
|
splitV = int(splitT / 2)
|
||||||
|
|
||||||
self.dataset = dataset
|
self.dataset = dataset
|
||||||
self.testset = ndarray[split:]
|
self.validset = ndarray[:splitV]
|
||||||
self.learnset = ndarray[:split]
|
self.testset = ndarray[splitV:splitT]
|
||||||
|
self.learnset = ndarray[splitT:]
|
||||||
|
|
||||||
def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]:
|
def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]:
|
||||||
x = np.delete(dset, 0, 1)
|
x = np.delete(dset, 0, 1)
|
||||||
@@ -28,43 +31,64 @@ class MLAlgorithm(ABC):
|
|||||||
m = dset.shape[0]
|
m = dset.shape[0]
|
||||||
return (x, y, m)
|
return (x, y, m)
|
||||||
|
|
||||||
def learn(self, times:int) -> tuple[list, list]:
|
def learn(self, epochs:int, early_stop:float=0.0000001, max_patience:int=10, verbose:bool=False) -> tuple[int, list, list]:
|
||||||
_, train, test = self.learn_until(times)
|
learn = []
|
||||||
return (train, test)
|
valid = []
|
||||||
|
|
||||||
def learn_until(self, max_iter:int=1000000, delta:float=0.0) -> tuple[int, list, list]:
|
|
||||||
train = []
|
|
||||||
test = []
|
|
||||||
prev = None
|
|
||||||
count = 0
|
count = 0
|
||||||
|
patience = 0
|
||||||
|
trange = range(epochs)
|
||||||
|
if verbose: trange = tqdm(trange, bar_format="Epochs {percentage:3.0f}% [{bar}] {elapsed}{postfix}")
|
||||||
|
|
||||||
while count < max_iter and (prev == None or prev - train[-1] > delta):
|
try:
|
||||||
count += 1
|
for _ in trange:
|
||||||
prev = train[-1] if len(train) > 0 else None
|
if count > 1 and valid[-2] - valid[-1] < early_stop:
|
||||||
|
if patience >= max_patience:
|
||||||
|
self.set_parameters(backup)
|
||||||
|
break
|
||||||
|
patience += 1
|
||||||
|
else:
|
||||||
|
backup = self.get_parameters()
|
||||||
|
patience = 0
|
||||||
|
|
||||||
train.append(self.learning_step())
|
count += 1
|
||||||
test.append(self.test_error())
|
learn.append(self.learning_step())
|
||||||
|
valid.append(self.validation_loss())
|
||||||
|
|
||||||
|
if verbose: trange.set_postfix({"learn": f"{learn[-1]:2.5f}", "validation": f"{valid[-1]:2.5f}"})
|
||||||
|
except KeyboardInterrupt: pass
|
||||||
|
if verbose: print(f"Loop ended after {count} epochs")
|
||||||
|
|
||||||
|
self._train_loss = learn
|
||||||
|
self._valid_loss = valid
|
||||||
|
return (count, learn, valid)
|
||||||
|
|
||||||
|
def learning_loss(self) -> float:
|
||||||
|
return self.predict_loss(self.learnset)
|
||||||
|
|
||||||
|
def validation_loss(self) -> float:
|
||||||
|
return self.predict_loss(self.validset)
|
||||||
|
|
||||||
|
def test_loss(self) -> float:
|
||||||
|
return self.predict_loss(self.testset)
|
||||||
|
|
||||||
self.train_error = train
|
|
||||||
self.test_error = test
|
|
||||||
return (count, train, test)
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def learning_step(self) -> float:
|
def learning_step(self) -> float: pass
|
||||||
pass
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def test_error(self) -> float:
|
def predict_loss(self, dataset:np.ndarray) -> float: pass
|
||||||
pass
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def plot(self, skip:int=1000) -> None:
|
def plot(self, skip:int=1000) -> None: pass
|
||||||
pass
|
@abstractmethod
|
||||||
|
def get_parameters(self): pass
|
||||||
|
@abstractmethod
|
||||||
|
def set_parameters(self, parameters): pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MLRegression(MLAlgorithm):
|
class MLRegression(MLAlgorithm):
|
||||||
def plot(self, skip:int=1000) -> None:
|
def plot(self, skip:int=1000) -> None:
|
||||||
|
skip = skip if len(self._train_loss) > skip else 0
|
||||||
plot = Plot("Error", "Time", "Mean Error")
|
plot = Plot("Error", "Time", "Mean Error")
|
||||||
plot.line("training", "blue", data=self.train_error[skip:])
|
plot.line("training", "blue", data=self._train_loss[skip:])
|
||||||
plot.line("test", "red", data=self.test_error[skip:])
|
plot.line("validation", "red", data=self._valid_loss[skip:])
|
||||||
plot.wait()
|
plot.wait()
|
||||||
|
|||||||
@@ -5,6 +5,9 @@ from learning.ml import MLRegression
|
|||||||
from learning.data import Dataset
|
from learning.data import Dataset
|
||||||
|
|
||||||
class LinearRegression(MLRegression):
|
class LinearRegression(MLRegression):
|
||||||
|
theta:np.ndarray
|
||||||
|
alpha:float
|
||||||
|
|
||||||
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
|
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
|
||||||
self._set_dataset(dataset)
|
self._set_dataset(dataset)
|
||||||
|
|
||||||
@@ -20,10 +23,16 @@ class LinearRegression(MLRegression):
|
|||||||
self.theta -= alpha * (1/m) * np.sum((x.dot(theta) - y) * x.T, axis=1)
|
self.theta -= alpha * (1/m) * np.sum((x.dot(theta) - y) * x.T, axis=1)
|
||||||
return self._error(x, y, m)
|
return self._error(x, y, m)
|
||||||
|
|
||||||
def test_error(self) -> float:
|
def predict_loss(self, dataset:np.ndarray) -> float:
|
||||||
x, y, m = self._split_data_target(self.testset)
|
x, y, m = self._split_data_target(dataset)
|
||||||
return self._error(x, y, m)
|
return self._error(x, y, m)
|
||||||
|
|
||||||
def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
|
def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
|
||||||
diff = (x.dot(self.theta) - y)
|
diff = (x.dot(self.theta) - y)
|
||||||
return 1/(2*m) * np.sum(diff ** 2)
|
return 1/(2*m) * np.sum(diff ** 2)
|
||||||
|
|
||||||
|
def get_parameters(self):
|
||||||
|
return self.theta.copy()
|
||||||
|
|
||||||
|
def set_parameters(self, parameters):
|
||||||
|
self.theta = parameters
|
||||||
|
|||||||
Reference in New Issue
Block a user