From 3e9bcb87e9c0613b49ac1a3f326d066a8a78aec5 Mon Sep 17 00:00:00 2001
From: Berack96
Date: Thu, 2 May 2024 14:27:57 +0200
Subject: [PATCH] Bugfixes

- fix bug recursion on init
- fix bug linux path
- added regularization
---
Review notes (ignored by `git am`, not part of the commit message):

* This patch was rebuilt from a copy whose newlines and indentation had been
  collapsed. Blank context lines and indentation were inferred from the hunk
  line counts. Three blank-line placements are ambiguous and must be checked
  against the target tree before applying: one in the src/app.py hunk at
  -22,19 and one in each src/learning/supervised.py hunk.
* GradientDescent.learning_step: the regularization term is now multiplied by
  the learning rate together with the gradient. Previously only the gradient
  was scaled by alpha, so the effective penalty depended on the learning rate.
  The added-line count is unchanged; the post-image blob id on the
  supervised.py `index` line no longer matches.
* NOTE(review): frogs() calls MultiLayerPerceptron(ds, learning_rate=0.08), but
  the __init__ shown below only accepts (dataset, layers). Confirm and fix in
  a follow-up.
* NOTE(review): auto_mpg used to read datasets\auto-mpg.csv and now resolves
  to datasets/regression/auto-mpg.csv. Confirm the file lives there.

 .vscode/launch.json        |  2 +-
 src/app.py                 | 16 ++++++++++------
 src/learning/supervised.py | 24 ++++++++++++++++++++----
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index e05779f..d621c75 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,7 +8,7 @@
             "name": "Python Debugger: App",
             "type": "debugpy",
             "request": "launch",
-            "program": "src\\app.py",
+            "program": "src/app.py",
             "console": "integratedTerminal"
         }
     ]
diff --git a/src/app.py b/src/app.py
index 9fb1266..e4c489d 100644
--- a/src/app.py
+++ b/src/app.py
@@ -3,8 +3,12 @@ from learning.supervised import LinearRegression, LogisticRegression, MultiLayer
 from learning.ml import MLAlgorithm
 from typing import Callable
 
+DATASET = "datasets/"
+REGRESSION = DATASET + "regression/"
+CLASSIFICATION = DATASET + "classification/"
+
 def auto_mpg() -> tuple[int, MLAlgorithm]:
-    ds = Dataset("datasets\\auto-mpg.csv", "MPG", TargetType.Regression)
+    ds = Dataset(REGRESSION + "auto-mpg.csv", "MPG", TargetType.Regression)
     ds.numbers(["HP"])
     ds.handle_na()
 
@@ -12,7 +16,7 @@ def auto_mpg() -> tuple[int, MLAlgorithm]:
     return (1000, LinearRegression(ds, learning_rate=0.0001))
 
 def automobile() -> tuple[int, MLAlgorithm]:
-    ds = Dataset("datasets\\regression\\automobile.csv", "symboling", TargetType.Regression)
+    ds = Dataset(REGRESSION + "automobile.csv", "symboling", TargetType.Regression)
     attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
     ds.factorize(attributes_to_modify)
 
@@ -22,19 +26,19 @@ def automobile() -> tuple[int, MLAlgorithm]:
     return (1000, LinearRegression(ds, learning_rate=0.004))
 
 def power_plant() -> tuple[int, MLAlgorithm]:
-    ds = Dataset("datasets\\regression\\power-plant.csv", "energy-output", TargetType.Regression)
+    ds = Dataset(REGRESSION + "power-plant.csv", "energy-output", TargetType.Regression)
     ds.normalize()
     return (80, LinearRegression(ds, learning_rate=0.1))
 
 
 def electrical_grid() -> tuple[int, MLAlgorithm]:
-    ds = Dataset("datasets\\classification\\electrical_grid.csv", "stabf", TargetType.Classification)
+    ds = Dataset(CLASSIFICATION + "electrical_grid.csv", "stabf", TargetType.Classification)
     ds.factorize(["stabf"])
     ds.normalize()
     return (1000, LogisticRegression(ds, learning_rate=0.08))
 
 def frogs() -> tuple[int, MLAlgorithm]:
-    ds = Dataset("datasets\\classification\\frogs.csv", "Species", TargetType.MultiClassification)
+    ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification)
     ds.remove(["Family", "Genus", "RecordID"])
     ds.factorize(["Species"])
     return (1000, MultiLayerPerceptron(ds, learning_rate=0.08))
@@ -55,5 +59,5 @@ def learn_dataset(function:Callable[..., tuple[int, MLAlgorithm]], epochs:int=10
     return ml
 
 if __name__ == "__main__":
-    ml = learn_dataset(automobile)
+    ml = learn_dataset(electrical_grid)
     print(ml.accuracy(ml.testset))
diff --git a/src/learning/supervised.py b/src/learning/supervised.py
index f5a2006..709f09b 100644
--- a/src/learning/supervised.py
+++ b/src/learning/supervised.py
@@ -8,16 +8,21 @@ from learning.data import Dataset, Data
 class GradientDescent(MLAlgorithm):
     theta:np.ndarray
     alpha:float
+    lambd:float
 
-    def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
-        self.__init__(dataset)
+    def __init__(self, dataset:Dataset, learning_rate:float=0.1, regularization:float=0.01) -> None:
+        super().__init__(dataset)
         self.theta = np.random.rand(self.learnset.param)
         self.alpha = max(0, learning_rate)
+        self.lambd = max(0, regularization)
 
     def learning_step(self) -> float:
         x, y, m, _ = self.learnset.as_tuple()
 
-        self.theta -= self.alpha * (1/m) * np.sum((self._h0(x) - y) * x.T, axis=1)
+        regularization = (self.lambd / m) * self.theta
+        regularization[0] = 0
+        derivative = (1/m) * np.sum((self._h0(x) - y) * x.T, axis=1)
+        self.theta -= self.alpha * (derivative + regularization)
         return self._loss(x, y, m)
 
     def predict_loss(self, dataset:Data) -> float:
@@ -54,5 +59,16 @@ class MultiLayerPerceptron(MLAlgorithm):
     neurons: list[np.ndarray]
 
     def __init__(self, dataset:Dataset, layers:list[int]=[4,3]) -> None:
-        self.__init__(dataset)
+        super().__init__(dataset)
+
+    def _h0(self, x:np.ndarray) -> np.ndarray:
+        pass
+    def learning_step(self) -> float:
+        pass
+    def predict_loss(self, dataset:np.ndarray) -> float:
+        pass
+    def get_parameters(self):
+        pass
+    def set_parameters(self, parameters):
+        pass
 