LogisticRegression
- implemented LR - changed classes tree
This commit is contained in:
15
.vscode/launch.json
vendored
Normal file
15
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: App",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "src\\app.py",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
3
requirements
Normal file
3
requirements
Normal file
@@ -0,0 +1,3 @@
|
||||
matplotlib
|
||||
pandas
|
||||
tqdm
|
||||
60
src/app.py
60
src/app.py
@@ -1,34 +1,48 @@
|
||||
from learning.data import Dataset
|
||||
from learning.supervised import LinearRegression
|
||||
from learning.ml import MLRegression
|
||||
from learning.supervised import LinearRegression, LogisticRegression, MultiLogisticRegression
|
||||
from learning.ml import MLAlgorithm
|
||||
from typing import Callable
|
||||
|
||||
def auto_mpg() -> tuple[int, MLRegression]:
|
||||
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
||||
def auto_mpg() -> tuple[int, MLAlgorithm]:
|
||||
ds = Dataset("datasets\\auto-mpg.csv", "MPG")
|
||||
|
||||
df.to_numbers(["HP"])
|
||||
df.handle_na()
|
||||
df.regularize(excepts=["Cylinders","Year","Origin"])
|
||||
return (1000, LinearRegression(df, learning_rate=0.0001))
|
||||
ds.to_numbers(["HP"])
|
||||
ds.handle_na()
|
||||
ds.regularize(excepts=["Cylinders","Year","Origin"])
|
||||
return (1000, LinearRegression(ds.as_ndarray(), learning_rate=0.0001))
|
||||
|
||||
def automobile() -> tuple[int, MLRegression]:
|
||||
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||
def automobile() -> tuple[int, MLAlgorithm]:
|
||||
ds = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||
|
||||
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
||||
df.factorize(attributes_to_modify)
|
||||
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||
df.handle_na()
|
||||
df.regularize(excepts=attributes_to_modify)
|
||||
return (1000, LinearRegression(df, learning_rate=0.004))
|
||||
ds.factorize(attributes_to_modify)
|
||||
ds.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||
ds.handle_na()
|
||||
ds.regularize(excepts=attributes_to_modify)
|
||||
return (1000, LinearRegression(ds.as_ndarray(), learning_rate=0.004))
|
||||
|
||||
def power_plant() -> tuple[int, MLRegression]:
|
||||
df = Dataset("datasets\\regression\\power-plant.csv", "energy-output")
|
||||
df.regularize()
|
||||
return (80, LinearRegression(df, learning_rate=0.1))
|
||||
def power_plant() -> tuple[int, MLAlgorithm]:
|
||||
ds = Dataset("datasets\\regression\\power-plant.csv", "energy-output")
|
||||
ds.regularize()
|
||||
return (80, LinearRegression(ds.as_ndarray(), learning_rate=0.1))
|
||||
|
||||
|
||||
def electrical_grid() -> tuple[int, MLAlgorithm]:
    """Build a logistic-regression model for the electrical-grid dataset.

    Loads the CSV with ``stabf`` as the target column, calls ``factorize``
    on the target and ``regularize`` on the dataset, then hands the
    resulting array to ``LogisticRegression``.

    Returns:
        A ``(skip, model)`` pair. ``skip`` (1000) is the value the caller
        forwards to ``model.plot(skip=...)``; ``model`` is an untrained
        ``LogisticRegression`` with learning rate 0.08.
    """
    import os  # local import so the block does not depend on file-level imports

    # Join the path from its components instead of hard-coding "\\": the
    # result is identical on Windows and also resolves on POSIX systems.
    csv_path = os.path.join("datasets", "classification", "electrical_grid.csv")

    ds = Dataset(csv_path, "stabf")
    ds.factorize(["stabf"])
    ds.regularize()
    return (1000, LogisticRegression(ds.as_ndarray(), learning_rate=0.08))
|
||||
|
||||
def frogs() -> tuple[int, MLAlgorithm]:
    """Build a multi-class logistic-regression model for the frogs dataset.

    Loads the CSV with ``Species`` as the target column, removes the
    ``Family``, ``Genus`` and ``RecordID`` columns, calls ``factorize`` on
    the target, then hands the resulting array to
    ``MultiLogisticRegression``.

    Returns:
        A ``(skip, model)`` pair. ``skip`` (1000) is the value the caller
        forwards to ``model.plot(skip=...)``; ``model`` is an untrained
        ``MultiLogisticRegression`` with learning rate 0.08.
    """
    import os  # local import so the block does not depend on file-level imports

    # Join the path from its components instead of hard-coding "\\": the
    # result is identical on Windows and also resolves on POSIX systems.
    csv_path = os.path.join("datasets", "classification", "frogs.csv")

    ds = Dataset(csv_path, "Species")
    ds.remove(["Family", "Genus", "RecordID"])
    ds.factorize(["Species"])
    return (1000, MultiLogisticRegression(ds.as_ndarray(), learning_rate=0.08))
|
||||
|
||||
|
||||
|
||||
def learn_dataset(function:Callable[..., tuple[int, MLRegression]], epochs:int=100000, verbose=True)-> None:
|
||||
|
||||
def learn_dataset(function:Callable[..., tuple[int, MLAlgorithm]], epochs:int=10000, verbose=True)-> MLAlgorithm:
|
||||
skip, ml = function()
|
||||
ml.learn(epochs, verbose=verbose)
|
||||
|
||||
@@ -38,8 +52,8 @@ def learn_dataset(function:Callable[..., tuple[int, MLRegression]], epochs:int=1
|
||||
print(f"Loss value: tests={err_tests:1.5f}, valid={err_valid:1.5f}, learn={err_learn:1.5f}")
|
||||
|
||||
ml.plot(skip=skip)
|
||||
|
||||
|
||||
return ml
|
||||
|
||||
if __name__ == "__main__":
|
||||
learn_dataset(automobile)
|
||||
ml = learn_dataset(electrical_grid)
|
||||
print(ml.accuracy(ml.testset))
|
||||
|
||||
@@ -4,7 +4,7 @@ import numpy as np
|
||||
from typing_extensions import Self
|
||||
|
||||
class Dataset:
|
||||
def __init__(self, csv:str, target:str) -> None:
|
||||
def __init__(self, csv:str, target:str, classification:bool=None) -> None:
|
||||
data = pd.read_csv(csv)
|
||||
|
||||
# move target to the start
|
||||
@@ -12,10 +12,18 @@ class Dataset:
|
||||
data.insert(0, target, col_target)
|
||||
data.insert(1, "Bias", 1.0)
|
||||
|
||||
if classification == None:
|
||||
classification = (data[target].dtype == object)
|
||||
|
||||
self.original = data
|
||||
self.data = data
|
||||
self.target = target
|
||||
self.classification = (data[target].dtype == object)
|
||||
self.classification = classification
|
||||
|
||||
def remove(self, columns:list[str]) -> Self:
|
||||
for col in columns:
|
||||
self.data.pop(col)
|
||||
return self
|
||||
|
||||
def regularize(self, excepts:list[str]=[]) -> Self:
|
||||
excepts.append(self.target)
|
||||
|
||||
@@ -1,29 +1,27 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from learning.data import Dataset
|
||||
from plot import Plot
|
||||
from tqdm import tqdm
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MLAlgorithm(ABC):
|
||||
""" Classe generica per gli algoritmi di Machine Learning """
|
||||
|
||||
dataset: Dataset
|
||||
testset: np.ndarray
|
||||
learnset: np.ndarray
|
||||
_valid_loss: list[float]
|
||||
_train_loss: list[float]
|
||||
|
||||
def _set_dataset(self, dataset:Dataset, split:float=0.2):
|
||||
ndarray = dataset.shuffle().as_ndarray()
|
||||
splitT = int(ndarray.shape[0] * split)
|
||||
def _set_dataset(self, dataset:np.ndarray, split:float=0.2):
|
||||
splitT = int(dataset.shape[0] * split)
|
||||
splitV = int(splitT / 2)
|
||||
|
||||
self.dataset = dataset
|
||||
self.validset = ndarray[:splitV]
|
||||
self.testset = ndarray[splitV:splitT]
|
||||
self.learnset = ndarray[splitT:]
|
||||
np.random.shuffle(dataset)
|
||||
self.validset = dataset[:splitV]
|
||||
self.testset = dataset[splitV:splitT]
|
||||
self.learnset = dataset[splitT:]
|
||||
|
||||
def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]:
|
||||
x = np.delete(dset, 0, 1)
|
||||
@@ -71,7 +69,32 @@ class MLAlgorithm(ABC):
|
||||
def test_loss(self) -> float:
    """Return the loss of the current model evaluated on ``self.testset``."""
    held_out = self.testset
    return self.predict_loss(held_out)
|
||||
|
||||
def plot(self, skip:int=1000) -> None:
    """Draw the recorded training and validation loss curves.

    Args:
        skip: number of leading loss samples to leave out of both curves.
            When the training history has ``skip`` entries or fewer, the
            whole history is drawn instead.
    """
    start = 0 if len(self._train_loss) <= skip else skip
    training_curve = self._train_loss[start:]
    validation_curve = self._valid_loss[start:]

    chart = Plot("Loss", "Time", "Mean Loss")
    chart.line("training", "blue", data=training_curve)
    chart.line("validation", "red", data=validation_curve)
    chart.wait()
|
||||
|
||||
def confusion_matrix(self, dataset:np.ndarray) -> np.ndarray:
    """Count actual-versus-predicted labels for a 0.5-threshold classifier.

    The hypothesis output is thresholded at 0.5, so every prediction is
    0 or 1. Entry ``[i, j]`` of the result is the number of rows whose
    actual label is ``i`` and whose predicted label is ``j``.

    Args:
        dataset: rows in the layout expected by ``_split_data_target``.
            Labels are assumed to be non-negative integer codes
            (0, 1, ...) -- TODO confirm against the dataset preparation.

    Returns:
        A square integer matrix, never smaller than 2x2.
    """
    x, y, _ = self._split_data_target(dataset)
    predicted = np.where(self._h0(x) > 0.5, 1, 0).ravel()
    actual = np.asarray(y).astype(int).ravel()

    # Size the matrix from the largest label, not from how many distinct
    # labels this particular split happens to contain: a split holding only
    # class 0 would otherwise give a 1x1 matrix and any prediction of 1
    # would index out of bounds. ``initial=1`` guarantees room for both
    # possible predictions and also covers an empty split.
    classes = int(actual.max(initial=1)) + 1
    conf_matrix = np.zeros((classes, classes), dtype=int)

    # ufunc.at accumulates correctly when the same (actual, predicted)
    # pair occurs more than once; plain fancy-index += would not.
    np.add.at(conf_matrix, (actual, predicted), 1)
    return conf_matrix
|
||||
|
||||
def accuracy(self, dataset:np.ndarray) -> float:
    """Return the fraction of rows in ``dataset`` that are classified correctly.

    Computed from ``confusion_matrix``: the sum of the diagonal (actual
    label equals predicted label) divided by the total number of counted
    rows.

    Returns:
        A float in ``[0, 1]``. When the confusion matrix holds no counts
        (empty dataset) the result is ``0.0`` instead of the NaN that a
        0/0 division would produce.
    """
    conf = self.confusion_matrix(dataset)
    total = np.sum(conf)
    if total == 0:
        return 0.0
    correct = np.sum(np.diagonal(conf))
    return float(correct / total)
|
||||
|
||||
@abstractmethod
def _h0(self, x:np.ndarray) -> np.ndarray:
    """Evaluate the model hypothesis on the feature rows ``x``.

    Abstract: the base declaration does nothing and concrete models must
    provide the implementation.
    """
|
||||
@abstractmethod
def learning_step(self) -> float:
    """Perform one training update and return a loss value.

    Abstract: the base declaration does nothing. The gradient-descent
    implementation in this project updates its parameters on the learning
    split and returns the loss measured on that split.
    """
|
||||
@abstractmethod
|
||||
@@ -80,21 +103,3 @@ class MLAlgorithm(ABC):
|
||||
def get_parameters(self): pass
|
||||
@abstractmethod
def set_parameters(self, parameters):
    """Replace the model's learned parameters with ``parameters``.

    Abstract: the base declaration does nothing and concrete models must
    provide the implementation.
    """
|
||||
|
||||
@abstractmethod
|
||||
def plot(self, skip:int=1000) -> None:
|
||||
skip = skip if len(self._train_loss) > skip else 0
|
||||
plot = Plot("Loss", "Time", "Mean Loss")
|
||||
plot.line("training", "blue", data=self._train_loss[skip:])
|
||||
plot.line("validation", "red", data=self._valid_loss[skip:])
|
||||
plot.wait()
|
||||
|
||||
|
||||
|
||||
class MLRegression(MLAlgorithm):
|
||||
def plot(self, skip: int = 1000) -> None:
|
||||
return super().plot(skip)
|
||||
|
||||
class MLClassification(MLAlgorithm):
|
||||
def plot(self, skip: int = 1000) -> None:
|
||||
return super().plot(skip)
|
||||
@@ -1,35 +1,29 @@
|
||||
import math as math
|
||||
import numpy as np
|
||||
|
||||
from learning.ml import MLRegression, MLClassification
|
||||
from learning.data import Dataset
|
||||
from abc import abstractmethod
|
||||
from learning.ml import MLAlgorithm
|
||||
|
||||
class LinearRegression(MLRegression):
|
||||
class GradientDescent(MLAlgorithm):
|
||||
theta:np.ndarray
|
||||
alpha:float
|
||||
|
||||
def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
|
||||
def __init__(self, dataset:np.ndarray, learning_rate:float=0.1) -> None:
|
||||
self._set_dataset(dataset)
|
||||
|
||||
parameters = dataset.data.shape[1] - 1 #removing the result
|
||||
parameters = dataset.shape[1] - 1 #removing the result
|
||||
self.theta = np.random.rand(parameters)
|
||||
self.alpha = max(0, learning_rate)
|
||||
|
||||
def learning_step(self) -> float:
|
||||
theta = self.theta
|
||||
alpha = self.alpha
|
||||
x, y, m = self._split_data_target(self.learnset)
|
||||
|
||||
self.theta -= alpha * (1/m) * np.sum((x.dot(theta) - y) * x.T, axis=1)
|
||||
return self._error(x, y, m)
|
||||
self.theta -= self.alpha * (1/m) * np.sum((self._h0(x) - y) * x.T, axis=1)
|
||||
return self._loss(x, y, m)
|
||||
|
||||
def predict_loss(self, dataset:np.ndarray) -> float:
|
||||
x, y, m = self._split_data_target(dataset)
|
||||
return self._error(x, y, m)
|
||||
|
||||
def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
|
||||
diff = (x.dot(self.theta) - y)
|
||||
return 1/(2*m) * np.sum(diff ** 2)
|
||||
return self._loss(x, y, m)
|
||||
|
||||
def get_parameters(self):
|
||||
return self.theta.copy()
|
||||
@@ -37,6 +31,23 @@ class LinearRegression(MLRegression):
|
||||
def set_parameters(self, parameters):
|
||||
self.theta = parameters
|
||||
|
||||
@abstractmethod
|
||||
def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: pass
|
||||
|
||||
class LogisticRegression(MLClassification):
|
||||
pass
|
||||
|
||||
class LinearRegression(GradientDescent):
    """Linear model whose loss is half the mean squared error."""

    def _h0(self, x: np.ndarray) -> np.ndarray:
        """Return the linear prediction ``theta . x_i`` for every row of ``x``."""
        return np.dot(self.theta, x.T)

    def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
        """Return ``1/(2m) * sum((x . theta - y)^2)`` over the ``m`` rows."""
        residual = np.dot(x, self.theta) - y
        squared_total = np.sum(np.square(residual))
        return 1/(2*m) * squared_total
|
||||
|
||||
class LogisticRegression(GradientDescent):
    """Binary logistic model with a sigmoid hypothesis and cross-entropy loss."""

    def _h0(self, x: np.ndarray) -> np.ndarray:
        """Return ``sigmoid(theta . x_i)`` for every row of ``x``."""
        logits = self.theta.dot(x.T)
        return 1 / (1 + np.exp(-logits))

    def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
        """Return the mean binary cross-entropy over the ``m`` rows.

        Mathematically this is ``1/m * sum(-y*log(h) - (1-y)*log(1-h))``
        with ``h = sigmoid(z)`` and ``z = theta . x_i``. It is evaluated on
        the logits instead of on ``h``: once the sigmoid saturates to
        exactly 0 or 1, ``log(h)`` / ``log(1-h)`` become ``-inf`` and the
        products turn into ``inf`` or NaN. The identities
        ``-log(h) = log(1 + e^-z)`` and ``-log(1-h) = log(1 + e^z)`` keep
        every term finite.
        """
        logits = self.theta.dot(x.T)
        per_sample = y * np.logaddexp(0, -logits) + (1 - y) * np.logaddexp(0, logits)
        return 1/m * np.sum(per_sample)
|
||||
|
||||
Reference in New Issue
Block a user