Project structure
This commit is contained in:
51
src/app.py
Normal file
51
src/app.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from learning.data import Dataset
|
||||
from learning.supervised import LinearRegression
|
||||
from learning.ml import MLAlgorithm
|
||||
from plot import Plot
|
||||
|
||||
def auto_mpg() -> MLAlgorithm:
    """Build a linear-regression model for the auto-mpg dataset."""
    dataset = Dataset("datasets\\auto-mpg.csv", "MPG")

    # "HP" arrives as text in this CSV; coerce it before cleaning.
    dataset.to_numbers(["HP"])
    dataset.handle_na()
    dataset.regularize(excepts=["Cylinders", "Year", "Origin"])

    return LinearRegression(dataset, learning_rate=0.0001)
|
||||
|
||||
def automobile() -> MLAlgorithm:
    """Build a linear-regression model for the automobile dataset."""
    dataset = Dataset("datasets\\regression\\automobile.csv", "symboling")

    # Categorical columns that must be factorized into numeric codes
    # (and excluded from regularization below).
    categorical = [
        "fuel-system", "engine-type", "drive-wheels", "body-style",
        "make", "engine-location", "aspiration", "fuel-type",
        "num-of-cylinders", "num-of-doors",
    ]

    dataset.factorize(categorical)
    dataset.to_numbers()
    dataset.handle_na()
    dataset.regularize(excepts=categorical)

    return LinearRegression(dataset, learning_rate=0.001)
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Guard added so importing this module does not kick off training;
    # the pre-refactor version of this script had the same guard.
    epoch = 50000
    skip = 1000  # drop the first, very-high-error iterations from the plot

    lr = automobile()

    # learn() returns the per-iteration training and test error histories.
    train, test = lr.learn(epoch)

    plot = Plot("Error", "Time", "Mean Error")
    plot.line("training", "red", data=train[skip:])
    plot.line("test", "blue", data=test[skip:])

    # (A dead, commented-out live-update loop was removed here.)
    plot.wait()
|
||||
|
||||
|
||||
@@ -10,13 +10,16 @@ class Dataset:
|
||||
# move target to the start
|
||||
col_target = data.pop(target)
|
||||
data.insert(0, target, col_target)
|
||||
data.insert(1, "Bias", 1.0)
|
||||
|
||||
self.original = data
|
||||
self.data = data
|
||||
self.target = target
|
||||
self.classification = (data[target].dtype == object)
|
||||
|
||||
def regularize(self, excepts:list=[]) -> Self:
|
||||
excepts.append(self.target)
|
||||
excepts.append("Bias")
|
||||
for col in self.data:
|
||||
if col not in excepts:
|
||||
dt = self.data[col]
|
||||
@@ -44,10 +47,11 @@ class Dataset:
|
||||
self.data = self.data.sample(frac=1)
|
||||
return self
|
||||
|
||||
def as_ndarray(self, bias=True):
    """Return the data as a NumPy array, optionally inserting a bias column.

    Works on a copy, so the stored frame is never modified. The bias
    column of 1.0s goes at position 1, right after the target column.
    """
    frame = self.data.copy()
    if bias:
        frame.insert(1, "Bias", 1.0)
    return frame.to_numpy()
|
||||
def as_ndarray(self) -> np.ndarray:
    """Expose the stored DataFrame's values as a plain NumPy array."""
    values = self.data.to_numpy()
    return values
|
||||
|
||||
def get_index(self, column:str) -> int:
    """Return the positional index of *column* in the underlying DataFrame."""
    return self.data.columns.get_loc(column)
|
||||
|
||||
class PrincipalComponentAnalisys:
|
||||
def __init__(self, data:np.ndarray) -> None:
|
||||
41
src/learning/ml.py
Normal file
41
src/learning/ml.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from learning.data import Dataset
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MLAlgorithm(ABC):
    """Abstract base for supervised learning algorithms.

    Concrete subclasses implement one optimization step (learning_step)
    and a test-set error measure (test_error); learn() drives the loop.
    """

    dataset: Dataset       # source dataset wrapper
    testset: np.ndarray    # held-out rows, used only to measure error
    learnset: np.ndarray   # rows consumed by learning_step()

    def _set_dataset(self, dataset:Dataset, split:float=0.2):
        """Shuffle *dataset* and partition it into learn/test sets.

        *split* is the fraction of rows held out for testing. Fix: the
        original assigned the first `split` fraction (20%) of rows to the
        learn set and the remaining 80% to the test set, inverting the
        conventional meaning of a 0.2 test split implied by the names.
        """
        ndarray = dataset.shuffle().as_ndarray()
        cut = int(ndarray.shape[0] * split)

        self.dataset = dataset
        self.testset = ndarray[:cut]    # small held-out slice
        self.learnset = ndarray[cut:]   # bulk of the rows, used for training

    def _split_data_target(self, dset:np.ndarray) -> tuple[np.ndarray, np.ndarray, int]:
        """Split a row set into (features, target, row count).

        The Dataset constructor moves the target to column 0, so the
        target vector is column 0 and the features are everything else.
        """
        x = np.delete(dset, 0, 1)   # all columns except the target
        y = dset[:, 0]              # target column
        m = dset.shape[0]           # number of rows
        return (x, y, m)

    def learn(self, times:int) -> tuple[list, list]:
        """Run at least one learning step `times` times.

        Returns (train, test): per-iteration error histories.
        """
        train = []
        test = []
        for _ in range(0, max(1, times)):
            train.append(self.learning_step())
            test.append(self.test_error())
        return (train, test)

    @abstractmethod
    def learning_step(self) -> float:
        """Perform one training iteration; return the current training error."""
        pass

    @abstractmethod
    def test_error(self) -> float:
        """Return the current error on the held-out test set."""
        pass
|
||||
29
src/learning/supervised.py
Normal file
29
src/learning/supervised.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import math as math
|
||||
import numpy as np
|
||||
|
||||
from ml import MLAlgorithm
|
||||
from learning.data import Dataset
|
||||
|
||||
class LinearRegression(MLAlgorithm):
    """Linear regression trained with batch gradient descent."""

    def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
        self._set_dataset(dataset)

        # One weight per feature column; the target column is excluded.
        feature_count = dataset.data.shape[1] - 1
        self.theta = np.random.rand(feature_count)
        self.alpha = max(0, learning_rate)  # negative rates clamp to 0

    def learning_step(self) -> float:
        """Apply one gradient-descent update; return the post-update training error."""
        x, y, m = self._split_data_target(self.learnset)

        residuals = x.dot(self.theta) - y
        self.theta = self.theta - self.alpha * (1/m) * np.sum(residuals * x.T, axis=1)
        return self._error(x, y, m)

    def test_error(self) -> float:
        """Return the error measured on the held-out test rows."""
        x, y, m = self._split_data_target(self.testset)
        return self._error(x, y, m)

    def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
        # Halved mean squared error: 1/(2m) * sum((h(x) - y)^2)
        gap = x.dot(self.theta) - y
        return 1/(2*m) * np.sum(gap ** 2)
|
||||
@@ -1,83 +0,0 @@
|
||||
import math as math
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from data import Dataset
|
||||
|
||||
|
||||
class LinearRegression:
    """Linear regression via batch gradient descent (legacy, pre-refactor version).

    Superseded by learning.supervised.LinearRegression built on MLAlgorithm.
    """

    def __init__(self, dataset:Dataset, learning_rate:float=0.1) -> None:
        # Shuffle, then materialize as an ndarray (target in column 0).
        ndarray = dataset.shuffle().as_ndarray()
        parameters = ndarray.shape[1] - 1 #removing the result

        # NOTE(review): with split = 20% of rows, the TEST set receives the
        # remaining 80% and the training set only 20% — looks inverted; confirm.
        split = int(ndarray.shape[0] * 0.2)
        self.testset = ndarray[split:]
        self.trainingset = ndarray[:split]

        self.theta = np.random.rand(parameters)  # random initial weights
        self.alpha = max(0, learning_rate)       # negative rates clamp to 0

    def learn(self, times:int) -> list:
        """Run at least one training step `times` times.

        Returns (train, test): per-iteration error histories.
        """
        train = []
        test = []
        for _ in range(0, max(1, times)):
            train.append(self.learning_step())
            test.append(self.test_error())
        return (train, test)

    def learning_step(self) -> float:
        """One batch gradient-descent update; returns the post-update training error."""
        theta = self.theta
        alpha = self.alpha
        x = np.delete(self.trainingset, 0, 1)  # features (drop target column)
        y = self.trainingset[:, 0]             # target values
        m = self.trainingset.shape[0]          # number of training rows

        diff = (x.dot(theta) - y)
        sum = np.sum(diff * x.T, axis=1)       # gradient summed over samples
        # `theta -= ...` mutates self.theta in place through the alias,
        # so the reassignment below is redundant but harmless.
        theta -= alpha * (1/m) * sum
        self.theta = theta
        return self._error(x, y, m)

    def test_error(self) -> float:
        """Error on the held-out test rows."""
        x = np.delete(self.testset, 0, 1)
        y = self.testset[:, 0]
        m = self.testset.shape[0]
        return self._error(x, y, m)

    def _error(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
        # Halved mean squared error: 1/(2m) * sum((h(x) - y)^2)
        diff = (x.dot(self.theta) - y)
        return 1/(2*m) * np.sum(diff ** 2)
|
||||
|
||||
def auto_mpg(epoch:int):
    """Train linear regression on auto-mpg; returns (train, test) error lists."""
    dataset = Dataset("datasets\\auto-mpg.csv", "MPG")

    # "HP" arrives as text in this CSV; coerce it before cleaning.
    dataset.to_numbers(["HP"])
    dataset.handle_na()
    dataset.regularize(excepts=["Cylinders", "Year", "Origin"])

    model = LinearRegression(dataset, learning_rate=0.0001)
    return model.learn(epoch)
|
||||
|
||||
def automobile(epoch:int):
    """Train linear regression on the automobile dataset; returns (train, test) error lists."""
    dataset = Dataset("datasets\\regression\\automobile.csv", "symboling")

    # Categorical columns: factorized to numeric codes and kept out of
    # regularization below.
    categorical = [
        "fuel-system", "engine-type", "drive-wheels", "body-style",
        "make", "engine-location", "aspiration", "fuel-type",
        "num-of-cylinders", "num-of-doors",
    ]
    dataset.factorize(categorical)
    dataset.to_numbers()
    dataset.handle_na()
    dataset.regularize(excepts=categorical)

    model = LinearRegression(dataset, learning_rate=0.001)
    return model.learn(epoch)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    epoch = 10000
    # Plot only the last 90% of iterations (negative index counts from the end),
    # skipping the initial high-error portion of the curve.
    skip = - int(epoch * 0.9)
    err_train, err_test = auto_mpg(epoch)
    plt.title("Error")
    plt.xlabel("Time")
    plt.ylabel("Mean Error")
    plt.plot(err_train[skip:-1], color="red")   # training error curve
    plt.plot(err_test[skip:-1], color="blue")   # test error curve
    plt.show()
|
||||
63
src/plot.py
Normal file
63
src/plot.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import matplotlib.pyplot as plt
|
||||
from typing_extensions import Self
|
||||
|
||||
class Plot:
    """Thin wrapper around matplotlib's pyplot state machine for live line plots."""

    def __init__(self, title:str, labelx:str, labely:str) -> None:
        plt.title(title)
        plt.xlabel(labelx)
        plt.ylabel(labely)
        plt.ion()               # interactive mode so update() can redraw live
        plt.show(block=False)

        # label -> (line artist, y-data list, x-data list, max kept points)
        self.data = dict()

    def wait(self) -> Self:
        """Block until the plot window is closed."""
        plt.ioff()
        plt.show()
        return self

    def scatter(self, label:str, datax:list[float], datay:list[float], color:str) -> Self:
        """Draw a one-off scatter series (not tracked for live updates)."""
        plt.scatter(datax, datay, color=color, label=label)
        return self

    def line(self, label:str, color:str, data:list[float]=None, max_length:int=100) -> Self:
        """Register a named line series for live updating.

        Fix: the original default was a mutable `data=[]`; that list was
        stored in self.data and later mutated by update(), so every line
        created without explicit data shared one list across instances.
        """
        data = [] if data is None else data
        # Plot a placeholder point when no data yet, so the artist exists.
        line, = plt.plot(data if len(data) > 0 else [0], label=label, color=color)
        x = [] if len(data) == 0 else [*range(len(data))]

        self.data[label] = (line, data, x, max_length)
        plt.legend()
        return self

    def update(self, label:str, newdata:float) -> Self:
        """Append one point to the named series, trimming to its max length."""
        # Renamed the unpacked limit from `max` to avoid shadowing the builtin.
        line, datay, datax, max_length = self.data[label]

        x = 0 if len(datax) == 0 else datax[-1]
        datax.append(x + 1)
        datay.append(newdata)

        # Keep only the most recent max_length points.
        remove = len(datax) - max_length
        if remove > 0:
            del datax[:remove]
            del datay[:remove]

        line.set_data((datax, datay))
        return self

    def update_limits(self) -> Self:
        """Rescale the axes to the current data and redraw.

        Raises if the figure window has been closed by the user.
        """
        if not plt.get_fignums():
            raise Exception("plot closed!")
        limy_top = 0.1
        limx_top, limx_bot = (0, float("inf"))  # inf replaces the old magic sentinel

        for line_entry in self.data.values():
            _, datay, datax, _ = line_entry
            limy_top = max(max(datay), limy_top)
            limx_top = max(max(datax), limx_top)
            limx_bot = min(min(datax), limx_bot)
        # Avoid a zero-width x range when only one point exists.
        if limx_top == limx_bot:
            limx_top += 1

        plt.xlim(limx_bot, limx_top)
        plt.ylim(0, limy_top)
        plt.draw()
        plt.pause(0.0000000001)  # yield to the GUI event loop
        return self
|
||||
|
||||
Reference in New Issue
Block a user