Fixes
- cleaner app.py - fixed pandas Warning - better learning method - power-plant csv fixed
This commit is contained in:
File diff suppressed because it is too large
Load Diff
19
src/app.py
19
src/app.py
@@ -2,16 +2,15 @@ from learning.data import Dataset
|
|||||||
from learning.supervised import LinearRegression
|
from learning.supervised import LinearRegression
|
||||||
from learning.ml import MLRegression
|
from learning.ml import MLRegression
|
||||||
|
|
||||||
def auto_mpg() -> MLRegression:
|
def auto_mpg() -> tuple[int, int, MLRegression]:
|
||||||
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
df = Dataset("datasets\\auto-mpg.csv", "MPG")
|
||||||
|
|
||||||
df.to_numbers(["HP"])
|
df.to_numbers(["HP"])
|
||||||
df.handle_na()
|
df.handle_na()
|
||||||
df.regularize(excepts=["Cylinders","Year","Origin"])
|
df.regularize(excepts=["Cylinders","Year","Origin"])
|
||||||
|
return (5000, 1000, LinearRegression(df, learning_rate=0.0001))
|
||||||
|
|
||||||
return LinearRegression(df, learning_rate=0.0001)
|
def automobile() -> tuple[int, int, MLRegression]:
|
||||||
|
|
||||||
def automobile() -> MLRegression:
|
|
||||||
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
df = Dataset("datasets\\regression\\automobile.csv", "symboling")
|
||||||
|
|
||||||
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
attributes_to_modify = ["fuel-system", "engine-type", "drive-wheels", "body-style", "make", "engine-location", "aspiration", "fuel-type", "num-of-cylinders", "num-of-doors"]
|
||||||
@@ -19,14 +18,16 @@ def automobile() -> MLRegression:
|
|||||||
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
df.to_numbers(["normalized-losses", "bore", "stroke", "horsepower", "peak-rpm", "price"])
|
||||||
df.handle_na()
|
df.handle_na()
|
||||||
df.regularize(excepts=attributes_to_modify)
|
df.regularize(excepts=attributes_to_modify)
|
||||||
|
return (5000, 1000, LinearRegression(df, learning_rate=0.002))
|
||||||
|
|
||||||
return LinearRegression(df, learning_rate=0.001)
|
def power_plant() -> tuple[int, int, MLRegression]:
|
||||||
|
df = Dataset("datasets\\regression\\power-plant.csv", "energy-output")
|
||||||
|
df.regularize()
|
||||||
|
return (1000, 80, LinearRegression(df, learning_rate=0.1))
|
||||||
|
|
||||||
|
epoch, skip, ml = automobile()
|
||||||
epoch = 15000
|
|
||||||
ml = automobile()
|
|
||||||
ml.learn(epoch)
|
ml.learn(epoch)
|
||||||
ml.plot()
|
ml.plot(skip=skip)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for _ in range(0, epoch):
|
for _ in range(0, epoch):
|
||||||
|
|||||||
@@ -22,9 +22,10 @@ class Dataset:
|
|||||||
excepts.append("Bias")
|
excepts.append("Bias")
|
||||||
for col in self.data:
|
for col in self.data:
|
||||||
if col not in excepts:
|
if col not in excepts:
|
||||||
datacol = self.data[col]
|
index = self.data.columns.get_loc(col)
|
||||||
|
datacol = self.data.pop(col)
|
||||||
datacol = (datacol - datacol.mean()) / datacol.std()
|
datacol = (datacol - datacol.mean()) / datacol.std()
|
||||||
self.data[col] = datacol
|
self.data.insert(index, col, datacol)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def factorize(self, columns:list[str]=[]) -> Self:
|
def factorize(self, columns:list[str]=[]) -> Self:
|
||||||
|
|||||||
@@ -29,15 +29,25 @@ class MLAlgorithm(ABC):
|
|||||||
return (x, y, m)
|
return (x, y, m)
|
||||||
|
|
||||||
def learn(self, times:int) -> tuple[list, list]:
|
def learn(self, times:int) -> tuple[list, list]:
|
||||||
|
_, train, test = self.learn_until(times)
|
||||||
|
return (train, test)
|
||||||
|
|
||||||
|
def learn_until(self, max_iter:int=1000000, delta:float=0.0) -> tuple[int, list, list]:
|
||||||
train = []
|
train = []
|
||||||
test = []
|
test = []
|
||||||
for _ in range(0, max(1, times)):
|
prev = None
|
||||||
|
count = 0
|
||||||
|
|
||||||
|
while count < max_iter and (prev == None or prev - train[-1] > delta):
|
||||||
|
count += 1
|
||||||
|
prev = train[-1] if len(train) > 0 else None
|
||||||
|
|
||||||
train.append(self.learning_step())
|
train.append(self.learning_step())
|
||||||
test.append(self.test_error())
|
test.append(self.test_error())
|
||||||
|
|
||||||
self.train_error = train
|
self.train_error = train
|
||||||
self.test_error = test
|
self.test_error = test
|
||||||
return (train, test)
|
return (count, train, test)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def learning_step(self) -> float:
|
def learning_step(self) -> float:
|
||||||
@@ -55,6 +65,6 @@ class MLAlgorithm(ABC):
|
|||||||
class MLRegression(MLAlgorithm):
|
class MLRegression(MLAlgorithm):
|
||||||
def plot(self, skip:int=1000) -> None:
|
def plot(self, skip:int=1000) -> None:
|
||||||
plot = Plot("Error", "Time", "Mean Error")
|
plot = Plot("Error", "Time", "Mean Error")
|
||||||
plot.line("training", "red", data=self.train_error[skip:])
|
plot.line("training", "blue", data=self.train_error[skip:])
|
||||||
plot.line("test", "blue", data=self.test_error[skip:])
|
plot.line("test", "red", data=self.test_error[skip:])
|
||||||
plot.wait()
|
plot.wait()
|
||||||
|
|||||||
Reference in New Issue
Block a user