MLP works

- fixed wrong loss function (replaced squared error with cross-entropy for the MLP)
- fixed random weight initialization (scaled by sqrt(2/fan_in), i.e. He initialization)
- fixed learning rate (now a constructor parameter, default 0.1, applied to weight updates)
This commit is contained in:
2024-08-12 18:47:29 +02:00
parent a992539116
commit 7739878a2c
2 changed files with 13 additions and 10 deletions

View File

@@ -65,13 +65,15 @@ def frogs() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification) ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification)
ds.remove(["Family", "Genus", "RecordID"]) ds.remove(["Family", "Genus", "RecordID"])
ds.factorize(["Species"]) ds.factorize(["Species"])
return (ds, MultiLayerPerceptron(ds, [4, 3]), sklearn.neural_network.MLPClassifier([4, 3], 'relu')) size = [8, 5]
return (ds, MultiLayerPerceptron(ds, size, 0.1), sklearn.neural_network.MLPClassifier(size, 'relu'))
def iris() -> tuple[Dataset, MLAlgorithm, Any]: def iris() -> tuple[Dataset, MLAlgorithm, Any]:
ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification) ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification)
ds.factorize(["Class"]) ds.factorize(["Class"])
ds.normalize() ds.normalize()
return (ds, MultiLayerPerceptron(ds, [4, 3]), sklearn.neural_network.MLPClassifier([4, 3], 'relu')) size = [4, 3]
return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu'))
# ******************** # ********************
# Main & random # Main & random

View File

@@ -4,6 +4,7 @@ import numpy as np
from abc import abstractmethod from abc import abstractmethod
from learning.ml import MLAlgorithm from learning.ml import MLAlgorithm
from learning.data import Dataset, Data from learning.data import Dataset, Data
NOT_ZERO = 1e-15
class GradientDescent(MLAlgorithm): class GradientDescent(MLAlgorithm):
theta:np.ndarray theta:np.ndarray
@@ -51,16 +52,15 @@ class LogisticRegression(GradientDescent):
return 1 / (1 + np.exp(-self.theta.dot(self.with_bias(x).T))) return 1 / (1 + np.exp(-self.theta.dot(self.with_bias(x).T)))
def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float: def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
not_zero = 1e-15
h0 = self._h0(x) h0 = self._h0(x)
diff = - y*np.log(h0 + not_zero) - (1-y)*np.log(1-h0 + not_zero) diff = - y*np.log(h0 + NOT_ZERO) - (1-y)*np.log(1-h0 + NOT_ZERO)
return 1/m * np.sum(diff) return 1/m * np.sum(diff)
class MultiLayerPerceptron(MLAlgorithm): class MultiLayerPerceptron(MLAlgorithm):
layers: list[np.ndarray] layers: list[np.ndarray]
activations: list[np.ndarray] activations: list[np.ndarray]
def __init__(self, dataset:Dataset, layers:list[int]) -> None: def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1) -> None:
super().__init__(dataset) super().__init__(dataset)
input = self._learnset.x.shape[1] input = self._learnset.x.shape[1]
output = self._learnset.y.shape[1] output = self._learnset.y.shape[1]
@@ -71,9 +71,10 @@ class MultiLayerPerceptron(MLAlgorithm):
self.layers = [] self.layers = []
self.activations = [] self.activations = []
self.learning_rate = learning_rate
for next in layers: for next in layers:
current = np.random.rand(input + 1, next) # +1 bias current = np.random.rand(input + 1, next) * np.sqrt(2 / input) # +1 bias, sqrt is He init
self.layers.append(current) self.layers.append(current)
input = next input = next
@@ -98,7 +99,7 @@ class MultiLayerPerceptron(MLAlgorithm):
if l > 0: if l > 0:
delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias
delta[activation <= 0] = 0 # derivative ReLU delta[activation <= 0] = 0 # derivative ReLU
self.layers[l] -= deltaW self.layers[l] -= deltaW * self.learning_rate
return self._predict_loss(self._learnset) return self._predict_loss(self._learnset)
@@ -108,9 +109,9 @@ class MultiLayerPerceptron(MLAlgorithm):
total_sum = np.sum(exp_input, axis=1, keepdims=True) total_sum = np.sum(exp_input, axis=1, keepdims=True)
return exp_input / total_sum return exp_input / total_sum
def _predict_loss(self, dataset:Data) -> float: def _predict_loss(self, dataset:Data) -> float: # cross-entropy
diff = self._h0(dataset.x) - dataset.y diff = dataset.y * np.log(self._h0(dataset.x) + NOT_ZERO)
return 1/(2*dataset.size) * np.sum(diff ** 2) return -np.mean(np.sum(diff, axis=1))
def _get_parameters(self): def _get_parameters(self):