MLP works
- fixed wrong loss function
- fixed rand init
- fixed learning rate
@@ -65,13 +65,15 @@ def frogs() -> tuple[Dataset, MLAlgorithm, Any]:
     ds = Dataset(CLASSIFICATION + "frogs.csv", "Species", TargetType.MultiClassification)
     ds.remove(["Family", "Genus", "RecordID"])
     ds.factorize(["Species"])
-    return (ds, MultiLayerPerceptron(ds, [4, 3]), sklearn.neural_network.MLPClassifier([4, 3], 'relu'))
+    size = [8, 5]
+    return (ds, MultiLayerPerceptron(ds, size, 0.1), sklearn.neural_network.MLPClassifier(size, 'relu'))

 def iris() -> tuple[Dataset, MLAlgorithm, Any]:
     ds = Dataset(CLASSIFICATION + "iris.csv", "Class", TargetType.MultiClassification)
     ds.factorize(["Class"])
     ds.normalize()
-    return (ds, MultiLayerPerceptron(ds, [4, 3]), sklearn.neural_network.MLPClassifier([4, 3], 'relu'))
+    size = [4, 3]
+    return (ds, MultiLayerPerceptron(ds, size), sklearn.neural_network.MLPClassifier(size, 'relu'))

 # ********************
 # Main & random
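For reference, a minimal sketch of what the `size` list configures on the sklearn side, using scikit-learn's bundled iris data instead of the project's `Dataset` CSV loader (this example is illustrative, not repo code): positionally, `MLPClassifier(size, 'relu')` sets `hidden_layer_sizes=size` and `activation='relu'`, i.e. two hidden layers.

```python
# Sketch only: shows what size = [4, 3] means for the sklearn baseline.
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier

X, y = load_iris(return_X_y=True)
size = [4, 3]  # two hidden layers: 4 units, then 3 units

clf = MLPClassifier(hidden_layer_sizes=size, activation='relu', max_iter=2000)
clf.fit(X, y)
print(clf.score(X, y))
```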
@@ -4,6 +4,7 @@ import numpy as np
 from abc import abstractmethod
 from learning.ml import MLAlgorithm
 from learning.data import Dataset, Data
+NOT_ZERO = 1e-15

 class GradientDescent(MLAlgorithm):
     theta:np.ndarray
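The module-level `NOT_ZERO` constant guards the logarithms used below against `log(0)`. A quick numpy check (my own illustration, not from the repo) of why the epsilon matters:

```python
import numpy as np

NOT_ZERO = 1e-15
h0 = np.array([0.0, 0.5, 1.0])  # predicted probabilities, including exact 0 and 1

with np.errstate(divide='ignore'):
    print(np.log(h0))            # [-inf, -0.693, 0.0] -- the -inf poisons the loss
print(np.log(h0 + NOT_ZERO))     # [-34.54, -0.693, ~0.0] -- finite everywhere
```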
@@ -51,16 +52,15 @@ class LogisticRegression(GradientDescent):
         return 1 / (1 + np.exp(-self.theta.dot(self.with_bias(x).T)))

     def _loss(self, x:np.ndarray, y:np.ndarray, m:int) -> float:
-        not_zero = 1e-15
         h0 = self._h0(x)
-        diff = - y*np.log(h0 + not_zero) - (1-y)*np.log(1-h0 + not_zero)
+        diff = - y*np.log(h0 + NOT_ZERO) - (1-y)*np.log(1-h0 + NOT_ZERO)
         return 1/m * np.sum(diff)

 class MultiLayerPerceptron(MLAlgorithm):
     layers: list[np.ndarray]
     activations: list[np.ndarray]

-    def __init__(self, dataset:Dataset, layers:list[int]) -> None:
+    def __init__(self, dataset:Dataset, layers:list[int], learning_rate:float=0.1) -> None:
         super().__init__(dataset)
         input = self._learnset.x.shape[1]
         output = self._learnset.y.shape[1]
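For clarity, a standalone sketch of the binary cross-entropy that `_loss` computes, written as a hypothetical free function with the same epsilon-guarded formula as the diff:

```python
import numpy as np

NOT_ZERO = 1e-15

def log_loss(y: np.ndarray, h0: np.ndarray) -> float:
    # Mean of -y*log(h0) - (1-y)*log(1-h0), epsilon-guarded as in the diff.
    m = y.shape[0]
    diff = -y * np.log(h0 + NOT_ZERO) - (1 - y) * np.log(1 - h0 + NOT_ZERO)
    return 1 / m * np.sum(diff)

y  = np.array([1.0, 0.0, 1.0])   # labels
h0 = np.array([0.9, 0.2, 0.6])   # predicted probabilities
print(log_loss(y, h0))           # ~0.28: low, predictions mostly agree with labels
```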
@@ -71,9 +71,10 @@ class MultiLayerPerceptron(MLAlgorithm):

         self.layers = []
         self.activations = []
+        self.learning_rate = learning_rate

         for next in layers:
-            current = np.random.rand(input + 1, next) # +1 bias
+            current = np.random.rand(input + 1, next) * np.sqrt(2 / input) # +1 bias, sqrt is He init
             self.layers.append(current)
             input = next
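The `* np.sqrt(2 / input)` factor is the He scaling recommended for ReLU layers. Note the diff keeps `np.random.rand` (uniform on [0, 1), so not zero-mean), whereas He initialization as usually described draws from a zero-mean Gaussian with standard deviation sqrt(2 / fan_in). A comparison sketch (my own, not repo code):

```python
import numpy as np

fan_in, fan_out = 8, 5

# Diff's variant: uniform [0, 1) scaled by sqrt(2 / fan_in) -- not zero-mean.
w_uniform = np.random.rand(fan_in + 1, fan_out) * np.sqrt(2 / fan_in)

# Textbook He init: zero-mean Gaussian with std sqrt(2 / fan_in).
w_gauss = np.random.randn(fan_in + 1, fan_out) * np.sqrt(2 / fan_in)

print(w_uniform.std(), w_gauss.std())  # Gaussian variant's std ~ sqrt(2/8) = 0.5
```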
@@ -98,7 +99,7 @@ class MultiLayerPerceptron(MLAlgorithm):
             if l > 0:
                 delta = np.dot(delta, self.layers[l][:-1].T) # ignoring bias
                 delta[activation <= 0] = 0 # derivative ReLU
-            self.layers[l] -= deltaW
+            self.layers[l] -= deltaW * self.learning_rate

         return self._predict_loss(self._learnset)
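Scaling the accumulated gradient by `learning_rate` before subtracting is the standard gradient-descent step; previously the raw gradient was applied at full magnitude. A toy sketch of the effect on a single parameter (illustrative only):

```python
# Minimize f(w) = (w - 3)^2 with plain gradient descent.
w, learning_rate = 0.0, 0.1
for _ in range(100):
    grad = 2 * (w - 3)         # df/dw
    w -= learning_rate * grad  # same update shape as self.layers[l] -= deltaW * lr
print(w)  # ~3.0; with learning_rate = 1.0 the iterates oscillate and never settle
```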
@@ -108,9 +109,9 @@ class MultiLayerPerceptron(MLAlgorithm):
         total_sum = np.sum(exp_input, axis=1, keepdims=True)
         return exp_input / total_sum

-    def _predict_loss(self, dataset:Data) -> float:
-        diff = self._h0(dataset.x) - dataset.y
-        return 1/(2*dataset.size) * np.sum(diff ** 2)
+    def _predict_loss(self, dataset:Data) -> float: # cross-entropy
+        diff = dataset.y * np.log(self._h0(dataset.x) + NOT_ZERO)
+        return -np.mean(np.sum(diff, axis=1))


     def _get_parameters(self):
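The new `_predict_loss` is the categorical cross-entropy, `-mean(sum(y * log(h), axis=1))`, replacing the old squared-error formula that was wrong for a softmax classifier. A self-contained sketch with an explicit softmax (assumed shapes: one-hot rows in `y`, probability rows from `h`; the max-shift for stability is my addition):

```python
import numpy as np

NOT_ZERO = 1e-15

def softmax(z: np.ndarray) -> np.ndarray:
    exp_z = np.exp(z - z.max(axis=1, keepdims=True))  # shift for numerical stability
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy(y: np.ndarray, h: np.ndarray) -> float:
    # -mean over samples of sum over classes of y * log(h), as in the diff.
    return -np.mean(np.sum(y * np.log(h + NOT_ZERO), axis=1))

y = np.array([[1, 0, 0], [0, 1, 0]], dtype=float)  # one-hot targets
h = softmax(np.array([[2.0, 0.5, 0.1], [0.2, 1.5, 0.3]]))
print(cross_entropy(y, h))  # low when the high-probability class matches y
```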