End of ML
- fixes for clustering - fixes in general
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import numpy as np
|
||||
|
||||
from learning.data import ConfusionMatrix, Data, Dataset, TargetType
|
||||
from sklearn.metrics import silhouette_score, r2_score
|
||||
|
||||
NOT_ZERO = 1e-15
|
||||
LEAKY_RELU = 0.2
|
||||
|
||||
@@ -45,20 +48,34 @@ def cross_entropy_loss(h0:np.ndarray, y:np.ndarray) -> float:
|
||||
# Randoms
|
||||
# **********
|
||||
|
||||
def pearson(h0:np.ndarray, y:np.ndarray) -> float:
    """Return the Pearson correlation coefficient between two value sets.

    Both inputs are flattened first, so a column vector of shape (n, 1)
    paired with a flat vector of shape (n,) is compared element by element
    instead of being broadcast into an (n, n) matrix.

    Args:
        h0: Predicted values.
        y: Target values, with the same number of elements as ``h0``.

    Returns:
        The correlation coefficient, or ``nan`` when either input has zero
        variance (the coefficient is undefined in that case).
    """
    h0 = np.asarray(h0).ravel()
    y = np.asarray(y).ravel()

    diff1 = h0 - h0.mean()
    diff2 = y - y.mean()
    num = np.sum(diff1 * diff2)
    den = np.sqrt(np.sum(diff1**2)) * np.sqrt(np.sum(diff2**2))

    # Zero variance turns the ratio into 0/0; report nan explicitly rather
    # than through a division that emits a RuntimeWarning.
    if den == 0:
        return float("nan")
    return float(num / den)
|
||||
|
||||
def r_squared(h0:np.ndarray, y:np.ndarray) -> float:
    """Return the coefficient of determination (R^2) of predictions vs targets.

    Computed as ``1 - SS_resid / SS_total``. Both inputs are flattened first
    so that (n, 1) predictions and (n,) targets are compared element by
    element instead of being broadcast into an (n, n) matrix.

    Args:
        h0: Predicted values.
        y: Target values, with the same number of elements as ``h0``.

    Returns:
        The R^2 score. When ``y`` is constant the ratio is undefined, so the
        result is 1.0 for a perfect prediction and 0.0 otherwise.
    """
    h0 = np.asarray(h0).ravel()
    y = np.asarray(y).ravel()

    ss_resid = np.sum((y - h0) ** 2)
    ss_total = np.sum((y - np.mean(y)) ** 2)

    # Constant targets give SS_total == 0; return a finite score instead of
    # dividing by zero (which would yield nan or -inf).
    if ss_total == 0:
        return 1.0 if ss_resid == 0 else 0.0
    return float(1 - (ss_resid / ss_total))
|
||||
|
||||
def with_bias(x:np.ndarray) -> np.ndarray:
    """Prepend a bias term of 1 to the input.

    Args:
        x: A 1-D sample or a 2-D array with one sample per row. Array-likes
            such as nested lists are converted with ``np.asarray``.

    Returns:
        For 1-D input, the vector with a leading 1. Otherwise, the array
        with a leading column of ones.
    """
    x = np.asarray(x)
    # A 1-D input gets a single leading 1; anything else gets a column of
    # ones with one entry per row.
    shape = (1,) if x.ndim == 1 else (x.shape[0], 1)
    return np.hstack([np.ones(shape), x])
|
||||
|
||||
def print_metrics(target:TargetType, dataset:Data, h0:np.ndarray) -> None:
    """Print evaluation metrics appropriate for the given target type.

    - ``TargetType.Regression``: R^2 (``r2_score``) and the Pearson
      correlation (``np.corrcoef``) between ``dataset.y`` and ``h0``.
    - ``TargetType.NoTarget``: silhouette score of ``dataset.x`` using ``h0``
      as the cluster labels.
    - Any other target type: a confusion matrix of ``dataset.y`` against
      ``h0``; a 1-D ``h0`` is first thresholded at 0.5 into 0/1 labels.

    Args:
        target: Kind of task the predictions belong to.
        dataset: Data the predictions were computed on.
        h0: Model output for ``dataset``.
    """
    if target == TargetType.Regression:
        print(f"R^2 : {r2_score(dataset.y, h0):0.5f}")
        print(f"Pearson : {np.corrcoef(dataset.y, h0)[0, 1]:0.5f}")
    elif target != TargetType.NoTarget:
        # 1-D output is thresholded at 0.5 into hard 0/1 labels;
        # presumably these are binary probabilities -- confirm.
        if h0.ndim == 1:
            h0 = np.where(h0 > 0.5, 1, 0)
        ConfusionMatrix(dataset.y, h0).print()
    else:
        print(f"Silhouette : {silhouette_score(dataset.x, h0):0.5f}")
    # NOTE(review): separator assumed to be printed for every target type;
    # the original indentation was not recoverable -- confirm.
    print("========================")
|
||||
|
||||
def print_silhouette_weka(ds:Dataset, file_weka:str) -> None:
    """Print the silhouette score of a Weka clustering on rows taken from ``ds``.

    The file ``file_weka`` is loaded as a target-less dataset whose
    ``cluster`` column is factorized. Its last column is used as the cluster
    label and the remaining columns as features. Each row of the split at
    index 2 of ``ds.get_dataset()`` (presumably the test split -- confirm) is
    rounded to 6 decimals and located in the Weka features by exact equality.
    The silhouette score is then computed on the matched rows only.

    Args:
        ds: Dataset providing the rows to evaluate.
        file_weka: Path of the Weka output file with a ``cluster`` column.

    Raises:
        IndexError: If a row has no exact match in the Weka file.
    """
    test, _, _, _ = ds.get_dataset()[2].as_tuple()
    # Rounded to 6 decimals, presumably to match the precision of the Weka
    # export -- confirm.
    test = np.round(test, 6)

    weka_ds = Dataset(file_weka, "", TargetType.NoTarget)
    weka_ds.factorize(["cluster"])

    # No test/validation fraction is requested, so the first split is used.
    weka_data, _, _, _ = weka_ds.get_dataset(test_frac=0, valid_frac=0)[0].as_tuple()
    weka_x, weka_y = weka_data[:, :-1], weka_data[:, -1:]

    # NOTE(review): matching relies on exact float equality; a tolerance-based
    # comparison may be more robust -- confirm against the Weka file format.
    row_idx = []
    for i, row in enumerate(test):
        matches = np.where((weka_x == row).all(axis=1))[0]
        if matches.size == 0:
            raise IndexError(f"row {i} has no exact match in '{file_weka}'")
        # When several Weka rows are identical, the first one is used.
        row_idx.append(matches[0])

    weka_x, weka_y = weka_x[row_idx], weka_y[row_idx].ravel()

    score = silhouette_score(weka_x, weka_y)
    print(score)
|
||||
|
||||
Reference in New Issue
Block a user