Project struct

This commit is contained in:
2024-04-18 18:53:57 +02:00
parent e362cff0c7
commit 18e390d34b
6 changed files with 192 additions and 87 deletions

66
src/learning/data.py Normal file
View File

@@ -0,0 +1,66 @@
import pandas as pd
import numpy as np
from typing_extensions import Self
class Dataset:
def __init__(self, csv:str, target:str) -> None:
data = pd.read_csv(csv)
# move target to the start
col_target = data.pop(target)
data.insert(0, target, col_target)
data.insert(1, "Bias", 1.0)
self.original = data
self.data = data
self.target = target
self.classification = (data[target].dtype == object)
def regularize(self, excepts:list=[]) -> Self:
excepts.append(self.target)
excepts.append("Bias")
for col in self.data:
if col not in excepts:
dt = self.data[col]
self.data[col] = (dt - dt.mean()) / dt.std()
return self
def factorize(self, columns:list=[]) -> Self:
data = self.data
for col in columns:
data[col] = pd.factorize(data[col])[0]
return self
def to_numbers(self, columns:list=[]) -> Self:
data = self.data
for col in self.data.columns:
if data[col].dtype == object:
data[col] = pd.to_numeric(data[col], errors='coerce')
return self
def handle_na(self) -> Self:
self.data = self.data.dropna()
return self
def shuffle(self) -> Self:
self.data = self.data.sample(frac=1)
return self
def as_ndarray(self) -> np.ndarray:
return self.data.to_numpy()
def get_index(self, column:str) -> int:
return self.data.columns.get_loc(column)
class PrincipalComponentAnalisys:
def __init__(self, data:np.ndarray) -> None:
self.data = data
def reduce(self, total:int=0, threshold:float=1) -> Self:
columns = self.data.shape[1]
if total > columns or total <= 0:
total = columns
if threshold <= 0 or threshold > 1:
threshold = 1