evaluator WIP

This commit is contained in:
GassiGiuseppe 2025-10-12 22:59:07 +02:00
parent 972a73758d
commit 80fd7fd600

View File

@ -1,13 +1,17 @@
import evaluate import evaluate
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
class Evaluator(): class Evaluator():
def __init__(self) -> None:
    """Load every metric object this evaluator uses via ``evaluate.load``."""
    # --- text-based metrics ---
    self.__rouge = evaluate.load("rouge")
    # Author's note: rougeLsum is expected to score poorly here because it
    # assumes the sentences of a text are separated by newlines.
    self.__rouge_types = ["rougeLsum", "rouge1", "rouge2"]
    self._bleu = evaluate.load("bleu")
    self._meteor = evaluate.load("meteor")

    # --- token-based metrics ---
    self.__acc_m = evaluate.load("accuracy")
    self.__prec_m = evaluate.load("precision")
    self.__rec_m = evaluate.load("recall")
    self.__f1_m = evaluate.load("f1")
def rdf2txt_rouge_evaluation(self, preds: list[str], refs: list[str]): def rdf2txt_rouge_evaluation(self, preds: list[str], refs: list[str]):
@ -32,6 +36,35 @@ class Evaluator():
return float(res["meteor"]) return float(res["meteor"])
def __my_accuracy(self, preds: list[list[int]], refs: list[list[int]]) -> float:
    """Return the fraction of predictions that equal their reference exactly.

    The comparison is made on whole token sequences, not on single tokens:
    a prediction counts as correct only when it equals its paired reference.

    Args:
        preds: Predicted token-id sequences.
        refs: Reference token-id sequences, paired with ``preds`` by position.

    Returns:
        ``correct / len(preds)``, or ``0.0`` when ``preds`` is empty.
    """
    total = len(preds)
    if total == 0:
        # An empty batch has no matches; avoid dividing by zero.
        return 0.0
    # zip() stops at the shorter input, so a prediction without a paired
    # reference is never counted as correct but still counts in the total.
    correct = sum(int(p == r) for p, r in zip(preds, refs))
    return correct / total
def __accuracy(self, preds, refs):
    """Return scikit-learn's ``accuracy_score`` of ``preds`` against ``refs``.

    Args:
        preds: Predicted labels, in any form ``accuracy_score`` accepts.
        refs: Ground-truth labels, paired with ``preds`` by position.

    Returns:
        The value returned by ``accuracy_score``.
    """
    # accuracy_score is declared as (y_true, y_pred): the references go first.
    # The score itself is symmetric, so the result is the same either way.
    return accuracy_score(refs, preds)
def __clean_batch_by_pad(
    self,
    preds: list[list[int]],
    refs: list[list[int]],
    pad_token: int = 7000,
) -> tuple[list[list[int]], list[list[int]]]:
    """Cut every prediction/reference pair at the reference's first pad token.

    For each pair, the index of the first ``pad_token`` in the reference is
    used to slice both the reference and the prediction. A reference with no
    pad token is kept whole, and the prediction is sliced to that length.

    Args:
        preds: Predicted token-id sequences.
        refs: Reference token-id sequences, paired with ``preds`` by position.
        pad_token: Id of the padding token. The default of 7000 is the value
            that used to be hard-coded here.
            TODO: pass the real pad id down from the caller.

    Returns:
        A ``(truncated_preds, truncated_refs)`` tuple of new lists; the inputs
        are not modified.
    """
    output_preds = []
    output_refs = []
    for pred, ref in zip(preds, refs):
        try:
            cut = ref.index(pad_token)  # first occurrence of the pad token
        except ValueError:
            cut = len(ref)  # no padding: keep the whole reference
        output_preds.append(pred[:cut])
        output_refs.append(ref[:cut])
    return output_preds, output_refs
def __precision_recall(
    self, preds: list[list[int]], refs: list[list[int]]
) -> dict[str, float]:
    """Compute precision, recall and F1 of ``preds`` against ``refs``.

    Args:
        preds: Predicted labels.
        refs: Ground-truth labels, paired with ``preds`` by position.

    Returns:
        A dict with the float values ``"precision"``, ``"recall"`` and ``"f1"``.
    """
    # TODO: still work in progress.
    # NOTE(review): average="binary" is meant for two-class targets; if the
    # inputs are sequences of token ids this call will likely be rejected by
    # scikit-learn -- confirm the intended averaging and input shape.
    # precision_recall_fscore_support is declared as (y_true, y_pred), so the
    # references must come first; the reverse order swaps precision and recall.
    p, r, f1, _ = precision_recall_fscore_support(
        refs, preds, average="binary", zero_division=0
    )
    return {"precision": float(p), "recall": float(r), "f1": float(f1)}