From 80fd7fd600d26f54a40365db6749e5dbd64fa7fa Mon Sep 17 00:00:00 2001
From: GassiGiuseppe
Date: Sun, 12 Oct 2025 22:59:07 +0200
Subject: [PATCH] evaluator WIP

---
 Project_Model/Libs/Evaluation/evaluation.py | 39 +++++++++++++++++++--
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/Project_Model/Libs/Evaluation/evaluation.py b/Project_Model/Libs/Evaluation/evaluation.py
index e2abae2..73d1483 100644
--- a/Project_Model/Libs/Evaluation/evaluation.py
+++ b/Project_Model/Libs/Evaluation/evaluation.py
@@ -1,13 +1,17 @@
 import evaluate
-
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support
 class Evaluator():
     def __init__(self) -> None:
         # txt based evaluator
         self.__rouge = evaluate.load("rouge")
         self.__rouge_types = ["rougeLsum", "rouge1", "rouge2"]
         # rougeLsum will underperform here: it expects sentences to be separated by "\n"
-
         self._bleu = evaluate.load("bleu")
         self._meteor = evaluate.load("meteor")
+        # token based evaluators
+        self.__acc_m = evaluate.load("accuracy")
+        self.__prec_m = evaluate.load("precision")
+        self.__rec_m = evaluate.load("recall")
+        self.__f1_m = evaluate.load("f1")
 
     def rdf2txt_rouge_evaluation(self, preds: list[str], refs: list[str]):
@@ -32,6 +36,35 @@ class Evaluator():
 
 
         return float(res["meteor"])
 
 
-    def txt2rdf_precision_evaluation(self,preds: list[str], refs: list[str]):
+    def __my_accuracy(self, preds: list[list[int]], refs: list[list[int]]):
+        # exact-match accuracy over whole token sequences, not single tokens
+        total = len(preds)
+        correct = 0
+        for p, r in zip(preds, refs):
+            correct += int(p == r)
+        return correct / total
+
+    def __accuracy(self, preds, refs):
+        return accuracy_score([t for s in refs for t in s], [t for s in preds for t in s])  # token level; sklearn wants flat arrays, y_true first
+    def __clean_batch_by_pad(self, preds: list[list[int]], refs: list[list[int]]):
+        output_preds = []
+        output_refs = []
+        # TODO: percolate the real pad token id from the tokenizer config
+        pad_token: int = 7000  # placeholder value
+        for pred, ref in zip(preds, refs):
+            try:
+                i = ref.index(pad_token)  # index of the first pad token
+            except ValueError:
+                i = len(ref)  # no padding: keep the whole sequence
+            output_preds.append(pred[:i])
+            output_refs.append(ref[:i])
+        return output_preds, output_refs
+
+    def __precision_recall(self, preds: list[list[int]], refs: list[list[int]]):
+        flat_preds = [t for seq in preds for t in seq]  # sklearn needs flat 1-D arrays
+        flat_refs = [t for seq in refs for t in seq]
+        p, r, f1, _ = precision_recall_fscore_support(
+            flat_refs, flat_preds, average="micro", zero_division=0)  # micro-average over token ids
+        return {"precision": float(p), "recall": float(r), "f1": float(f1)}
\ No newline at end of file
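
Reviewer note (not part of the patch): a minimal, self-contained sketch of what
__clean_batch_by_pad plus the sequence-level check in __my_accuracy compute.
The pad id 7000 and the token ids below are placeholders, not values taken from
the project.

    pad_token = 7000  # placeholder; must match the tokenizer actually used
    preds = [[12, 55, 901, 7000, 7000], [33, 44, 7000, 7000, 7000]]
    refs  = [[12, 55, 902, 7000, 7000], [33, 44, 7000, 7000, 7000]]

    # cut every pair at the first pad token of the reference
    clean_preds, clean_refs = [], []
    for p, r in zip(preds, refs):
        i = r.index(pad_token) if pad_token in r else len(r)
        clean_preds.append(p[:i])
        clean_refs.append(r[:i])

    # sequence-level exact match, mirroring __my_accuracy
    acc = sum(int(p == r) for p, r in zip(clean_preds, clean_refs)) / len(clean_preds)
    print(acc)  # 0.5: the second sequence matches exactly, the first differs at one token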
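
Reviewer note on the sklearn calls: precision_recall_fscore_support and
accuracy_score take y_true before y_pred and expect flat 1-D label arrays;
average="binary" only accepts 0/1 labels, so the patch now flattens the
sequences and micro-averages instead. Whether micro is the intended
aggregation is an assumption worth confirming. A quick check of the flattened
call:

    from sklearn.metrics import precision_recall_fscore_support

    flat_refs  = [12, 55, 902, 33, 44]  # y_true first
    flat_preds = [12, 55, 901, 33, 44]
    p, r, f1, _ = precision_recall_fscore_support(
        flat_refs, flat_preds, average="micro", zero_division=0)
    print(p, r, f1)  # 0.8 0.8 0.8: with micro averaging all three equal token accuracy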