From 7dedbc481b751347681a80e729f5081eb86aca3b Mon Sep 17 00:00:00 2001
From: GassiGiuseppe
Date: Sun, 12 Oct 2025 18:18:20 +0200
Subject: [PATCH] evaluator WIP

---
 Project_Model/Libs/Evaluation/evaluation.py | 37 +++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Project_Model/Libs/Evaluation/evaluation.py

diff --git a/Project_Model/Libs/Evaluation/evaluation.py b/Project_Model/Libs/Evaluation/evaluation.py
new file mode 100644
index 0000000..e2abae2
--- /dev/null
+++ b/Project_Model/Libs/Evaluation/evaluation.py
@@ -0,0 +1,37 @@
+import evaluate
+
+
+class Evaluator:
+    def __init__(self) -> None:
+        # Text-based metrics, loaded once and reused across calls.
+        self._rouge = evaluate.load("rouge")
+        # rougeLsum may score poorly here: it expects sentences to be separated by "\n".
+        self._rouge_types = ["rougeLsum", "rouge1", "rouge2"]
+
+        self._bleu = evaluate.load("bleu")
+        self._meteor = evaluate.load("meteor")
+
+    def rdf2txt_rouge_evaluation(self, preds: list[str], refs: list[str]) -> dict[str, float]:
+        results = self._rouge.compute(
+            predictions=preds,
+            references=refs,
+            rouge_types=self._rouge_types,
+            use_stemmer=True,
+            use_aggregator=True,  # aggregated F1 per ROUGE type
+        )
+        return {k: float(results[k]) for k in self._rouge_types}
+
+    def rdf2txt_bleu_evaluation(self, preds: list[str], refs: list[str]) -> float:
+        # BLEU via evaluate; references must be a list of reference lists,
+        # one list per prediction, hence the [[r] for r in refs] wrapping.
+        results = self._bleu.compute(predictions=preds, references=[[r] for r in refs])
+        return float(results["bleu"])  # score on a 0-1 scale
+
+    def rdf2txt_meteor_evaluation(self, preds: list[str], refs: list[str]) -> float:
+        # Same reference format as BLEU.
+        results = self._meteor.compute(predictions=preds, references=[[r] for r in refs])
+        return float(results["meteor"])
+
+    def txt2rdf_precision_evaluation(self, preds: list[str], refs: list[str]):
+        # WIP: precision for the text-to-RDF direction, not yet implemented.
+        raise NotImplementedError
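
Usage note (not part of the patch): a minimal sketch of how the Evaluator might be called,
assuming the package path Project_Model.Libs.Evaluation is importable and that evaluate can
download the "rouge", "bleu" and "meteor" metric bundles. The prediction/reference strings
below are placeholders, not data from this project.

    from Project_Model.Libs.Evaluation.evaluation import Evaluator

    preds = ["The Eiffel Tower is located in Paris."]     # model output (placeholder)
    refs = ["The Eiffel Tower stands in Paris, France."]  # gold reference (placeholder)

    evaluator = Evaluator()
    rouge_scores = evaluator.rdf2txt_rouge_evaluation(preds, refs)   # {"rougeLsum": ..., "rouge1": ..., "rouge2": ...}
    bleu_score = evaluator.rdf2txt_bleu_evaluation(preds, refs)      # float, 0-1 scale
    meteor_score = evaluator.rdf2txt_meteor_evaluation(preds, refs)  # float, 0-1 scale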