evaluator WIP

This commit is contained in:
GassiGiuseppe 2025-10-12 18:18:20 +02:00
parent 2ccec9efb8
commit 7dedbc481b


@@ -0,0 +1,37 @@
import evaluate
class Evaluator:
    def __init__(self) -> None:
        # Text-based metrics loaded via the Hugging Face `evaluate` library.
        self._rouge = evaluate.load("rouge")
        # rougeLsum may score poorly here: it expects the sentences of each
        # text to be separated by "\n".
        self._rouge_types = ["rougeLsum", "rouge1", "rouge2"]
        self._bleu = evaluate.load("bleu")
        self._meteor = evaluate.load("meteor")

    def rdf2txt_rouge_evaluation(self, preds: list[str], refs: list[str]) -> dict[str, float]:
        results = self._rouge.compute(
            predictions=preds,
            references=refs,
            rouge_types=self._rouge_types,
            use_stemmer=True,
            use_aggregator=True,  # aggregate to a single F1 score per ROUGE type
        )
        return {k: float(results[k]) for k in self._rouge_types}
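
    # --- Hypothetical helper, not part of this commit ---
    # A minimal sketch of the preprocessing rougeLsum expects: it is computed
    # over newline-separated sentences, so splitting each text on sentence
    # boundaries before scoring should improve it. Assumes nltk and its
    # "punkt" tokenizer data are available (nltk.download("punkt")).
    @staticmethod
    def _newline_separated(texts: list[str]) -> list[str]:
        import nltk  # assumed dependency, used only by this sketch
        return ["\n".join(nltk.sent_tokenize(t)) for t in texts]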

    def rdf2txt_bleu_evaluation(self, preds: list[str], refs: list[str]) -> float:
        # BLEU via `evaluate`; references must be a list of reference lists,
        # since each prediction may be scored against several references,
        # hence the [[r] for r in refs] wrapping.
        results = self._bleu.compute(predictions=preds, references=[[r] for r in refs])
        return float(results["bleu"])  # score on the metric's native 0-1 scale

    def rdf2txt_meteor_evaluation(self, preds: list[str], refs: list[str]) -> float:
        # Same reference wrapping as for BLEU.
        res = self._meteor.compute(predictions=preds, references=[[r] for r in refs])
        return float(res["meteor"])

    def txt2rdf_precision_evaluation(self, preds: list[str], refs: list[str]) -> float:
        # WIP stub: precision for text-to-RDF is not implemented yet.
        raise NotImplementedError
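
A minimal usage sketch (not part of the commit), assuming the `evaluate` package and the metric dependencies it pulls in (`rouge_score`, `nltk`) are installed:

preds = ["The cat sat on the mat."]
refs = ["A cat was sitting on the mat."]

ev = Evaluator()
print(ev.rdf2txt_rouge_evaluation(preds, refs))   # {'rougeLsum': ..., 'rouge1': ..., 'rouge2': ...}
print(ev.rdf2txt_bleu_evaluation(preds, refs))    # BLEU in [0, 1]
print(ev.rdf2txt_meteor_evaluation(preds, refs))  # METEOR in [0, 1]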