Fixes for evaluation

This commit is contained in:
Christian Risi
2025-10-16 19:20:23 +02:00
parent 9ff117f437
commit 892f91aad7
10 changed files with 492 additions and 31 deletions

View File

@@ -28,6 +28,7 @@ class Batcher:
tokenizer: BPE.TokeNanoCore,
masker: SpannedMasker,
seed: int = 0,
debug = False
) -> None:
# ABSTRACT, TRIPLE
# tasks:
@@ -44,6 +45,7 @@ class Batcher:
self._seed = seed
# self._token_completation = TokenCompletationTransformer(sotl,eos)
self._completation_task_token_truncator = truncate_rdf_list
self.__debug = debug
def batch(self, batch_size) -> Generator[
tuple[
@@ -142,6 +144,7 @@ class Batcher:
return out_X, out_Y, padding_X, padding_Y
def __rdf2txt_transformation(self, batch: pd.DataFrame):
X: list[list[int]]
task_token = self._tokenizer.encode(SpecialToken.RDF_TO_TEXT.value)
out = batch.rename(columns={"RDFs": "X", "Abstract": "Y"})[["X", "Y"]]
out["X"] = [task_token + x for x in out["X"]]