Fixes for evaluation

This commit is contained in:
Christian Risi
2025-10-16 19:20:23 +02:00
parent 9ff117f437
commit 892f91aad7
10 changed files with 492 additions and 31 deletions

View File

@@ -0,0 +1,16 @@
import torch
import Project_Model.Libs.BPE as BPE
def decode_batch(
    batch: torch.Tensor, tokenizer: "BPE.TokeNanoCore", uknonw_token: int
) -> list[str]:
    """Decode each row of a 2-D batch of token ids into a string.

    Args:
        batch: Tensor whose shape unpacks into exactly two dimensions; every
            row is decoded independently.
        tokenizer: Object providing ``vocabulary_size`` and
            ``decode(list_of_ids)``.
        uknonw_token: Id substituted for every id strictly greater than
            ``tokenizer.vocabulary_size`` before decoding. The misspelled
            name is kept so keyword callers keep working.

    Returns:
        One decoded string per row of ``batch``, in row order.

    Raises:
        ValueError: If ``batch`` does not have exactly two dimensions.
    """
    # Unpacking the shape is kept purely as a guard: it rejects anything that
    # is not 2-D with a ValueError before any decoding happens.
    _rows, _columns = batch.shape

    # Read once instead of once per token.
    vocabulary_size = tokenizer.vocabulary_size

    # NOTE(review): the comparison is a strict ``>``, so an id equal to
    # ``vocabulary_size`` is passed to the tokenizer unchanged. If valid ids
    # are 0..vocabulary_size-1 this should be ``>=`` -- confirm against the
    # tokenizer before changing it.
    #
    # ``batch.tolist()`` converts the whole tensor, so it is called once here
    # rather than once per row.
    return [
        tokenizer.decode(
            [
                uknonw_token if token > vocabulary_size else token
                for token in row
            ]
        )
        for row in batch.tolist()
    ]