Fixes for evaluation

2025-10-16 19:20:23 +02:00
parent 9ff117f437
commit 892f91aad7
10 changed files with 492 additions and 31 deletions
--- a/Project_Model/Libs/TransformerUtils/decode_batch.py
+++ b/Project_Model/Libs/TransformerUtils/decode_batch.py
@@ -0,0 +1,16 @@
+import torch
+import Project_Model.Libs.BPE as BPE
+
+def decode_batch(batch: torch.Tensor, tokenizer: BPE.TokeNanoCore, uknonw_token: int) -> list[str]:
+    """Decode each row of a 2-D batch of token ids; return one string per row.
+
+    Ids >= ``tokenizer.vocabulary_size`` are replaced by ``uknonw_token``
+    before decoding (assumes valid ids are 0..vocabulary_size - 1; confirm).
+    Raises ValueError when ``batch.shape`` does not unpack into two dimensions.
+    """
+    _rows, _cols = batch.shape  # rejects anything that is not 2-D
+    limit = tokenizer.vocabulary_size
+    # tolist() converts the whole tensor: do it once, not once per row.
+    rows: list[list[int]] = batch.tolist()
+    clean = ([uknonw_token if t >= limit else t for t in row] for row in rows)
+    return [tokenizer.decode(ids) for ids in clean]