NanoSocrates/Project_Model/Libs/TransformerUtils/decode_batch.py

import torch
import Project_Model.Libs.BPE as BPE

def decode_batch(batch: torch.Tensor, tokenizer: BPE.TokeNanoCore ,uknonw_token: int) -> list[str]:

    strings = []

    BATCH, _ = batch.shape

    for i in range(0, BATCH):

        tokens: list[int] = batch.tolist()[i]
        tokens = list(map(lambda x: uknonw_token if x > tokenizer.vocabulary_size else x, tokens))
        strings.append(tokenizer.decode(tokens))

    return strings
Fixes for evaluation 2025-10-16 19:20:23 +02:00			`import torch`
			`import Project_Model.Libs.BPE as BPE`

			`def decode_batch(batch: torch.Tensor, tokenizer: BPE.TokeNanoCore ,uknonw_token: int) -> list[str]:`

			`strings = []`

			`BATCH, _ = batch.shape`

			`for i in range(0, BATCH):`

			`tokens: list[int] = batch.tolist()[i]`
			`tokens = list(map(lambda x: uknonw_token if x > tokenizer.vocabulary_size else x, tokens))`
			`strings.append(tokenizer.decode(tokens))`

			`return strings`