Batcher added

This commit is contained in:
GassiGiuseppe
2025-10-10 20:10:08 +02:00
parent bed9718f27
commit 96610612fe
3 changed files with 119 additions and 54 deletions

View File

@@ -31,7 +31,7 @@ class TokeNanoCore:
def vocabulary_size(self):
# Reports the combined vocabulary size of the two encoders held by this object.
# NOTE(review): this text is a diff hunk rendered without its +/- markers and
# without indentation. The two consecutive `return` lines below look like the
# pre-change and post-change versions of the same statement rather than
# sequential code — confirm against the repository before relying on either.
BPE_VOC_SIZE = self.__bpe_encoder.vocabulary_size
SPECIAL_VOC_SIZE = self.__special_encoder.vocabulary_size
# Presumably the line removed by this commit: the plain sum of both sizes.
return BPE_VOC_SIZE + SPECIAL_VOC_SIZE
# Presumably the line added by this commit: the same sum plus one extra entry.
# What the extra entry represents is not shown in this hunk — TODO confirm.
return BPE_VOC_SIZE + SPECIAL_VOC_SIZE + 1
def encode(self, corpus: str) -> list[int]:
output: list[int] = []