Added a way to see vocabulary size

This commit is contained in:
Christian Risi
2025-10-04 19:42:29 +02:00
parent 03cdca1f00
commit da0bdf703b
2 changed files with 11 additions and 1 deletions

View File

@@ -29,6 +29,10 @@ class NanoSocratesSpecial(Encoder):
VOC_LENGTH = len(self.__vocabulary)
return BPE_OFFSET + VOC_LENGTH + 1
@property
def vocabulary_size(self) -> int:
    """Return the number of entries in ``self.vocabulary``."""
    entries = self.vocabulary
    return len(entries)
@property
def vocabulary(self) -> dict[str, int]:
    """Return the mapping held in the private ``__vocabulary`` attribute.

    The stored object itself is returned, not a copy.
    """
    mapping = self.__vocabulary
    return mapping