Updated NanoSocratesSpecial to work with TokeNano

This commit is contained in:
GassiGiuseppe 2025-10-03 00:59:15 +02:00
parent 7c935d2700
commit a5b8692a77

View File

@ -5,34 +5,43 @@ class NanoSocratesSpecial(Encoder):
def __init__( def __init__(
self, self,
initial_vocabulary: list[str] | None = None vocabulary_index: int ,
) -> None: vocabulary: dict[str, int] | None = None
) -> None:
super().__init__() super().__init__()
self.__vocabulary: dict[str, int] = {} if vocabulary is None:
self.__vocabulary: dict[str, int] = {}
else:
self.__vocabulary: dict[str, int] = vocabulary
self.__reverse_vocabulary: dict[int, str] = {} self.__reverse_vocabulary: dict[int, str] = {}
self.__current_index = 0
if initial_vocabulary is None: if vocabulary_index is None:
return self.__vocabulary_index = 0
else:
self.__vocabulary_index = vocabulary_index
for word in initial_vocabulary: # self.__build_reverse_vocabulary()
CURRENT_INDEX = self.__current_index
self.__vocabulary[word] = CURRENT_INDEX
self.__reverse_vocabulary[CURRENT_INDEX] = word
self.__current_index += 1
@property def build_reverse_vocabulary(self):
def vocabulary_size(self): self.__reverse_vocabulary = {v: k for k, v in self.__vocabulary.items()}
return self.__current_index
def add_special_word(self, word:str): # @property
CURRENT_INDEX = self.__current_index # def vocabulary_size(self):
# return self.__current_index
def set_vocabulary_index(self, vocabulary_index: int):
    """Overwrite the vocabulary index stored on this encoder.

    Args:
        vocabulary_index: value to store as the current index. No
            validation is performed; the value is stored as given.
    """
    new_index = vocabulary_index
    self.__vocabulary_index = new_index
def add_special_word_to_vocabulary(self, word:str):
self.__vocabulary_index = self.__vocabulary_index + 1
CURRENT_INDEX = self.__vocabulary_index
self.__vocabulary[word] = CURRENT_INDEX self.__vocabulary[word] = CURRENT_INDEX
self.__reverse_vocabulary[CURRENT_INDEX] = word self.__reverse_vocabulary[CURRENT_INDEX] = word
self.__current_index += 1
def encode(self, word: str) -> list[int]: def encode(self, word: str) -> list[int]:
ID = self.__vocabulary.get(word) ID = self.__vocabulary.get(word)
@ -52,3 +61,5 @@ class NanoSocratesSpecial(Encoder):
return WORD return WORD
def get_reverse_vocabulary(self) -> dict[int, str]:
    """Return the id-to-word mapping held by this encoder.

    The stored dictionary itself is returned, not a copy, so changes the
    caller makes to it are visible to the encoder.
    """
    reverse_vocabulary = self.__reverse_vocabulary
    return reverse_vocabulary