from .Encoder import Encoder
from ..Errors import OutOfDictionaryException


class NanoSocratesSpecial(Encoder):
    """Encoder that maps whole special words to single integer ids.

    Ids are handed out sequentially starting at 0, in registration order.
    A forward map (word -> id) and a reverse map (id -> word) are kept in
    sync so that ``encode`` and ``decode`` are inverses of each other.
    """

    def __init__(self, initial_vocabulary: list[str] | None = None) -> None:
        """Create the encoder, optionally pre-registering some words.

        Args:
            initial_vocabulary: Words to register in order. ``None`` leaves
                the vocabulary empty. Repeated words are registered once and
                keep the id of their first occurrence.
        """
        super().__init__()

        self.__vocabulary: dict[str, int] = {}
        self.__reverse_vocabulary: dict[int, str] = {}
        self.__current_index = 0

        if initial_vocabulary is None:
            return

        for word in initial_vocabulary:
            self.__register(word)

    @property
    def vocabulary_size(self) -> int:
        """Number of ids assigned so far (one per distinct registered word)."""
        return self.__current_index

    def add_special_word(self, word: str) -> None:
        """Register ``word`` under the next free id.

        Registering a word that is already known is a no-op: it keeps its
        existing id and ``vocabulary_size`` does not change.
        """
        self.__register(word)

    def encode(self, word: str) -> list[int]:
        """Return the id of ``word`` wrapped in a one-element list.

        Raises:
            OutOfDictionaryException: If ``word`` has not been registered.
        """
        token_id = self.__vocabulary.get(word)

        # Compare against None explicitly: id 0 is a valid id but is falsy.
        if token_id is None:
            raise OutOfDictionaryException()

        return [token_id]

    def decode(self, token_id: int) -> str:
        """Return the word registered under ``token_id``.

        Raises:
            OutOfDictionaryException: If no word has that id.
        """
        word = self.__reverse_vocabulary.get(token_id)

        if word is None:
            raise OutOfDictionaryException()

        return word

    def __register(self, word: str) -> None:
        """Bind ``word`` to the next free id in both maps, once per word."""
        # Re-registering would move the forward mapping to a new id while the
        # old id kept pointing at the same word in the reverse map, breaking
        # the encode/decode round trip and inflating the vocabulary size.
        if word in self.__vocabulary:
            return

        index = self.__current_index
        self.__vocabulary[word] = index
        self.__reverse_vocabulary[index] = word
        self.__current_index = index + 1