from .Encoder import Encoder
from ..Errors import OutOfDictionaryException


class NanoSocratesSpecial(Encoder):
    """Encoder that maps whole special words to single token ids.

    Two dictionaries are kept: ``word -> id`` for encoding and
    ``id -> word`` for decoding. New words receive ids from a running
    counter (see ``add_special_word_to_vocabulary``).
    """

    def __init__(
        self,
        vocabulary_index: int | None,
        vocabulary: dict[str, int] | None = None,
    ) -> None:
        """Create the encoder.

        Args:
            vocabulary_index: Current value of the id counter; the next word
                added gets ``vocabulary_index + 1``. ``None`` is treated
                as ``0``.
            vocabulary: Optional existing ``word -> id`` mapping. It is stored
                by reference (not copied), so words added later are also
                visible through the caller's dict.
        """
        super().__init__()

        self.__vocabulary: dict[str, int] = (
            {} if vocabulary is None else vocabulary
        )
        self.__vocabulary_index: int = (
            0 if vocabulary_index is None else vocabulary_index
        )

        # Derive the decode table from the supplied vocabulary right away;
        # otherwise decode() would reject every pre-existing token until the
        # caller remembered to call build_reverse_vocabulary().
        self.__reverse_vocabulary: dict[int, str] = {
            token_id: word for word, token_id in self.__vocabulary.items()
        }

    def build_reverse_vocabulary(self):
        """Rebuild the ``id -> word`` table from the current vocabulary."""
        self.__reverse_vocabulary = {
            token_id: word for word, token_id in self.__vocabulary.items()
        }

    def set_vocabulary_index(self, vocabulary_index: int):
        """Overwrite the id counter used when adding new words."""
        self.__vocabulary_index = vocabulary_index

    def add_special_word_to_vocabulary(self, word: str):
        """Register ``word`` under the next free id.

        The counter is incremented *before* it is used, so the word receives
        the previous counter value plus one. Both lookup tables are updated.
        """
        self.__vocabulary_index += 1
        new_id = self.__vocabulary_index
        self.__vocabulary[word] = new_id
        self.__reverse_vocabulary[new_id] = word

    def encode(self, word: str) -> list[int]:
        """Return the id of ``word`` wrapped in a one-element list.

        Raises:
            OutOfDictionaryException: If ``word`` is not in the vocabulary.
        """
        token_id = self.__vocabulary.get(word)
        if token_id is None:
            raise OutOfDictionaryException()
        return [token_id]

    def decode(self, token_id: int) -> str:
        """Return the word registered under ``token_id``.

        Raises:
            OutOfDictionaryException: If ``token_id`` is not in the reverse
                vocabulary.
        """
        word = self.__reverse_vocabulary.get(token_id)
        if word is None:
            raise OutOfDictionaryException()
        return word

    def get_reverse_vocabulary(self) -> dict[int, str]:
        """Return the internal ``id -> word`` table (the live dict, not a copy)."""
        return self.__reverse_vocabulary