diff --git a/Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py b/Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py index e551d6c..8fe81bb 100644 --- a/Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py +++ b/Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py @@ -5,34 +5,43 @@ class NanoSocratesSpecial(Encoder): def __init__( self, - initial_vocabulary: list[str] | None = None - ) -> None: + vocabulary_index: int , + vocabulary: dict[str, int] | None = None + ) -> None: + super().__init__() - self.__vocabulary: dict[str, int] = {} + if vocabulary is None: + self.__vocabulary: dict[str, int] = {} + else: + self.__vocabulary: dict[str, int] = vocabulary + self.__reverse_vocabulary: dict[int, str] = {} - self.__current_index = 0 - if initial_vocabulary is None: - return + if vocabulary_index is None: + self.__vocabulary_index = 0 + else: + self.__vocabulary_index = vocabulary_index - for word in initial_vocabulary: + # self.__build_reverse_vocabulary() - CURRENT_INDEX = self.__current_index - self.__vocabulary[word] = CURRENT_INDEX - self.__reverse_vocabulary[CURRENT_INDEX] = word - self.__current_index += 1 - @property - def vocabulary_size(self): - return self.__current_index + def build_reverse_vocabulary(self): + self.__reverse_vocabulary = {v: k for k, v in self.__vocabulary.items()} - def add_special_word(self, word:str): - CURRENT_INDEX = self.__current_index + # @property + # def vocabulary_size(self): + # return self.__current_index + + def set_vocabulary_index(self, vocabulary_index: int): + self.__vocabulary_index = vocabulary_index + + def add_special_word_to_vocabulary(self, word:str): + self.__vocabulary_index = self.__vocabulary_index + 1 + CURRENT_INDEX = self.__vocabulary_index self.__vocabulary[word] = CURRENT_INDEX self.__reverse_vocabulary[CURRENT_INDEX] = word - self.__current_index += 1 def encode(self, word: str) -> list[int]: ID = self.__vocabulary.get(word) @@ -52,3 +61,5 @@ class NanoSocratesSpecial(Encoder): return WORD + def get_reverse_vocabulary(self)-> dict[int, str]: + return self.__reverse_vocabulary