diff --git a/Project_Model/Libs/BPE/Classes/TokeNanoCore.py b/Project_Model/Libs/BPE/Classes/TokeNanoCore.py index f726a95..3abaeb2 100644 --- a/Project_Model/Libs/BPE/Classes/TokeNanoCore.py +++ b/Project_Model/Libs/BPE/Classes/TokeNanoCore.py @@ -31,11 +31,15 @@ class TokeNanoCore: for piece, token_type in self.__splitter.split_text(corpus): if token_type == TokenType.SPECIAL: - output.extend(self.__special_encoder.encode(piece)) + ENCODED_PIECE = self.__special_encoder.encode(piece) + output.extend(ENCODED_PIECE) + continue # slow but clear if token_type == TokenType.BPE: - output.extend(self.__bpe_encoder.encode(piece)) + ENCODED_PIECE = self.__bpe_encoder.encode(piece) + output.extend(ENCODED_PIECE) + continue return output @@ -46,11 +50,13 @@ class TokeNanoCore: if token_type == TokenType.SPECIAL: output_str += self.__special_encoder.decode( token - ) # it accept an integer + ) + continue # slow but clear if token_type == TokenType.BPE: output_str += self.__bpe_encoder.decode( token - ) # it accept a list of integer + ) + continue return output_str