Updated TokeNano to be easier to read

This commit is contained in:
GassiGiuseppe 2025-10-04 19:01:21 +02:00
parent bbadd4c521
commit 845c63dbef

View File

@ -31,11 +31,15 @@ class TokeNanoCore:
for piece, token_type in self.__splitter.split_text(corpus): for piece, token_type in self.__splitter.split_text(corpus):
if token_type == TokenType.SPECIAL: if token_type == TokenType.SPECIAL:
output.extend(self.__special_encoder.encode(piece)) ENCODED_PIECE = self.__special_encoder.encode(piece)
output.extend(ENCODED_PIECE)
continue
# slow but clear # slow but clear
if token_type == TokenType.BPE: if token_type == TokenType.BPE:
output.extend(self.__bpe_encoder.encode(piece)) ENCODED_PIECE = self.__bpe_encoder.encode(piece)
output.extend(ENCODED_PIECE)
continue
return output return output
@ -46,11 +50,13 @@ class TokeNanoCore:
if token_type == TokenType.SPECIAL: if token_type == TokenType.SPECIAL:
output_str += self.__special_encoder.decode( output_str += self.__special_encoder.decode(
token token
) # it accept an integer )
continue
# slow but clear # slow but clear
if token_type == TokenType.BPE: if token_type == TokenType.BPE:
output_str += self.__bpe_encoder.decode( output_str += self.__bpe_encoder.decode(
token token
) # it accept a list of integer )
continue
return output_str return output_str