updated tokenano to be easier to read
This commit is contained in:
parent
bbadd4c521
commit
845c63dbef
@ -31,11 +31,15 @@ class TokeNanoCore:
|
||||
for piece, token_type in self.__splitter.split_text(corpus):
|
||||
|
||||
if token_type == TokenType.SPECIAL:
|
||||
output.extend(self.__special_encoder.encode(piece))
|
||||
ENCODED_PIECE = self.__special_encoder.encode(piece)
|
||||
output.extend(ENCODED_PIECE)
|
||||
continue
|
||||
|
||||
# slow but clear
|
||||
if token_type == TokenType.BPE:
|
||||
output.extend(self.__bpe_encoder.encode(piece))
|
||||
ENCODED_PIECE = self.__bpe_encoder.encode(piece)
|
||||
output.extend(ENCODED_PIECE)
|
||||
continue
|
||||
|
||||
return output
|
||||
|
||||
@ -46,11 +50,13 @@ class TokeNanoCore:
|
||||
if token_type == TokenType.SPECIAL:
|
||||
output_str += self.__special_encoder.decode(
|
||||
token
|
||||
) # it accepts an integer
|
||||
)
|
||||
continue
|
||||
|
||||
# slow but clear
|
||||
if token_type == TokenType.BPE:
|
||||
output_str += self.__bpe_encoder.decode(
|
||||
token
|
||||
) # it accepts a list of integers
|
||||
)
|
||||
continue
|
||||
return output_str
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user