Fix pipeline and add a decoding utility

This commit is contained in:
Christian Risi
2025-10-09 13:24:48 +02:00
parent f3b83eda3d
commit aac7675b30
7 changed files with 78 additions and 29 deletions

View File

@@ -189,7 +189,7 @@ class NanoSocratesBPE(Encoder):
token_stack.appendleft(right_token)
token_stack.appendleft(left_token)
return UTF_8_STRING_ARR.decode("utf-8")
return UTF_8_STRING_ARR.decode("utf-8", errors="ignore")
def __token_decode(self, token_id: int) -> tuple[int, int]: