Fixed the pipeline and added a decoding utility
This commit is contained in:
@@ -189,7 +189,7 @@ class NanoSocratesBPE(Encoder):
|
||||
token_stack.appendleft(right_token)
|
||||
token_stack.appendleft(left_token)
|
||||
|
||||
return UTF_8_STRING_ARR.decode("utf-8")
|
||||
return UTF_8_STRING_ARR.decode("utf-8", errors="ignore")
|
||||
|
||||
def __token_decode(self, token_id: int) -> tuple[int, int]:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user