Fixed illegal tokens being added in target output

This commit is contained in:
Christian Risi 2025-10-06 16:16:47 +02:00
parent d3bba9b944
commit e93710af08

View File

@@ -90,6 +90,11 @@ class SpannedMasker:
SPAN_LENGTH = min(CANDIDATE_SPAN, REMAINING_MASK)
for _ in range(0, SPAN_LENGTH):
INNER_TOKEN = sequence[mask_index]
if self.__is_illegal_token(INNER_TOKEN, forbidden_tokens):
continue
MASK[mask_index] = True
mask_index += 1