Added test asserting that no illegal tokens are included in the target (excluding its final token)
This commit is contained in:
parent
e93710af08
commit
1797571bb2
@ -16,7 +16,7 @@ class TestSpannedMasker:
|
||||
CORPUS_PATH = Path("Project_Model/Tests/spanner_file/mask.txt")
|
||||
TEXT = CORPUS_PATH.read_text("utf-8")
|
||||
CORRUPTION_PERCENTAGE = 0.15
|
||||
TOLERANCE = 0.05
|
||||
TOLERANCE = 0.15
|
||||
|
||||
TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST)
|
||||
VOCABULARY_SIZE = TOKENIZER.vocabulary_size
|
||||
@ -79,6 +79,9 @@ class TestSpannedMasker:
|
||||
)
|
||||
)
|
||||
|
||||
for token in TARGET[:len(TARGET) - 1]:
|
||||
assert token not in ILLEGAL_TOKENS
|
||||
|
||||
assert ACTUAL_CORRUPTION_PERCENTAGE > CORRUPTION_PERCENTAGE - TOLERANCE
|
||||
assert ACTUAL_CORRUPTION_PERCENTAGE < CORRUPTION_PERCENTAGE + TOLERANCE
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user