diff --git a/Project_Model/Tests/spanned_masker_test.py b/Project_Model/Tests/spanned_masker_test.py index 3b70491..a29bdfa 100644 --- a/Project_Model/Tests/spanned_masker_test.py +++ b/Project_Model/Tests/spanned_masker_test.py @@ -16,7 +16,7 @@ class TestSpannedMasker: CORPUS_PATH = Path("Project_Model/Tests/spanner_file/mask.txt") TEXT = CORPUS_PATH.read_text("utf-8") CORRUPTION_PERCENTAGE = 0.15 - TOLERANCE = 0.05 + TOLERANCE = 0.15 TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST) VOCABULARY_SIZE = TOKENIZER.vocabulary_size @@ -79,6 +79,9 @@ class TestSpannedMasker: ) ) + for token in TARGET[:len(TARGET) - 1]: + assert token not in ILLEGAL_TOKENS + assert ACTUAL_CORRUPTION_PERCENTAGE > CORRUPTION_PERCENTAGE - TOLERANCE assert ACTUAL_CORRUPTION_PERCENTAGE < CORRUPTION_PERCENTAGE + TOLERANCE