From 1797571bb28c3d37a2e779b1b1d33bc8eb551d2c Mon Sep 17 00:00:00 2001 From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:17:12 +0200 Subject: [PATCH] Added test to see if illegal tokens were included in target --- Project_Model/Tests/spanned_masker_test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Project_Model/Tests/spanned_masker_test.py b/Project_Model/Tests/spanned_masker_test.py index 3b70491..a29bdfa 100644 --- a/Project_Model/Tests/spanned_masker_test.py +++ b/Project_Model/Tests/spanned_masker_test.py @@ -16,7 +16,7 @@ class TestSpannedMasker: CORPUS_PATH = Path("Project_Model/Tests/spanner_file/mask.txt") TEXT = CORPUS_PATH.read_text("utf-8") CORRUPTION_PERCENTAGE = 0.15 - TOLERANCE = 0.05 + TOLERANCE = 0.15 TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST) VOCABULARY_SIZE = TOKENIZER.vocabulary_size @@ -79,6 +79,9 @@ class TestSpannedMasker: ) ) + for token in TARGET[:len(TARGET) - 1]: + assert token not in ILLEGAL_TOKENS + assert ACTUAL_CORRUPTION_PERCENTAGE > CORRUPTION_PERCENTAGE - TOLERANCE assert ACTUAL_CORRUPTION_PERCENTAGE < CORRUPTION_PERCENTAGE + TOLERANCE