Added test to verify that illegal tokens are not included in the target
This commit is contained in:
parent
e93710af08
commit
1797571bb2
@@ -16,7 +16,7 @@ class TestSpannedMasker:
|
|||||||
CORPUS_PATH = Path("Project_Model/Tests/spanner_file/mask.txt")
|
CORPUS_PATH = Path("Project_Model/Tests/spanner_file/mask.txt")
|
||||||
TEXT = CORPUS_PATH.read_text("utf-8")
|
TEXT = CORPUS_PATH.read_text("utf-8")
|
||||||
CORRUPTION_PERCENTAGE = 0.15
|
CORRUPTION_PERCENTAGE = 0.15
|
||||||
TOLERANCE = 0.05
|
TOLERANCE = 0.15
|
||||||
|
|
||||||
TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST)
|
TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST)
|
||||||
VOCABULARY_SIZE = TOKENIZER.vocabulary_size
|
VOCABULARY_SIZE = TOKENIZER.vocabulary_size
|
||||||
@@ -79,6 +79,9 @@ class TestSpannedMasker:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
for token in TARGET[:len(TARGET) - 1]:
|
||||||
|
assert token not in ILLEGAL_TOKENS
|
||||||
|
|
||||||
assert ACTUAL_CORRUPTION_PERCENTAGE > CORRUPTION_PERCENTAGE - TOLERANCE
|
assert ACTUAL_CORRUPTION_PERCENTAGE > CORRUPTION_PERCENTAGE - TOLERANCE
|
||||||
assert ACTUAL_CORRUPTION_PERCENTAGE < CORRUPTION_PERCENTAGE + TOLERANCE
|
assert ACTUAL_CORRUPTION_PERCENTAGE < CORRUPTION_PERCENTAGE + TOLERANCE
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user