Moved the spanned-mask variables into `__init__` for better reliability; also tested.
This commit is contained in:
@@ -21,7 +21,7 @@ class TestSpannedMasker:
|
||||
TOKENIZER = BPE.TokeNanoCore(VOCABULARY, SPECIAL_LIST)
|
||||
VOCABULARY_SIZE = TOKENIZER.vocabulary_size
|
||||
|
||||
MASKER = Transformer.SpannedMasker(CORRUPTION_PERCENTAGE, 3)
|
||||
|
||||
|
||||
TOKENS = TOKENIZER.encode(TEXT)
|
||||
|
||||
@@ -31,10 +31,12 @@ class TestSpannedMasker:
|
||||
|
||||
ILLEGAL_TOKENS: set[int] = SPECIAL_TOKENS.difference(LEGAL_TOKENS)
|
||||
|
||||
MASKER = Transformer.SpannedMasker(VOCABULARY_SIZE,ILLEGAL_TOKENS,CORRUPTION_PERCENTAGE, 3)
|
||||
|
||||
SPECIAL_FORMATTER = TOKENIZER.encode("*<SOT>")[0]
|
||||
END_FORMATTER = TOKENIZER.encode("<EOT>")[0]
|
||||
|
||||
OUTPUT, TARGET = MASKER.mask_sequence(TOKENS, VOCABULARY_SIZE, ILLEGAL_TOKENS)
|
||||
OUTPUT, TARGET = MASKER.mask_sequence(TOKENS)
|
||||
|
||||
UNCORRUPTED_TOKENS = list(
|
||||
filter(lambda token: token <= VOCABULARY_SIZE, OUTPUT)
|
||||
|
||||
Reference in New Issue
Block a user