22 lines
534 B
Python
22 lines
534 B
Python
|
|
from Project_Model.Libs.BPE.Classes.TokeNanoCore import TokeNanoCore
|
|
|
|
class TestTokeNano:
    """Round-trip checks for ``TokeNanoCore``."""

    def test_decode_encode_simple(self):
        """Encode a short text and check that decoding returns it unchanged."""
        # Pair -> token-id table handed to the tokenizer:
        #   ("a", "b")  -> 256
        #   (256, 256)  -> 257
        #   (257, 257)  -> 258
        pair_table = {
            (ord("a"), ord("b")): 256,
            (256, 256): 257,
            (257, 257): 258,
        }
        special_tokens = ["<SOT>", "<EOT>"]
        text = "<SOT>abababab<EOT>"

        # NOTE(review): an earlier note suggested the "abababab" part should
        # collapse to the single id 258; that is not asserted here because the
        # ids used for the special tokens are not known from this test alone.
        tokenizer = TokeNanoCore(pair_table, special_tokens)
        token_ids = tokenizer.encode(text)
        round_trip = tokenizer.decode(token_ids)

        assert text == round_trip
|