# Source: NanoSocrates/Project_Model/Tests/tokenano_test.py
# Repository web-view snapshot (22 lines, 534 B); last change 2025-10-03 01:04:47 +02:00
from Project_Model.Libs.BPE.Classes.TokeNanoCore import TokeNanoCore
class TestTokeNano:
    """Round-trip tests for ``TokeNanoCore``."""

    def test_decode_encode_simple(self):
        """Encode a short string, decode it again, and expect the original text."""
        TEXT = "<SOT>abababab<EOT>"

        # Merge table passed to the tokenizer, keyed by token pair:
        #   (a, b)     -> 256
        #   (256, 256) -> 257
        #   (257, 257) -> 258
        VOCABULARY = {
            (ord("a"), ord("b")): 256,
            (256, 256): 257,
            (257, 257): 258,
        }

        # NOTE(review): the original author noted "EXPECTED = [258]", presumably
        # for the "abababab" part only. It is not asserted here because the ids
        # assigned to the special tokens are not visible from this test; confirm
        # against TokeNanoCore before pinning the encoded sequence.
        # The second argument is presumably the list of special tokens.
        TOKE_NANO = TokeNanoCore(VOCABULARY, ["<SOT>", "<EOT>"])

        ENCODED = TOKE_NANO.encode(TEXT)
        DECODED = TOKE_NANO.decode(ENCODED)

        assert TEXT == DECODED