Changed the training phase to take the data directly instead of its encoding

This commit is contained in:
Christian Risi 2025-10-02 09:56:44 +02:00
parent 2194cc7b4f
commit 2e595a3a23

View File

@@ -46,7 +46,7 @@ def split_fit(object: tuple[NanoSocratesBPE, list[list[int]]]):
continue continue
# We are sure of its type # We are sure of its type
NEW_DATA.append(output) # type: ignore NEW_DATA.append(piece) # type: ignore
return (bpe, NEW_DATA, memory) return (bpe, NEW_DATA, memory)
@@ -56,14 +56,14 @@ def split_encode(object: tuple[NanoSocratesBPE, list[list[int]]]):
NEW_DATA: list[list[int]] = [] NEW_DATA: list[list[int]] = []
for piece in data: for index, piece in zip(range(0, len(data)), data):
output = bpe.encode_intermediate(piece) output = bpe.encode_intermediate(piece)
if len(output) < 2: if len(output) < 2:
continue continue
# We are sure of its type # We are sure of its type
NEW_DATA.append(output) # type: ignore NEW_DATA.append(data[index]) # type: ignore
return NEW_DATA return NEW_DATA