Changed the training phase to take the raw data directly instead of its encoded form
This commit is contained in:
parent
2194cc7b4f
commit
2e595a3a23
@ -46,7 +46,7 @@ def split_fit(object: tuple[NanoSocratesBPE, list[list[int]]]):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# We are sure of its type
|
# We are sure of its type
|
||||||
NEW_DATA.append(output) # type: ignore
|
NEW_DATA.append(piece) # type: ignore
|
||||||
|
|
||||||
return (bpe, NEW_DATA, memory)
|
return (bpe, NEW_DATA, memory)
|
||||||
|
|
||||||
@ -56,14 +56,14 @@ def split_encode(object: tuple[NanoSocratesBPE, list[list[int]]]):
|
|||||||
|
|
||||||
NEW_DATA: list[list[int]] = []
|
NEW_DATA: list[list[int]] = []
|
||||||
|
|
||||||
for piece in data:
|
for index, piece in zip(range(0, len(data)), data):
|
||||||
output = bpe.encode_intermediate(piece)
|
output = bpe.encode_intermediate(piece)
|
||||||
|
|
||||||
if len(output) < 2:
|
if len(output) < 2:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# We are sure of its type
|
# We are sure of its type
|
||||||
NEW_DATA.append(output) # type: ignore
|
NEW_DATA.append(data[index]) # type: ignore
|
||||||
|
|
||||||
return NEW_DATA
|
return NEW_DATA
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user