34 lines
1.0 KiB
Python
Raw Normal View History

2025-10-08 00:39:16 +02:00
class TokenCompletationTransformer:
def __init__(self,SOTL_token,EOS_token, input_percent:float = 0.5) -> None:
self.__SOTL_token = SOTL_token
self.__EOS_token = EOS_token
self.__input_percent = input_percent
pass
def get_completation_tuple(
self,
token_sequence: list[int],
)-> tuple[list[int], list[int]]:
# split the sequence by encoded(<SOTL>), dont take the first, firts pertenge in as X the other as Y
sotl_count =int( token_sequence.count(self.__SOTL_token) * self.__input_percent)
sotl_index = 0
percent_index = 0
while sotl_index < sotl_count:
token = token_sequence[percent_index]
if token == self.__SOTL_token:
sotl_index += 1
percent_index+=1
percent_index = percent_index -1
x_list = token_sequence[:percent_index]
x_list.append(self.__EOS_token)
y_list = token_sequence[percent_index:]
return (x_list,y_list)