34 lines
1.0 KiB
Python
34 lines
1.0 KiB
Python
|
|
class TokenCompletationTransformer:
    """Split a token sequence into an input part and a completion part.

    The split point is placed on an occurrence of the SOTL delimiter token,
    chosen so that roughly ``input_percent`` of the delimiter occurrences
    are accounted for by the input part.
    """

    def __init__(self, SOTL_token, EOS_token, input_percent: float = 0.5) -> None:
        """Store the tokens and the split fraction.

        Args:
            SOTL_token: Token value used as the delimiter when looking for
                the split point.
            EOS_token: Token value appended to the end of the input part
                (presumably an end-of-sequence marker).
            input_percent: Fraction of the delimiter occurrences used to
                locate the split point. Values that yield a count outside
                the available range are clamped when splitting.
        """
        self.__SOTL_token = SOTL_token
        self.__EOS_token = EOS_token
        self.__input_percent = input_percent

    def get_completation_tuple(
        self,
        token_sequence: list[int],
    ) -> tuple[list[int], list[int]]:
        """Return ``(x_list, y_list)`` split at the selected delimiter.

        Let ``k = int(count(SOTL) * input_percent)``, clamped to the number
        of delimiters present. ``x_list`` holds every token before the k-th
        delimiter followed by the EOS token; ``y_list`` holds the k-th
        delimiter and everything after it. When ``k`` is 0 the split is at
        the start of the sequence, so ``x_list`` is just ``[EOS]`` and
        ``y_list`` is a copy of the whole sequence.

        Args:
            token_sequence: The tokens to split. It is not modified.

        Returns:
            A tuple ``(x_list, y_list)`` of two new lists.
        """
        sotl_total = token_sequence.count(self.__SOTL_token)
        # Clamp so that a fraction above 1 cannot run past the last
        # delimiter and a negative fraction cannot produce a negative count.
        target = min(max(int(sotl_total * self.__input_percent), 0), sotl_total)

        # Default to splitting at the very start; a target of zero must not
        # turn into index -1, which would silently slice off the last token.
        split_index = 0
        if target > 0:
            seen = 0
            for index, token in enumerate(token_sequence):
                if token == self.__SOTL_token:
                    seen += 1
                    if seen == target:
                        split_index = index
                        break

        # Slicing copies, so appending EOS does not touch the caller's list.
        x_list = token_sequence[:split_index]
        x_list.append(self.__EOS_token)
        y_list = token_sequence[split_index:]
        return (x_list, y_list)
|
|
|