Merge branch 'dev.embedder' of https://repositories.communitynotfound.work/PoliBa-DeepLearning/NanoSocrates into dev.embedder
This commit is contained in:
@@ -21,4 +21,7 @@ class SpecialToken(Enum):
|
||||
# NanoSocrates
|
||||
START = "<START>"
|
||||
CORPUS_END = "<END>"
|
||||
PAD = "<PAD>"
|
||||
START_OF_SEQUENCE = "<SOS>"
|
||||
END_OF_SEQUENCE = "<EOS>"
|
||||
PAD = "<PAD>"
|
||||
|
||||
@@ -45,9 +45,8 @@ def normalize_sequence(
|
||||
pad_token: int,
|
||||
end_token: int,
|
||||
) -> tuple[list[int], list[bool]]:
|
||||
|
||||
new_sequence = pad_sequence(sequence, max_length, pad_token)
|
||||
new_sequence = truncate_sequence(new_sequence, max_length, end_token)
|
||||
new_sequence = truncate_sequence(sequence, max_length, end_token)
|
||||
new_sequence = pad_sequence(new_sequence, max_length, pad_token)
|
||||
PADDING_MASK = create_padding_mask(new_sequence, pad_token)
|
||||
|
||||
return (new_sequence, PADDING_MASK)
|
||||
|
||||
Reference in New Issue
Block a user