Merge branch 'dev.embedder' of https://repositories.communitynotfound.work/PoliBa-DeepLearning/NanoSocrates into dev.embedder
@@ -189,7 +189,7 @@ class NanoSocratesBPE(Encoder):
                 token_stack.appendleft(right_token)
                 token_stack.appendleft(left_token)
 
-        return UTF_8_STRING_ARR.decode("utf-8")
+        return UTF_8_STRING_ARR.decode("utf-8", errors="ignore")
 
     def __token_decode(self, token_id: int) -> tuple[int, int]:
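A minimal sketch, not part of the commit, of why errors="ignore" matters here: BPE decoding works on raw bytes, so a decoded run can end mid-way through a multi-byte UTF-8 character, and a strict decode of such a run raises.

    truncated = "è".encode("utf-8")[:1]              # b'\xc3', half of a two-byte character
    # truncated.decode("utf-8")                      # would raise UnicodeDecodeError
    truncated.decode("utf-8", errors="ignore")       # '' - the partial byte is dropped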
@@ -41,18 +41,19 @@ class Decoder(nn.Module):
             torch.Tensor,
             torch.Tensor,
             torch.Tensor,
+            torch.Tensor,
             torch.Tensor
         ]
     ):  # -> list[torch.Tensor]: # k_x = v_x . While x_q = x
         # WARNING: args is needed to have sequential
-        x, k_x, v_x, padding_mask = args
+        x, k_x, v_x, src_padding_mask, tgt_padding_mask = args
 
         # build of attention mask
         attention_mask = get_causal_attention_mask(x.size(1))
 
         # 1) Masked Attention
         MASKED_ATTENTION = self.__masked_attention(
-            x, x, x, key_padding_mask=padding_mask, attention_mask=attention_mask
+            x, x, x, key_padding_mask=tgt_padding_mask, attention_mask=attention_mask
         )
 
         # 2) Dropout
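For context, get_causal_attention_mask is not shown in this diff; a minimal sketch of what such a helper typically looks like (an assumption, not the repo's implementation): a boolean upper-triangular mask blocks each position from attending to later ones, while key_padding_mask separately hides padded target tokens.

    import torch

    def get_causal_attention_mask(size: int) -> torch.Tensor:
        # True above the diagonal: position i must not attend to any j > i
        return torch.triu(torch.ones(size, size, dtype=torch.bool), diagonal=1)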
@@ -68,7 +69,7 @@ class Decoder(nn.Module):
 
         # 5) Encoder–decoder (cross) attention
         CROSS_ATTENTION = self.__cross_attention(
-            x, k_x, v_x, key_padding_mask=padding_mask
+            x, k_x, v_x, key_padding_mask=src_padding_mask
         )
 
         # 6) Dropout
@@ -96,7 +97,7 @@ class Decoder(nn.Module):
         # 12) Layer Normalization
         x = self.__layer_norm_3(x)
 
-        return (x, k_x, v_x, padding_mask)
+        return (x, k_x, v_x, src_padding_mask, tgt_padding_mask)
 
 
 # use eval to disable dropout etc.
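The widened return tuple mirrors the widened input because of the nn.Sequential constraint noted in the WARNING comment: Sequential threads a single value through each module, so every decoder block must accept and return the same 5-tuple. A sketch of the pattern with a hypothetical pass-through block (PassThroughBlock is illustrative, not from the repo):

    import torch
    import torch.nn as nn

    class PassThroughBlock(nn.Module):
        # Stand-in for one decoder layer: same 5-tuple in and out,
        # which is what lets nn.Sequential chain the layers.
        def forward(self, args):
            x, k_x, v_x, src_mask, tgt_mask = args
            return (x, k_x, v_x, src_mask, tgt_mask)

    stack = nn.Sequential(PassThroughBlock(), PassThroughBlock())
    memory = torch.zeros(2, 5, 8)
    x, *_ = stack((torch.zeros(2, 5, 8), memory, memory, None, None))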
@@ -37,17 +37,17 @@ class TrainingModel(torch.nn.Module):
 
         self.__detokener = DeToken(latent_space, vocabulary_size)
 
-    def forward(self, args: tuple[torch.Tensor, torch.Tensor, torch.Tensor]):
+    def forward(self, args: tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]):
 
-        encoder_embedder_input, padding_tensor, decoder_embedder_input = args
+        encoder_embedder_input, src_padding, decoder_embedder_input, tgt_padding = args
 
         encoder_tensor = self.__encoder_embedder(encoder_embedder_input)
         decoder_tensor = self.__decoder_embedder(decoder_embedder_input)
 
-        encoder_output, _ = self.__encoder((encoder_tensor, padding_tensor))
+        encoder_output, _ = self.__encoder((encoder_tensor, src_padding))
 
-        decoder_output, _, _, _ = self.__decoder(
-            (decoder_tensor, encoder_output, encoder_output, None)
+        decoder_output, _, _, _, _ = self.__decoder(
+            (decoder_tensor, encoder_output, encoder_output, src_padding, tgt_padding)
         )
 
         logits: torch.Tensor = self.__detokener(decoder_output)
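A hedged usage sketch of the new four-tensor input; the names, sizes, and PAD id below are made up, and it assumes batch-first id tensors with boolean masks that are True at padding positions (PyTorch's key_padding_mask convention):

    import torch

    PAD_ID, vocab_size, batch, src_len, tgt_len = 0, 1000, 4, 32, 24
    src_ids = torch.randint(1, vocab_size, (batch, src_len))
    tgt_ids = torch.randint(1, vocab_size, (batch, tgt_len))
    src_padding = src_ids.eq(PAD_ID)    # (batch, src_len), True where padded
    tgt_padding = tgt_ids.eq(PAD_ID)    # (batch, tgt_len), True where padded
    logits = model((src_ids, src_padding, tgt_ids, tgt_padding))   # model: a TrainingModel instance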
@@ -3,6 +3,7 @@ from .task_type import TaskType
 from .post_tokenization import truncate_sequence, pad_sequence, normalize_sequence, create_padding_mask
 from .inference_masking import inference_masking
 from .truncate_rdf_list import truncate_rdf_list
+from .decode_out import tensor2token
 
 __all__ = [
     "TaskType",
@@ -13,5 +14,6 @@ __all__ = [
     "create_padding_mask",
     "normalize_sequence",
     "inference_masking",
-    "truncate_rdf_list"
+    "truncate_rdf_list",
+    "tensor2token"
 ]
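The added trailing comma is load-bearing: without it, Python concatenates adjacent string literals, so the two entries would silently merge into one bogus export.

    ["truncate_rdf_list" "tensor2token"]     # == ["truncate_rdf_listtensor2token"]
    ["truncate_rdf_list", "tensor2token"]    # two separate names, as intended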
27  Project_Model/Libs/Transformer/Utils/decode_out.py  Normal file
@@ -0,0 +1,27 @@
+from typing import Generator
+
+import torch
+
+
+def tensor2token(tensor: torch.Tensor, end_token: int) -> Generator[list[int]]:
+
+    if len(tensor.shape) < 1 or len(tensor.shape) > 2:
+        raise ValueError("Shape is not correct")
+
+    if len(tensor.shape) == 1:
+        token_list: list[int] = tensor.tolist()
+        token_list.append(end_token)
+        yield token_list
+        return
+
+    batch_len: int
+    batch_len, _ = tensor.shape
+
+    for i in range(batch_len):
+
+        smaller_tensor = tensor[i, :]
+        token_list: list[int] = smaller_tensor.tolist()
+        token_list.append(end_token)
+        yield token_list
+
+
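A quick usage sketch with made-up values: a 2-D tensor of batched predictions yields one token list per row, each terminated with end_token, while a 1-D tensor yields a single list.

    import torch

    preds = torch.tensor([[4, 8, 15], [16, 23, 42]])
    for tokens in tensor2token(preds, end_token=2):
        print(tokens)    # [4, 8, 15, 2], then [16, 23, 42, 2]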
@@ -1,17 +1,20 @@
 def truncate_sequence(
-    sequence: list[int], truncate_at: int, end_token: int
+    sequence: list[int], truncate_at: int, end_token: int, add_ending: bool
 ) -> list[int]:
 
     if len(sequence) < truncate_at - 1:
-        sequence.append(end_token)
+        if add_ending:
+            sequence.append(end_token)
         return sequence
 
     if len(sequence) < truncate_at:
-        sequence[-1] = end_token
+        if add_ending:
+            sequence[-1] = end_token
         return sequence
 
     TRUNCATED_SEQUENCE = sequence[:truncate_at]
-    TRUNCATED_SEQUENCE[-1] = end_token
+    if add_ending:
+        TRUNCATED_SEQUENCE[-1] = end_token
 
     return TRUNCATED_SEQUENCE
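A behaviour sketch of the new flag with made-up inputs: add_ending controls only whether end_token is written, never whether the sequence is cut.

    truncate_sequence([1, 2], 5, end_token=0, add_ending=True)               # [1, 2, 0]
    truncate_sequence([1, 2], 5, end_token=0, add_ending=False)              # [1, 2]
    truncate_sequence([1, 2, 3, 4, 5, 6], 4, end_token=0, add_ending=True)   # [1, 2, 3, 0]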
@@ -48,8 +51,9 @@ def normalize_sequence(
     max_length: int,
     pad_token: int,
     end_token: int,
+    add_ending: bool = True
 ) -> tuple[list[int], list[bool]]:
-    new_sequence = truncate_sequence(sequence, max_length, end_token)
+    new_sequence = truncate_sequence(sequence, max_length, end_token, add_ending)
     new_sequence = pad_sequence(new_sequence, max_length, pad_token)
     PADDING_MASK = create_padding_mask(new_sequence, pad_token)
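An end-to-end sketch under stated assumptions: pad_sequence right-pads and create_padding_mask marks padding positions True (neither implementation appears in this diff, so both are assumptions):

    seq, mask = normalize_sequence([5, 6, 7], max_length=6, pad_token=0,
                                   end_token=2, add_ending=True)
    # seq  == [5, 6, 7, 2, 0, 0]      (end token appended, then right-padded)
    # mask == [False, False, False, False, True, True]   (True marks padding)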