import torch
import torch.nn as nn
from .FeedForwardNetwork import FeedForwardNetwork
from .TorchMultiHeadAttention import TorchMultiHeadAttention as MultiHeadAttention
from ..Utils.attention_mask import get_attention_mask
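
# The masked self-attention below needs a causal (look-ahead) mask so each
# position attends only to itself and earlier positions. A hypothetical sketch
# of what ..Utils.attention_mask.get_attention_mask is assumed to provide
# (the actual implementation lives in that module):
#
#     def get_attention_mask(sequence_length: int) -> torch.Tensor:
#         # True above the diagonal marks the future positions a query
#         # must not attend to (the torch.nn.MultiheadAttention convention).
#         return torch.triu(
#             torch.ones(sequence_length, sequence_length, dtype=torch.bool),
#             diagonal=1,
#         )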

class Decoder(nn.Module):
    """A single Transformer decoder block (post-layer-norm variant):
    masked self-attention, encoder-decoder cross-attention, and a
    position-wise feed-forward network, each followed by dropout, a
    residual connection, and layer normalization.
    """

    def __init__(
        self,
        embedding_dimension: int,
        feed_forward_hidden_layer_dimension: int,
        number_of_attention_heads: int,
    ) -> None:
        super().__init__()
        # The causal mask is (sequence_length, sequence_length), so it can
        # only be built once the sequence length is known; it is therefore
        # passed per call to forward() instead of being fixed here.
        self.__masked_attention = MultiHeadAttention(
            embedding_dimension, number_of_attention_heads, dropout=0.1
        )
        self.__layer_norm_1 = nn.LayerNorm(embedding_dimension)
        self.__cross_attention = MultiHeadAttention(
            embedding_dimension, number_of_attention_heads, dropout=0.1
        )
        self.__layer_norm_2 = nn.LayerNorm(embedding_dimension)
        self.__dropout = nn.Dropout(0.1)
        self.__feed_forward_network = FeedForwardNetwork(
            embedding_dimension, feed_forward_hidden_layer_dimension
        )
        self.__layer_norm_3 = nn.LayerNorm(embedding_dimension)

    def forward(self, x, k_x, v_x, attention_mask) -> torch.Tensor:
        """Run one decoder block.

        x is the decoder input, used as the queries; k_x and v_x are the
        encoder output, used as keys and values in cross-attention
        (k_x = v_x). attention_mask is the causal mask for the masked
        self-attention.
        """
        # 1) Masked self-attention: queries, keys, and values all come from
        #    the decoder input; the causal mask hides future positions
        masked_attention = self.__masked_attention(
            x, x, x, attention_mask=attention_mask
        )
        # 2) Dropout
        masked_attention = self.__dropout(masked_attention)
        # 3) Residual connection
        x = x + masked_attention
        # 4) Layer normalization
        x = self.__layer_norm_1(x)
        # 5) Encoder-decoder (cross) attention: queries from the decoder,
        #    keys and values from the encoder output
        cross_attention = self.__cross_attention(x, k_x, v_x)
        # 6) Dropout
        cross_attention = self.__dropout(cross_attention)
        # 7) Residual connection
        x = x + cross_attention
        # 8) Layer normalization
        x = self.__layer_norm_2(x)
        # 9) Position-wise feed-forward network
        feed_forward = self.__feed_forward_network(x)
        # 10) Dropout
        feed_forward = self.__dropout(feed_forward)
        # 11) Residual connection
        x = x + feed_forward
        # 12) Layer normalization
        x = self.__layer_norm_3(x)
        return x


# Note: call model.eval() at inference time to disable dropout, etc.
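
# --- Minimal usage sketch (illustrative, with assumptions) -------------------
# Assumes get_attention_mask(sequence_length) returns a causal mask in the
# convention TorchMultiHeadAttention expects, and that tensors are batch-first
# (batch, sequence, embedding). Because of the relative imports above, run it
# as a module, e.g. `python -m <package>.Decoder`.
if __name__ == "__main__":
    batch_size, target_length, source_length, d_model = 2, 7, 9, 512
    decoder = Decoder(
        embedding_dimension=d_model,
        feed_forward_hidden_layer_dimension=2048,
        number_of_attention_heads=8,
    )
    decoder.eval()  # disable dropout for deterministic inference
    decoder_input = torch.randn(batch_size, target_length, d_model)   # queries
    encoder_output = torch.randn(batch_size, source_length, d_model)  # keys/values
    causal_mask = get_attention_mask(target_length)
    with torch.no_grad():
        output = decoder(decoder_input, encoder_output, encoder_output, causal_mask)
    print(output.shape)  # expected: torch.Size([2, 7, 512])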