diff --git a/Project_Model/Libs/Transformer/Classes/Decoder.py b/Project_Model/Libs/Transformer/Classes/Decoder.py
index 97cd148..10422b8 100644
--- a/Project_Model/Libs/Transformer/Classes/Decoder.py
+++ b/Project_Model/Libs/Transformer/Classes/Decoder.py
@@ -19,7 +19,7 @@ class Decoder(nn.Module):
         self.__attention_heads = number_of_attention_heads
 
         super().__init__()
-        
+
         self.__masked_attention = MultiHeadAttention(
             embedding_dimension, number_of_attention_heads, dropout=0.1
@@ -68,12 +68,12 @@ class Decoder(nn.Module):
         )
 
         # 2) Dropout
-        # DROPPED_MASKED_ATTENTION = self.__dropout(MASKED_ATTENTION)
-        # del MASKED_ATTENTION
+        DROPPED_MASKED_ATTENTION = self.__dropout(MASKED_ATTENTION)
+        del MASKED_ATTENTION
 
         # 3) Residual Connection
-        x = x + MASKED_ATTENTION
-        del MASKED_ATTENTION
+        x = x + DROPPED_MASKED_ATTENTION
+        del DROPPED_MASKED_ATTENTION
 
         # 4) Layer Normalization
         x = self.__layer_norm_1(x)
@@ -86,12 +86,12 @@ class Decoder(nn.Module):
         )
 
         # 6) Dropout
-        # DROPPED_CROSS_ATTENTION = self.__dropout(CROSS_ATTENTION)
-        # del CROSS_ATTENTION
+        DROPPED_CROSS_ATTENTION = self.__dropout(CROSS_ATTENTION)
+        del CROSS_ATTENTION
 
         # 7) Residual Connection
-        x = x + CROSS_ATTENTION
-        del CROSS_ATTENTION
+        x = x + DROPPED_CROSS_ATTENTION
+        del DROPPED_CROSS_ATTENTION
 
         # 8) Layer Normalization
         x = self.__layer_norm_2(x)
@@ -100,12 +100,12 @@
         FEED_FORWARD = self.__feed_forward_network(x)
 
         # 10) Dropout
-        # DROPPED_FEED_FORWARD = self.__dropout(FEED_FORWARD)
-        # del FEED_FORWARD
+        DROPPED_FEED_FORWARD = self.__dropout(FEED_FORWARD)
+        del FEED_FORWARD
 
         # 11) Residual Connection
-        x = x + FEED_FORWARD
-        del FEED_FORWARD
+        x = x + DROPPED_FEED_FORWARD
+        del DROPPED_FEED_FORWARD
 
         # 12) Layer Normalization
         x = self.__layer_norm_3(x)
diff --git a/Project_Model/Libs/Transformer/Classes/Encoder.py b/Project_Model/Libs/Transformer/Classes/Encoder.py
index 78fca5e..d5d6498 100644
--- a/Project_Model/Libs/Transformer/Classes/Encoder.py
+++ b/Project_Model/Libs/Transformer/Classes/Encoder.py
@@ -43,12 +43,12 @@ class Encoder(
         ATTENTION = self.__attention(x, x, x, key_padding_mask=padding_mask)
 
         # 2) Dropout
-        # DROPPED_ATTENTION = self.__dropout(ATTENTION)
-        # del ATTENTION
+        DROPPED_ATTENTION = self.__dropout(ATTENTION)
+        del ATTENTION
 
         # 3) Residual Connection
-        x = x + ATTENTION
-        del ATTENTION
+        x = x + DROPPED_ATTENTION
+        del DROPPED_ATTENTION
 
         # 4) Layer Normalization
         x = self.__layer_norm_1(x)
@@ -57,12 +57,12 @@ class Encoder(
         FEED_FORWARD = self.__feed_forward(x)
 
         # 6) Dropout
-        # DROPPED_FEED_FORWARD = self.__dropout(FEED_FORWARD)
-        # del FEED_FORWARD
+        DROPPED_FEED_FORWARD = self.__dropout(FEED_FORWARD)
+        del FEED_FORWARD
 
         # 7) Residual Connection
-        x = x + FEED_FORWARD
-        del FEED_FORWARD
+        x = x + DROPPED_FEED_FORWARD
+        del DROPPED_FEED_FORWARD
 
         # 8) Layer Normalization
         x = self.__layer_norm_2(x)
diff --git a/Project_Model/Libs/Transformer/Classes/FeedForwardNetwork.py b/Project_Model/Libs/Transformer/Classes/FeedForwardNetwork.py
index a5a29ac..4cfc7e6 100644
--- a/Project_Model/Libs/Transformer/Classes/FeedForwardNetwork.py
+++ b/Project_Model/Libs/Transformer/Classes/FeedForwardNetwork.py
@@ -35,7 +35,7 @@ class FeedForwardNetwork(nn.Module):
         x = self.__relu(x)
 
         # 3) Dropout
-        # x = self.__dropout(x)
+        x = self.__dropout(x)
 
         # 4) Linear Layer
         x = self.__fully_connected_2(x)
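
The patch re-enables dropout on each sub-layer output before the residual add and layer normalization (the post-LN ordering: sub-layer -> dropout -> residual -> LayerNorm), plus the dropout inside the feed-forward network. The standalone sketch below illustrates that ordering only; it is not the project's Decoder/Encoder/FeedForwardNetwork code, and the class name PostLNEncoderBlock, the use of torch.nn.MultiheadAttention in place of the project's MultiHeadAttention class, and the 4x feed-forward width are assumptions for illustration.

# Minimal sketch of the sub-layer -> dropout -> residual -> LayerNorm pattern
# that the patch re-enables. Illustrative only; not the project's classes.
from typing import Optional

import torch
import torch.nn as nn


class PostLNEncoderBlock(nn.Module):
    def __init__(self, embedding_dimension: int, number_of_attention_heads: int, dropout: float = 0.1):
        super().__init__()
        # Stand-in for the project's MultiHeadAttention (an assumption).
        self.attention = nn.MultiheadAttention(
            embedding_dimension, number_of_attention_heads, dropout=dropout, batch_first=True
        )
        self.feed_forward = nn.Sequential(
            nn.Linear(embedding_dimension, 4 * embedding_dimension),
            nn.ReLU(),
            nn.Dropout(dropout),  # mirrors FeedForwardNetwork step "3) Dropout"
            nn.Linear(4 * embedding_dimension, embedding_dimension),
        )
        self.dropout = nn.Dropout(dropout)
        self.layer_norm_1 = nn.LayerNorm(embedding_dimension)
        self.layer_norm_2 = nn.LayerNorm(embedding_dimension)

    def forward(self, x: torch.Tensor, padding_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        # 1) Self-attention  2) Dropout  3) Residual  4) LayerNorm
        attention, _ = self.attention(x, x, x, key_padding_mask=padding_mask)
        x = self.layer_norm_1(x + self.dropout(attention))
        # 5) Feed-forward    6) Dropout  7) Residual  8) LayerNorm
        feed_forward = self.feed_forward(x)
        x = self.layer_norm_2(x + self.dropout(feed_forward))
        return x


# Usage: dropout is active in train() mode and disabled in eval() mode,
# so the change affects training regularization but not inference behavior.
block = PostLNEncoderBlock(embedding_dimension=512, number_of_attention_heads=8)
tokens = torch.randn(2, 16, 512)  # (batch, sequence, embedding)
print(block(tokens).shape)        # torch.Size([2, 16, 512])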