Added a way to detach models and create them standalone

2025-10-10 18:43:20 +02:00
parent 15f203cad5
commit 92ae40013d
10 changed files with 164 additions and 20 deletions
--- a/Project_Model/Libs/Transformer/Classes/Decoder.py
+++ b/Project_Model/Libs/Transformer/Classes/Decoder.py
@@ -1,3 +1,4 @@
+from typing import Optional
 import torch
 import torch.nn as nn
 from .FeedForwardNetwork import FeedForwardNetwork
@@ -42,13 +43,17 @@ class Decoder(nn.Module):
            torch.Tensor,
            torch.Tensor,
            torch.Tensor,
-            torch.Tensor
+            torch.Tensor,
+            Optional[bool]
        ]
    ):  # -> list[torch.Tensor]:  # k_x = v_x . While x_q = x
        # WARNING: a single `args` tuple is needed so this layer can be chained in a sequential container (presumably nn.Sequential)
-        x, k_x, v_x, src_padding_mask, tgt_padding_mask = args
+        if len(args) < 6:
+            args = args + (False)
+        x, k_x, v_x, src_padding_mask, tgt_padding_mask, decoder_only = args

        # build the attention mask
+        # TODO: create a prefix causal mask if needed
        attention_mask = get_causal_attention_mask(x.size(1))

        # 1) Masked Attention
@@ -67,21 +72,23 @@ class Decoder(nn.Module):
        # 4) Layer Normalization
        x = self.__layer_norm_1(x)

-        # 5) Encoder–decoder (cross) attention
-        CROSS_ATTENTION = self.__cross_attention(
-            x, k_x, v_x, key_padding_mask=src_padding_mask
-        )

-        # 6) Dropout
-        # DROPPED_CROSS_ATTENTION = self.__dropout(CROSS_ATTENTION)
-        # del CROSS_ATTENTION
+        if not decoder_only:
+            # 5) Encoder–decoder (cross) attention
+            CROSS_ATTENTION = self.__cross_attention(
+                x, k_x, v_x, key_padding_mask=src_padding_mask
+            )

-        # 7) Residual Connection
-        x = x + CROSS_ATTENTION
-        del CROSS_ATTENTION
+            # 6) Dropout
+            # DROPPED_CROSS_ATTENTION = self.__dropout(CROSS_ATTENTION)
+            # del CROSS_ATTENTION

-        # 8) Layer Normalization
-        x = self.__layer_norm_2(x)
+            # 7) Residual Connection
+            x = x + CROSS_ATTENTION
+            del CROSS_ATTENTION
+
+            # 8) Layer Normalization
+            x = self.__layer_norm_2(x)

        # 9) Position-wise feed-forward
        FEED_FORWARD = self.__feed_forward_network(x)
@@ -97,7 +104,7 @@ class Decoder(nn.Module):
        # 12) Layer Normalization
        x = self.__layer_norm_3(x)

-        return (x, k_x, v_x, src_padding_mask, tgt_padding_mask)
+        return (x, k_x, v_x, src_padding_mask, tgt_padding_mask, decoder_only)


 # use eval() to disable dropout, etc.