From 9a797a0485d2b41f72d2203d59622fa605004861 Mon Sep 17 00:00:00 2001
From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com>
Date: Sat, 4 Oct 2025 19:43:25 +0200
Subject: [PATCH] Added embedder code for "Attention is all you need"

---
 .../Embedder/Classes/NanoSocratesEmbedder.py  | 34 ++++++++++++++++++++
 .../Utils/fixed_positional_encoding.py        | 32 +++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 Project_Model/Libs/Embedder/Classes/NanoSocratesEmbedder.py
 create mode 100644 Project_Model/Libs/Embedder/Utils/fixed_positional_encoding.py

diff --git a/Project_Model/Libs/Embedder/Classes/NanoSocratesEmbedder.py b/Project_Model/Libs/Embedder/Classes/NanoSocratesEmbedder.py
new file mode 100644
index 0000000..0377331
--- /dev/null
+++ b/Project_Model/Libs/Embedder/Classes/NanoSocratesEmbedder.py
@@ -0,0 +1,34 @@
+import torch
+
+from ..Utils.fixed_positional_encoding import fixed_positional_encoding
+
+
+class NanoSocratesEmbedder(torch.nn.Module):
+
+    def __init__(
+        self,
+        vocabulary_size: int,
+        embedding_size: int
+    ) -> None:
+        super().__init__()
+        self.__embedder = torch.nn.Embedding(
+            vocabulary_size,
+            embedding_size
+        )
+
+    def forward(self, tokenized_sentence: list[int]) -> torch.Tensor:
+
+        # Look up the learned embedding of every token id
+        TOKENIZED_TENSOR = torch.tensor(tokenized_sentence)
+        computed_embeddings: torch.Tensor = self.__embedder(TOKENIZED_TENSOR)
+
+        SENTENCE_LENGTH, EMBEDDING_SIZE = computed_embeddings.shape
+
+        # Add the fixed sinusoidal positional encodings element-wise
+        POSITIONAL_ENCODINGS = fixed_positional_encoding(
+            SENTENCE_LENGTH,
+            EMBEDDING_SIZE
+        )
+
+        computed_embeddings = computed_embeddings + POSITIONAL_ENCODINGS
+        return computed_embeddings
diff --git a/Project_Model/Libs/Embedder/Utils/fixed_positional_encoding.py b/Project_Model/Libs/Embedder/Utils/fixed_positional_encoding.py
new file mode 100644
index 0000000..bcdc0ee
--- /dev/null
+++ b/Project_Model/Libs/Embedder/Utils/fixed_positional_encoding.py
@@ -0,0 +1,32 @@
+import torch
+
+
+def fixed_positional_encoding(
+    sentence_dimension: int,
+    embedding_dimension: int,
+) -> torch.Tensor:
+
+    BIG_CONST = int(1e4)
+    # Token positions 0 .. sentence_dimension - 1
+    INITIAL_ENCODING = torch.tensor([i for i in range(0, sentence_dimension)])
+
+    ENCODINGS: list[torch.Tensor] = []
+
+    for i in range(0, embedding_dimension):
+        EMBEDDING_POSITION = i
+
+        # The paper indexes sin/cos pairs by i, so embedding dimension j
+        # maps to pair index j // 2: both members of a pair share the
+        # same frequency 1 / 10000^(2i / d_model).
+        DIVISOR = BIG_CONST ** ((2 * (EMBEDDING_POSITION // 2)) / embedding_dimension)
+        INTERMEDIATE_ENCODING = INITIAL_ENCODING / DIVISOR
+
+        # Even dimensions use sine, odd dimensions use cosine
+        if EMBEDDING_POSITION % 2 == 0:
+            ENCODINGS.append(torch.sin(INTERMEDIATE_ENCODING))
+            continue
+
+        ENCODINGS.append(torch.cos(INTERMEDIATE_ENCODING))
+
+    # Shape: (sentence_dimension, embedding_dimension)
+    return torch.stack(ENCODINGS).transpose(0, 1)
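
A minimal usage sketch for the new embedder, assuming the patch is applied at the repository root and Project_Model.Libs.Embedder.Classes is importable as a package; the vocabulary size, embedding size, and token ids below are illustrative values, not part of the patch.

from Project_Model.Libs.Embedder.Classes.NanoSocratesEmbedder import NanoSocratesEmbedder

# Illustrative sizes, not taken from the patch
VOCABULARY_SIZE = 1_000
EMBEDDING_SIZE = 16

embedder = NanoSocratesEmbedder(VOCABULARY_SIZE, EMBEDDING_SIZE)

# A toy sentence of token ids, as produced by some tokenizer
tokenized_sentence = [5, 42, 7, 999]

embedded_sentence = embedder(tokenized_sentence)
print(embedded_sentence.shape)  # torch.Size([4, 16]) -> (sentence length, embedding size)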
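
For comparison, a vectorized sketch of the same sinusoidal encoding from the paper, usable as a cross-check of fixed_positional_encoding; the name sinusoidal_encoding_vectorized and its internals are illustrative, not part of the patch.

import torch


def sinusoidal_encoding_vectorized(sentence_dimension: int, embedding_dimension: int) -> torch.Tensor:
    # Token positions as a column vector: (sentence_dimension, 1)
    positions = torch.arange(sentence_dimension, dtype=torch.float32).unsqueeze(1)
    # Pair index 2 * (j // 2) for every embedding dimension j
    pair_indices = 2 * (torch.arange(embedding_dimension) // 2)
    divisors = torch.pow(10_000.0, pair_indices.float() / embedding_dimension)

    angles = positions / divisors                    # (sentence_dimension, embedding_dimension)
    encodings = torch.empty_like(angles)
    encodings[:, 0::2] = torch.sin(angles[:, 0::2])  # even dimensions -> sine
    encodings[:, 1::2] = torch.cos(angles[:, 1::2])  # odd dimensions  -> cosine
    return encodings

# Expected to match the loop-based version up to floating point error:
# torch.allclose(sinusoidal_encoding_vectorized(4, 16), fixed_positional_encoding(4, 16))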