From 99b5198c9a13faff2f262d63db7c6c394ce601a8 Mon Sep 17 00:00:00 2001
From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com>
Date: Tue, 7 Oct 2025 16:38:08 +0200
Subject: [PATCH] WIP

---
 .../Transformer/Classes/NanoSocratesCore.py   | 129 ++++++++++++------
 1 file changed, 91 insertions(+), 38 deletions(-)

diff --git a/Project_Model/Libs/Transformer/Classes/NanoSocratesCore.py b/Project_Model/Libs/Transformer/Classes/NanoSocratesCore.py
index bb2d971..fca307a 100644
--- a/Project_Model/Libs/Transformer/Classes/NanoSocratesCore.py
+++ b/Project_Model/Libs/Transformer/Classes/NanoSocratesCore.py
@@ -4,55 +4,108 @@ from .Encoder import Encoder
 from ....Libs.Embedder import NanoSocratesEmbedder
 import torch
 
+
 class NanoSocratesCore(torch.nn.Module):
 
-    def __init__(self,
-                 embedded_size: int,
-                 feed_forward_dim: int,
-                 encoder_layers: int,
-                 decoder_layers:int,
-                 attention_heads: int,
-                 vocab_size: int) -> None:
-
+    def __init__(
+        self,
+        sentence_length: int,
+        vocab_size: int,
+        embedding_size: int = 256,
+        feed_forward_multiplier: int = 4,
+        num_encoder_layers: int = 2,
+        num_decoder_layers: int = 2,
+        num_attention_heads: int = 4,
+    ) -> None:
+
+        feed_forward_dim = embedding_size * feed_forward_multiplier
+
+        self.__sentence_length = sentence_length
 
         self.__encoder_sequence = torch.nn.Sequential(
-            *[Encoder(embedded_size, feed_forward_dim, attention_heads) for _ in range(encoder_layers)]
-        )
-
-        #* unpack the list so that each encoder has its own weights
-
+            *[
+                Encoder(embedding_size, feed_forward_dim, num_attention_heads)
+                for _ in range(num_encoder_layers)
+            ]
+        )
+
+        # * unpack the list so that each encoder has its own weights
+
         self.__decoder_sequence = torch.nn.Sequential(
-            *[Decoder(embedded_size, feed_forward_dim, attention_heads) for _ in range(decoder_layers)]
+            *[
+                Decoder(embedding_size, feed_forward_dim, num_attention_heads)
+                for _ in range(num_decoder_layers)
+            ]
+        )
+
+        self.__linear = torch.nn.Linear(embedding_size, vocab_size)
+
+        self.__input_embeder = NanoSocratesEmbedder(vocab_size, embedding_size)
+        self.__output_embedder = NanoSocratesEmbedder(vocab_size, embedding_size)
+
+    def forward(
+        self,
+        encoder_input: list[list[int]],
+        decoder_input: list[list[int]],
+        encoder_padding_mask: list[list[int]],
+    ):
+
+        if len(encoder_padding_mask) != len(encoder_input):
+            raise Exception("Mismatch in received_dimensions")
+
+        # TODO: check for tensor in input to embedder
+        # 1) Embed User-Input for encoders
+        ENCODER_INPUT = self.__input_embeder(encoder_input)
+
+        # 2) Encode User-Input
+        ENCODER_OUTPUT, _ = self.__encoder_sequence(ENCODER_INPUT, encoder_padding_mask)
+        del ENCODER_INPUT
+
+        exit_loop = False
+        decoder_token_list = decoder_input[:]
+        decoder_phase = 0
+
+        LOGITS_HISTORY: list[torch.Tensor] = []
+
+        # 3) Autoregressive Output
+        while not exit_loop:
+
+            # 3.0) Increment Counter
+            decoder_phase += 1
+
+            # 3.1) Embed Decoder Input
+            decoder_input = self.__output_embedder(decoder_token_list)
+
+            # 3.2) Decode Decoder Input
+            DECODER_OUTPUT, _, _, _ = self.__decoder_sequence(
+                decoder_input, ENCODER_OUTPUT, ENCODER_OUTPUT
             )
-
-        self.__linear = torch.nn.Linear(embedded_size, vocab_size, bias=False)
-        self.__input_embeder = NanoSocratesEmbedder(vocab_size,embedded_size)
-        self.__output_embedder = NanoSocratesEmbedder(vocab_size,embedded_size)
+            # 3.3) Go back to Token space
+            # TODO: change name
+            LOGITS = self.__linear(DECODER_OUTPUT)
+            del DECODER_OUTPUT
 
+            # 3.4) Transform in probabilities
+            # TODO: change name
+            TOKEN_PROBABILITIES = torch.softmax(LOGITS, dim=-1)
+            del LOGITS
 
-    def forward(self, token_list, padding_mask = None):
-        x = self.__input_embeder(token_list)
-        x = self.__encoder_sequence(x, padding_mask)[0]
+            LOGITS_HISTORY.append(TOKEN_PROBABILITIES)
 
+            # 3.5) Take most probable tokens
+            TOKEN_IDS = torch.argmax(TOKEN_PROBABILITIES, -1)
 
-        # do while
-        x = self.__decoder_sequence(x,x,x, padding_mask)[0]
-        logits = self.__linear(x)
-        log_prob = torch.softmax(logits, dim=-1)
-        output = torch.argmax(log_prob)
-        while self.keep_going(log_prob):
-            # from log_prob again into x
+            # TODO: check for dimensions and for efficiency
+            DECODER_TOKEN_TENSOR = torch.tensor(decoder_token_list)
+            DECODER_TOKEN_TENSOR[:, decoder_phase] = TOKEN_IDS
+            decoder_token_list = DECODER_TOKEN_TENSOR.tolist()
+            del TOKEN_IDS
+            del DECODER_TOKEN_TENSOR
 
-            x = self.__decoder_sequence(x,x,x, padding_mask)[0]
-            logits = self.__linear(x)
-            log_prob = torch.softmax(logits, dim=-1)
-            # argmax
-
-        return log_prob
-
-
-
-    def keep_going(self, x: ) -> bool:
+            # 3.6) Check if we generated all tokens
+            if decoder_phase == self.__sentence_length - 1:
+                exit_loop = True
+
+        return LOGITS_HISTORY
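
Note on the stacked layers added above: torch.nn.Module subclasses need super().__init__() before sub-modules are assigned, and torch.nn.Sequential forwards only a single positional argument per stage, so calls such as self.__encoder_sequence(ENCODER_INPUT, encoder_padding_mask) cannot thread the padding mask (or the encoder memory) through the stack as written. Below is a minimal sketch of the usual torch.nn.ModuleList pattern; TinyEncoderLayer and TinyEncoderStack are hypothetical stand-ins for the project's Encoder/Decoder classes and do not reflect their real internals.

# Sketch only: threading an extra argument through a stack of layers,
# since torch.nn.Sequential passes exactly one positional input per stage.
# Layer names and signatures here are assumptions, not the project's API.
import torch


class TinyEncoderLayer(torch.nn.Module):
    def __init__(self, embedding_size: int) -> None:
        super().__init__()
        self.proj = torch.nn.Linear(embedding_size, embedding_size)

    def forward(self, x: torch.Tensor, padding_mask: torch.Tensor) -> torch.Tensor:
        # Zero out padded positions, then apply the layer.
        return self.proj(x * padding_mask.unsqueeze(-1))


class TinyEncoderStack(torch.nn.Module):
    def __init__(self, num_layers: int, embedding_size: int) -> None:
        super().__init__()  # required before registering sub-modules
        self.layers = torch.nn.ModuleList(
            [TinyEncoderLayer(embedding_size) for _ in range(num_layers)]
        )

    def forward(self, x: torch.Tensor, padding_mask: torch.Tensor) -> torch.Tensor:
        for layer in self.layers:
            x = layer(x, padding_mask)  # each stage receives the mask explicitly
        return x


if __name__ == "__main__":
    stack = TinyEncoderStack(num_layers=2, embedding_size=8)
    tokens = torch.randn(1, 5, 8)                    # (batch, sequence, embedding)
    mask = torch.tensor([[1, 1, 1, 0, 0]]).float()   # 1 = real token, 0 = padding
    print(stack(tokens, mask).shape)                 # torch.Size([1, 5, 8])

The same loop shape would apply to the decoder stack, passing the encoder output alongside the decoder embeddings instead of a padding mask.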