From f3b83eda3dc31e2b02cc1a26f2edf3f0c333bdb4 Mon Sep 17 00:00:00 2001 From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:37:46 +0200 Subject: [PATCH] Rework --- Playgrounds/nanosocrates-train-toy.ipynb | 46 ++++++++++-------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/Playgrounds/nanosocrates-train-toy.ipynb b/Playgrounds/nanosocrates-train-toy.ipynb index 41e1d51..89005f5 100644 --- a/Playgrounds/nanosocrates-train-toy.ipynb +++ b/Playgrounds/nanosocrates-train-toy.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "adbd9598", "metadata": {}, "outputs": [ @@ -11,30 +11,17 @@ "output_type": "stream", "text": [ "c:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\utils\\_device.py:103: UserWarning: Aten Op fallback from XPU to CPU happends. This may have performance implications. If need debug the fallback ops please set environment variable `PYTORCH_DEBUG_XPU_FALLBACK=1` (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\pytorch\\build\\xpu\\ATen\\RegisterXPU_0.cpp:54528.)\n", - " return func(*args, **kwargs)\n", - "252.87s - name 'tensor' is not defined\n", - "Traceback (most recent call last):\n", - " File \"c:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\debugpy\\_vendored\\pydevd\\_pydevd_bundle\\pydevd_vars.py\", line 636, in change_attr_expression\n", - " value = eval(expression, frame.f_globals, frame.f_locals)\n", - " File \"\", line 1, in \n", - "NameError: name 'tensor' is not defined\n" + " return func(*args, **kwargs)\n" ] }, { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mCannot execute code, session has been disposed. Please try restarting the Kernel." - ] - }, - { - "ename": "", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31mCannot execute code, session has been disposed. Please try restarting the Kernel. \n", - "\u001b[1;31mView Jupyter log for further details." + "name": "stdout", + "output_type": "stream", + "text": [ + "EPOCH 1\n", + "\tLoss: 9.161508560180664\n", + "EPOCH 2\n", + "\tLoss: 9.131484031677246\n" ] } ], @@ -124,7 +111,7 @@ ")\n", "cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)\n", "optimizer = torch.optim.AdamW(NANOSOCRATES.parameters())\n", - "scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 4)\n", + "scheduler = Transformer.WarmupLR(optimizer, 4000, EMBEDDED_SIZE)\n", "last_loss = 0\n", "current_epoch = 0\n", "\n", @@ -146,18 +133,23 @@ " optimizer.zero_grad()\n", "\n", " logits: torch.Tensor = NANOSOCRATES((encoder_list, padding_list, decoder_list))\n", + " prob = torch.softmax(logits, 2)\n", "\n", - " most_probable_tokens = torch.argmax(logits, 2)\n", + " most_probable_tokens = torch.argmax(prob, 2)\n", "\n", - " logits = logits[:,i,:]\n", + " logits = logits[:,0:i,:]\n", + " logits = logits.permute(0, 2, 1)\n", + "\n", + " loss : torch.Tensor = cross_entropy(logits, target_logits[:, 0:i])\n", + " # loss : torch.Tensor = cross_entropy(logits, target_logits)\n", "\n", - " loss = cross_entropy(logits, target_logits[:,i])\n", " last_loss = loss\n", + " loss.backward()\n", " optimizer.step()\n", " scheduler.step()\n", "\n", " if i < SENTENCE_LENGTH - 1:\n", - " decoder_list[:,i+1] = most_probable_tokens[:,i]\n", + " decoder_list[:,i+1] = target_logits[:,i]\n", "\n", "\n", " current_epoch += 1\n",