{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "adbd9598", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\utils\\_device.py:103: UserWarning: Aten Op fallback from XPU to CPU happends. This may have performance implications. If need debug the fallback ops please set environment variable `PYTORCH_DEBUG_XPU_FALLBACK=1` (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\pytorch\\build\\xpu\\ATen\\RegisterXPU_0.cpp:54528.)\n", " return func(*args, **kwargs)\n" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "import random\n", "import torch\n", "import pandas as pd\n", "from pathlib import Path\n", "import Project_Model.Libs.Embedder as Embedder\n", "import Project_Model.Libs.BPE as BPE\n", "import Project_Model.Libs.Transformer as Transformer\n", "import Project_Model.Libs.TorchShims as torch_shims\n", "\n", "# set a fixed seed\n", "torch.manual_seed(0)\n", "random.seed(0)\n", "DEVICE = torch_shims.get_default_device()\n", "torch.set_default_device(DEVICE)\n", "\n", "# set a default device\n", "\n", "# BPE Init\n", "VOCABULARY_PATH = Path(\"Assets/Model/toy_10/toy_dictionary.json\")\n", "SPECIAL_VOC = BPE.default_special_tokens()\n", "\n", "VOCABULARY = BPE.load_nanos_vocabulary(VOCABULARY_PATH)\n", "TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)\n", "\n", "\n", "# Constants\n", "TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + 1\n", "EMBEDDED_SIZE = 256\n", "FEED_FORWARD_MULTIPLIER = 4\n", "ATTENTION_HEADS = 8\n", "SENTENCE_LENGTH = 256\n", "NUMBER_OF_BLOCKS = 4\n", "MAX_EPOCHS = int(1e3)\n", "\n", "\n", "PAD_TOKEN = TOKENANO.encode(\"\")[0]\n", "END_TOKEN = TOKENANO.encode(\"\")[0]\n", "\n", "\n", "# Load CSV\n", "TOY_DATASET_PATH = Path(\"Assets/Dataset/1-hop/toy/rdf_text.csv\")\n", "\n", "TOY_DATASET = pd.read_csv(TOY_DATASET_PATH)\n", "\n", "TOY_BATCH_INPUT_LIST: list[list[int]] = []\n", "TOY_BATCH_PADDING_LIST: list[list[bool]] = []\n", "TOY_BATCH_TARGET_LIST: list[list[int]] = []\n", "TOY_BATCH_DECODER_DEFAULT: list[list[int]]= []\n", "\n", "\n", "for index, row in TOY_DATASET.iterrows():\n", "\n", " RDFs: str = row[\"RDFs\"]\n", " Abstract: str = row[\"Abstract\"]\n", "\n", " input_tokens = TOKENANO.encode(RDFs)\n", " output_tokens = TOKENANO.encode(Abstract)[1:]\n", " decoder_default_tokens = TOKENANO.encode(\"\")\n", "\n", " input_tokens, padding = Transformer.normalize_sequence(\n", " input_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN\n", " )\n", " output_tokens, _ = Transformer.normalize_sequence(\n", " output_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN\n", " )\n", " decoder_default_tokens, _ = Transformer.normalize_sequence(\n", " decoder_default_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN, False\n", " )\n", "\n", " TOY_BATCH_INPUT_LIST.append(input_tokens)\n", " TOY_BATCH_PADDING_LIST.append(padding)\n", " TOY_BATCH_TARGET_LIST.append(output_tokens)\n", " TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens)\n", "\n", " output_tokens = TOKENANO.encode(RDFs)\n", " input_tokens = TOKENANO.encode(Abstract)[1:]\n", " decoder_default_tokens = TOKENANO.encode(\"\")\n", "\n", " input_tokens, 
    "# Training loop\n",
    "LOSS_HISTORY = []\n",
    "NANOSOCRATES = Transformer.TrainingModel(\n",
    "    TOKEN_SPACE_SIZE,\n",
    "    EMBEDDED_SIZE,\n",
    "    FEED_FORWARD_MULTIPLIER,\n",
    "    ATTENTION_HEADS,\n",
    "    NUMBER_OF_BLOCKS\n",
    ")\n",
    "cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)\n",
    "optimizer = torch.optim.AdamW(NANOSOCRATES.parameters())\n",
    "scheduler = Transformer.WarmupLR(optimizer, 4000, EMBEDDED_SIZE)\n",
    "last_loss = 0\n",
    "current_epoch = 0\n",
    "\n",
    "while current_epoch < MAX_EPOCHS:\n",
    "\n",
    "    optimizer.zero_grad()\n",
    "\n",
    "    encoder_list = torch.tensor(TOY_BATCH_INPUT_LIST[:])\n",
    "    decoder_list = torch.tensor(TOY_BATCH_DECODER_DEFAULT[:])\n",
    "    src_padding = torch.tensor(TOY_BATCH_PADDING_LIST[:], dtype=torch.bool)\n",
    "\n",
    "    # Target token ids, one row per sequence\n",
    "    target_logits = torch.tensor(TOY_BATCH_TARGET_LIST[:])\n",
    "\n",
    "    last_loss = 0\n",
    "    last_prediction: torch.Tensor\n",
    "\n",
    "    # One optimisation step per target position, with teacher forcing\n",
    "    for i in range(0, SENTENCE_LENGTH):\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        tgt_padding = decoder_list.eq(PAD_TOKEN)\n",
    "\n",
    "        logits: torch.Tensor = NANOSOCRATES((encoder_list, src_padding, decoder_list, tgt_padding))\n",
    "        prob = torch.softmax(logits, 2)\n",
    "\n",
    "        most_probable_tokens = torch.argmax(prob, 2)\n",
    "        last_prediction = most_probable_tokens\n",
    "\n",
    "        # Score positions 0..i (position i is the newest prediction);\n",
    "        # CrossEntropyLoss expects (batch, classes, positions)\n",
    "        logits = logits[:, : i + 1, :]\n",
    "        logits = logits.permute(0, 2, 1)\n",
    "\n",
    "        loss: torch.Tensor = cross_entropy(logits, target_logits[:, : i + 1])\n",
    "\n",
    "        last_loss = loss\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        scheduler.step()\n",
    "\n",
    "        # Teacher forcing: reveal the ground-truth token for the next position\n",
    "        if i < SENTENCE_LENGTH - 1:\n",
    "            decoder_list[:, i + 1] = target_logits[:, i]\n",
    "\n",
    "    LOSS_HISTORY.append(float(last_loss))\n",
    "\n",
    "    current_epoch += 1\n",
    "\n",
    "    if current_epoch % 1 == 0:\n",
    "        print(f\"EPOCH {current_epoch}\\n\\tLoss: {last_loss}\")\n",
    "\n",
    "        for encoded_sentence, expected_sentence in zip(\n",
    "            Transformer.tensor2token(last_prediction[:, :], END_TOKEN),  # type: ignore\n",
    "            Transformer.tensor2token(target_logits[:, :], END_TOKEN)\n",
    "        ):\n",
    "            decoded_sentence = TOKENANO.decode(encoded_sentence)\n",
    "            decoded_target = TOKENANO.decode(expected_sentence)\n",
    "            print(f\"\\tACTUAL:\\n\\t\\t{decoded_sentence}\\n\\tEXPECTED:\\n\\t\\t{decoded_target}\\n\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deep_learning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}