{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c64b0e24",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[7706, 290, 756, 4270, 7357, 115, 351, 1507, 1213, 410, 3382, 317, 497, 4740, 2784, 7700], [7706, 290, 756, 4270, 7357, 115, 351, 1507, 1213, 410, 3382, 317, 497, 4740, 2784, 7700]]\n",
      "2\n",
      "Embedder Tensor: torch.Size([2, 16, 256])\n",
      "Values:\n",
      "tensor([[[-0.6981, 0.0804, -2.1672, ..., 0.3919, 0.3341, 1.0794],\n",
      " [ 2.5818, -0.2308, 0.6001, ..., -0.0500, -0.0408, -0.9852],\n",
      " [-0.6967, 0.8109, 1.3108, ..., 2.1693, 1.4143, -0.1236],\n",
      " ...,\n",
      " [ 2.1226, 2.5695, -1.6178, ..., -0.0652, -0.0802, 0.1103],\n",
      " [ 0.8770, -2.4782, 0.8536, ..., 2.0471, -1.5702, 0.7387],\n",
      " [-0.0495, -1.8601, 0.0405, ..., 2.3944, -0.4297, 1.1141]],\n",
      "\n",
      " [[-0.6981, 0.0804, -2.1672, ..., 0.3919, 0.3341, 1.0794],\n",
      " [ 2.5818, -0.2308, 0.6001, ..., -0.0500, -0.0408, -0.9852],\n",
      " [-0.6967, 0.8109, 1.3108, ..., 2.1693, 1.4143, -0.1236],\n",
      " ...,\n",
      " [ 2.1226, 2.5695, -1.6178, ..., -0.0652, -0.0802, 0.1103],\n",
      " [ 0.8770, -2.4782, 0.8536, ..., 2.0471, -1.5702, 0.7387],\n",
      " [-0.0495, -1.8601, 0.0405, ..., 2.3944, -0.4297, 1.1141]]],\n",
      " grad_fn=)\n",
      "ENCODER Tensor: torch.Size([2, 16, 256])\n",
      "Values:\n",
      "tensor([[[-1.6325, 0.4094, -2.1403, ..., 0.4654, 0.5993, 0.9683],\n",
      " [ 1.8236, 0.4025, -0.6972, ..., 0.2430, 0.2536, -1.0889],\n",
      " [-0.0587, 0.1618, -0.2335, ..., 1.7609, 1.2664, -0.4452],\n",
      " ...,\n",
      " [ 2.0337, 1.3184, -1.3165, ..., -0.3303, 0.6572, 0.0884],\n",
      " [ 0.5752, -2.5594, -0.2393, ..., 1.3318, -1.4236, 0.4686],\n",
      " [ 1.0075, -2.4273, -0.4593, ..., 1.6660, 0.0359, 0.2927]],\n",
      "\n",
      " [[-1.8300, -0.3079, -1.6585, ..., 0.4859, 0.5652, 0.8072],\n",
      " [ 1.5461, -0.5666, -0.0330, ..., 0.5651, 0.2974, -1.0879],\n",
      " [-0.9060, 0.2700, -0.4585, ..., 2.0363, 1.2657, -0.7060],\n",
      " ...,\n",
      " [ 1.6688, 1.7038, -1.9549, ..., -0.2052, 0.6270, 0.4598],\n",
      " [ 0.0482, -2.3951, -0.4351, ..., 1.6230, -1.3662, -0.0390],\n",
      " [ 0.8146, -2.6169, -0.6188, ..., 1.4525, 0.0507, 0.5177]]],\n",
      " grad_fn=)\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "import torch\n",
    "from pathlib import Path\n",
    "import Project_Model.Libs.Embedder as Embedder\n",
    "import Project_Model.Libs.BPE as BPE\n",
    "import Project_Model.Libs.Transformer as Transformer\n",
    "\n",
    "# Smoke test: tokenize one sentence, embed it, run the batch through a stack\n",
    "# of encoder layers, and print the tensor after each stage.\n",
    "\n",
    "# Seed both generators so random draws made through torch or `random` are\n",
    "# repeatable on Restart & Run All.\n",
    "torch.manual_seed(0)\n",
    "random.seed(0)\n",
    "\n",
    "# --- Configuration ---\n",
    "TEXT = \"The Dark Knight is a 2008 superhero film directed by Christopher Nolan,\"\n",
    "# Relative path: resolved against the kernel's current working directory.\n",
    "VOCABULARY_PATH = Path(\"Assets/Model/toy_10/toy_dictionary.json\")\n",
    "EMBEDDED_SIZE = 256\n",
    "FEED_FORWARD_DIM = EMBEDDED_SIZE * 4\n",
    "# Number of Transformer.Encoder layers chained in the stack.\n",
    "ENCODER_LAYERS = 6\n",
    "# Third positional argument of Transformer.Encoder; presumably the number of\n",
    "# attention heads - TODO confirm against Transformer.Encoder.\n",
    "ATTENTION_HEADS = 4\n",
    "\n",
    "# --- Tokenizer ---\n",
    "SPECIAL_VOC = BPE.default_special_tokens()\n",
    "VOCABULARY = BPE.load_nanos_vocabulary(VOCABULARY_PATH)\n",
    "TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)\n",
    "\n",
    "# Batch of two copies of the same sentence, so both rows have equal length.\n",
    "TOKENIZATION = [TOKENANO.encode(TEXT), TOKENANO.encode(TEXT)]\n",
    "print(TOKENIZATION)\n",
    "\n",
    "# --- Embedding ---\n",
    "TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size\n",
    "EMBEDDER = Embedder.NanoSocratesEmbedder(TOKEN_SPACE_SIZE, EMBEDDED_SIZE)\n",
    "tensor: torch.Tensor = EMBEDDER(TOKENIZATION)\n",
    "\n",
    "# --- Encoder stack ---\n",
    "# The layers are created one after another, in order, so the seeded RNG is\n",
    "# consumed exactly as if every layer had been written out by hand.\n",
    "ENCODER = torch.nn.Sequential(\n",
    "    *[\n",
    "        Transformer.Encoder(EMBEDDED_SIZE, FEED_FORWARD_DIM, ATTENTION_HEADS)\n",
    "        for _ in range(ENCODER_LAYERS)\n",
    "    ]\n",
    ")\n",
    "print(len(TOKENIZATION))\n",
    "print(f\"Embedder Tensor: {tensor.shape}\")\n",
    "print(f\"Values:\\n{tensor}\")\n",
    "\n",
    "# --- Forward pass ---\n",
    "BATCH_SIZE, TOKENS, DIMENSIONS = tensor.shape\n",
    "# All-True boolean mask with one entry per token of every sequence.\n",
    "# NOTE(review): whether True marks a real token or a padded one is decided\n",
    "# inside Transformer.Encoder - confirm before feeding padded batches.\n",
    "PAD_MASK = torch.ones((BATCH_SIZE, TOKENS), dtype=torch.bool)\n",
    "# The stack is called with a single 2-tuple and its result is unpacked as a\n",
    "# 2-tuple; torch.nn.Sequential forwards each layer's output to the next one.\n",
    "tensor, _ = ENCODER((tensor, PAD_MASK))\n",
    "# NOTE(review): in the stored output the two batch rows are identical after\n",
    "# the embedder but differ after the encoder, so some layer is stochastic\n",
    "# (presumably dropout in training mode - TODO confirm). Call ENCODER.eval()\n",
    "# before the forward pass if identical rows are expected here.\n",
    "print(f\"ENCODER Tensor: {tensor.shape}\")\n",
    "print(f\"Values:\\n{tensor}\")"
   ]
  }
 ],
"metadata": { "kernelspec": { "display_name": "deep_learning", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 5 }