NanoSocrates/Playgrounds/encoder.ipynb
2025-10-05 16:30:23 +02:00

111 lines
3.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "c64b0e24",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[7706, 290, 756, 4270, 7357, 115, 351, 1507, 1213, 410, 3382, 317, 497, 4740, 2784, 7700]\n",
"16\n",
"Embedder Tensor: torch.Size([13, 256])\n",
"Values:\n",
"tensor([[-1.0474, 1.9119, 1.3443, ..., -1.5243, 1.2989, 0.3618],\n",
" [ 1.0083, 1.4955, 0.9479, ..., 1.7371, 0.2389, 2.1217],\n",
" [-0.7681, -1.7427, 1.7070, ..., 0.6594, -0.6083, -0.4595],\n",
" ...,\n",
" [-0.7209, -0.3639, -0.6911, ..., 3.3490, -2.7354, 1.1244],\n",
" [-0.7352, -1.6731, 0.2976, ..., 1.5605, -1.3298, 1.3615],\n",
" [-0.5377, 0.3704, -0.4427, ..., 0.4723, 0.5781, 0.2003]],\n",
" grad_fn=<AddBackward0>)\n",
"ENCODER Tensor: torch.Size([13, 256])\n",
"Values:\n",
"tensor([[-1.0270, 0.6589, -0.3154, ..., -1.3113, 0.5058, -0.0608],\n",
" [ 1.0235, 1.2011, -0.3139, ..., 0.1643, 0.6761, 0.9673],\n",
" [-0.7295, -1.5149, 0.4729, ..., 0.3185, -0.2433, -1.2669],\n",
" ...,\n",
" [-0.2189, -0.1399, -1.0049, ..., 1.8693, -2.4663, -0.3319],\n",
" [-0.1491, -0.4986, -0.7297, ..., 1.2760, -0.5654, 0.7038],\n",
" [-1.3576, 0.3478, -0.1016, ..., 0.0712, 0.3772, -0.1522]],\n",
" grad_fn=<NativeLayerNormBackward0>)\n"
]
}
],
"source": [
"import torch\n",
"from pathlib import Path\n",
"import Project_Model.Libs.Embedder as Embedder\n",
"import Project_Model.Libs.BPE as BPE\n",
"import Project_Model.Libs.Transformer as Transformer\n",
"\n",
"# Playground: tokenize one sentence, embed it, run it through a stack of encoder\n",
"# layers, and print the intermediate and final tensors.\n",
"\n",
"# Sample input sentence for the tokenizer.\n",
"TEXT = \"<ABS>The Dark Knight is a 2008 superhero film directed by Christopher Nolan,<SOTL>\"\n",
"\n",
"# Seed torch's global generator before any module is built. No weights are loaded\n",
"# in this cell, so the modules' parameters are presumably randomly initialised;\n",
"# seeding makes Restart Kernel -> Run All print the same values every time.\n",
"SEED = 42\n",
"torch.manual_seed(SEED)\n",
"\n",
"VOCABULARY_PATH = Path(\"Assets/Model/toy_10/toy_dictionary.json\")\n",
"SPECIAL_VOC = BPE.default_special_tokens()\n",
"\n",
"# Build the tokenizer from the stored vocabulary plus the default special tokens.\n",
"VOCABULARY = BPE.load_nanos_vocabulary(VOCABULARY_PATH)\n",
"TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)\n",
"\n",
"TOKENIZATION = TOKENANO.encode(TEXT)\n",
"print(TOKENIZATION)\n",
"\n",
"# Model hyper-parameters for this experiment.\n",
"TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size\n",
"EMBEDDED_SIZE = 256\n",
"FEED_FORWARD_DIM = EMBEDDED_SIZE * 4\n",
"NUMBER_OF_LAYERS = 6\n",
"\n",
"EMBEDDER = Embedder.NanoSocratesEmbedder(TOKEN_SPACE_SIZE, EMBEDDED_SIZE)\n",
"# NOTE(review): the first 3 token ids are skipped before embedding; the reason is\n",
"# not visible in this notebook -- confirm this is intended.\n",
"tensor: torch.Tensor = EMBEDDER(TOKENIZATION[3:])\n",
"\n",
"# Stack of identically configured encoder layers, each one a separate instance.\n",
"# The last argument (4) is presumably the number of attention heads -- TODO confirm\n",
"# against the Transformer.Encoder signature.\n",
"ENCODER = torch.nn.Sequential(\n",
"    *(\n",
"        Transformer.Encoder(EMBEDDED_SIZE, FEED_FORWARD_DIM, 4)\n",
"        for _layer in range(NUMBER_OF_LAYERS)\n",
"    )\n",
")\n",
"# Length of the full tokenization (the embedded tensor covers 3 tokens fewer).\n",
"print(len(TOKENIZATION))\n",
"print(f\"Embedder Tensor: {tensor.shape}\")\n",
"print(f\"Values:\\n{tensor}\")\n",
"\n",
"# Unpacking into two names also checks that the embedder output is 2-dimensional.\n",
"TOKENS, DIMENSIONS = tensor.shape\n",
"\n",
"tensor = ENCODER(tensor)\n",
"# The recorded run shows the encoder stack keeping the (tokens, dimensions) shape;\n",
"# fail loudly if a layer change ever breaks that.\n",
"assert tensor.shape == (TOKENS, DIMENSIONS), \"encoder stack changed the tensor shape\"\n",
"print(f\"ENCODER Tensor: {tensor.shape}\")\n",
"print(f\"Values:\\n{tensor}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "deep_learning",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}