2025-10-04 19:43:42 +02:00
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"id": "06229c81",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"slideshow": {
|
|
|
|
|
"slide_type": "slide"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"torch.Size([256, 256])\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA7UAAAKsCAYAAAAz95rSAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXl8XVW5/t+zz5x5Tpo0bdomnaCltNCBeSiUGRQVUAQRULzCVbjqVX9eEb3XWakoCiqzIoOICmgZCmVsKbQUaOncpmmbZp6TM+/z+2OfrOfdZK8mB5K0Ie/38/FzX9fZZ+219z6p95znWc/rSiaTSRIEQRAEQRAEQRCEMYhxqBcgCIIgCIIgCIIgCB8U+VIrCIIgCIIgCIIgjFnkS60gCIIgCIIgCIIwZpEvtYIgCIIgCIIgCMKYRb7UCoIgCIIgCIIgCGMW+VIrCIIgCIIgCIIgjFnkS60gCIIgCIIgCIIwZpEvtYIgCIIgCIIgCMKYRb7UCoIgCIIgCIIgCGMW+VIrCIIgCIIgCIIgjFk+Ml9qb7/9dqqqqqJAIECLFi2itWvXHuolCYIgCIIgCIIgHJa89NJLdP7551N5eTm5XC76+9//Puh7Vq1aRfPnzye/30/V1dV07733DjjmUHwv+0h8qX344YfppptuoptvvpnWr19PRx11FC1btoyampoO9dIEQRAEQRAEQRAOO3p7e+moo46i22+/fUjH7969m84991w69dRTacOGDfTVr36VrrnmGnr66afVMYfqe5krmUwmR/QMo8CiRYvo2GOPpd/85jdERGSaJlVWVtINN9xA3/zmNw/x6gRBEARBEARBEA5fXC4XPf7443TRRRdpj/nv//5veuqpp2jjxo1q7NJLL6WOjg5asWIFER2672WeEZt5lIhGo7Ru3Tr61re+pcYMw6ClS5fS6tWrHd8TiUQoEomo/26aJrW1tVFhYSG5XK4RX7MgCIIgCIIgCHqSySR1d3dTeXk5GcbYMpeGw2GKRqOH5NzJZHLA9xm/309+v/9Dz7169WpaunSpbWzZsmX01a9+lYg+2Pey4WLMf6ltaWmhRCJBpaWltvHS0lLasmWL43t+9KMf0S233DIayxMEQRAEQRAE4QOyd+9emjhx4qFexpAJh8M0ZXIWNTQlDsn5s7KyqKenxzZ288030/e+970PPXdDQ4Pjd66uri4KhULU3t6e9vey4WLMf6n9IHzrW9+im266Sf33zs5OmjRpElXe8h0yAgHa8LG7iYho3uOfV8f0j43m+Hg5Jx+Xey73/KN6Tj4u91zu+Uf1nHxc7rnc84/qOfm43PORO2dXj0mT59dSdnY2jSWi0Sg1NCVoz7oqysk2RvXcXd0mTV5QS3v37qWcnBw1Phwq7eHOmP9SW1RURG63mxobG23jjY2NVFZW5vgenQR/38X3UVa2Qd9uOYGIiO7+1P3qtZ91HaXqX33iUVX/Pjxd1T/42BOqftjEL0rfuPA5Vf/LVaTq/zjvFVW/7M5U9WfPfpOIiN7w+NTYx858V9Xv+bAN+qzT8avHLn9M1SeevFvVewOwWh97wj5VNwdDqj7quANERNQR7FNjMxY1q7o7A+NVC9pVHc7AHBVHd6k6lhlWdcncXlUnMrGWgiPxXsq0LBq5s/E+IxPXkzkDtTcrrupgTdxx3F+NX8f8WaaqfVNNx3FPlXVPg1m4t+5JsG7wcWOi87irAv9wZWUz28cEt/N4mdthzON8bAnGc7IxHxV7HceTRZrxwoHjyQKf87GaccrXjOdpxnP9Bx23jeVojtWNZ6c5nuVwzqyDHztgPDPwocddGc7HDtt48ODj6Rw7XOOH4px8fLjmNgIfbHw45hgL5+Tjh+KcfHy8nJOPyz2Xe/5ROycRjdmtgVnZLvv/LzcKmGSdLycnx/aldrgoKytz/M6Vk5NDwWCQ3G532t/LhovR/flgBPD5fLRgwQJauXKlGjNNk1auXElLliw5hCsTBEEQBEEQBEH4aLBkyRLbdy4iomeffVZ95zqU38vGvFJLRHTTTTfRlVdeSccccwwtXLiQli9fTr29vXTVVVcd6qUJgiAIgiAIgiAcdvT09NCOHTvUf9+9ezdt2LCBCgoKaNKkSfStb32L9u/fT/ffb7lXr7vuOvrNb35D3/jGN+jzn/88Pf/88/TII4/QU089peY4VN/LPhJfai+55BJqbm6m7373u9TQ0EDz5s2jFStWDNikPBil7ghluw1at3w+ERH9+McvqdduvOtEVd/yrXdU/c17z1T1pv/4napr/nSdqrdffoeqp/7zWlXvuOBOVU9/HnsJ3j3VGl+4Fg9/1bF/UPVFmy5X9Z9nwSJ9w+5PqPqHVY+r+ucHluE6KtBH6jfNJ6v6PyqeJyKiezsWqrFrJr6s6r91H6HqKyrXqPpfvVNV/anKdVhvCBaDCytxv16P5Kn6zImwTr8dDRIR0SkV+MPaHIP9ZElFrap3xGAbnl8OO/WeOOzHR5Y1qHpfApbnmhJYqhsTsDpPKWklIqJWE2MVxbBZt7Px0iLYrLuSmLuooFvVPSbG8wp6Hcdz8i1Ld5+JdLyMHFiyI0lYrgM5Ecdxn248G3PGkrgvnqzYgHF35sAxIiJ3BsZNwj13BeOO4xRMpDceSGjHBuA3HYeTunGfbtyhg5lX09VMO+48N3k0xzuMJzXHasfdIzee1Ph10h0nncPKaTydYw8zkpo1pjM+HHOM9DkFQRCED0ciaVJilBunJpKa/x9Fw5tvvkmnnnqq+u/9mUNXXnkl3XvvvXTgwAGqq6tTr0+ZMoWeeuopuvHGG+lXv/oVTZw4kf74xz/SsmX4rjFc38vS5SPxpZaI6Prrr6frr7/+UC9DEARBEARBEAThsOeUU06hZFL/zfvee+91fM9bb7110HkPxfeyj8yX2uHgrBe/REYwQDV/fp2IiM78zGfUa2V3bVD1dZ89QdVVdyOQ6VeXVam65v42Va+4GAE60x6E+rX5bKh/5Y/hmO6TrWOCT2CDd9ZCvN71NFTQSXOQCLdtFVTTI67NUPXLa2ar+p5PQX399IajVf3Ts6xr/vIbx6qx9SfgA3vSlnNwPfPuUvVlWy9T9e+n/0XV36y7SNXfnYgArdubTlP1Z4teVfUTndZaluZuUmPP985S9Wl5m1X9aqga68rbpup14UpVL8nfqep3I/hl6NiCPareGstX9VH59UREtDOG+3Zk/gFV703g/s/Mx+b3hgRkjql5eOaNCSiOU9h4mwmVsyK3k4iIOpnCWpoDtbfTxHhRDqLZe5JQYQuyEODVx+bJyYLiG2LjWSzAK5JSZTNZeFecWMBWMMaOZSpwBld2cbwv6DzuCWCc/4Lo9icGjBl+vI+rxi6fs9praMZdTE21jacUXJs67HM+VqfIatVU7fEDx3VzkEZhTX/cedgxRUE3h5HeeDKNce2xOjVxOFTjdNXhca5gijosCILw4TApSSaNrlQ72uc7nBjzQVGCIAiCIAiCIAjC+EWUWsb0X/WQxx2jxJK5RETkvw3tT4wCKF/r7pms6pJ2qJm//fvZqq7a+Jqq/+Nl7IGteRnHX/veZ1Wd+zRa9nzngLVPt3hFrRq7479qVF3xTKuqV3wZCuLElVjjps9BwSt/Eb/atH4C+zuLXsXjN89KtbRZi9ZCWSchXj20vkDVJQuyVL1nY7mqp83G+LqtVaqeOQX38flduI6fl7+g6hv2zSQiopvmQb39Ze0Zqh4OFZiI6MTsrapeH8Ia52fWEhHRuxGovUdl7lX1lgjU8SOy6lW9PVqs6lnZUHZr43mqnpbVour9cdzfKVnWc2xIoHVMVTZU3VYTvzlVZGIfbxvboDEhC+OdJlTLkkwou91M8SxgrZl6UuO5rC1THzs2NwOqbpgpr1lBvo+XtVcK8H28CTYOpZYrwb5UCyo+5vXz/bpJx3E+t5sptTYVmKmvNiXYO1Addjkoqe8ft6m92vHkkMd159TuhR2mvbaOqqxOkdUquM7DaY3rlDrdWlzpjTuqw8O0X3VYFF9RKh0Z6b3GgiAIo4lp93+N2jnHK6LUCoIgCIIgCIIgCGM
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1200x800 with 2 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"import torch\n",
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"import Project_Model.Libs.Embedder as Embedder\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"TOKENS = 256\n",
|
|
|
|
|
"DIMENSIONS = 256\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Custom code made by Christian Risi and Giuseppe Gassi\n",
|
|
|
|
|
"TENSOR = Embedder.fixed_positional_encoding(TOKENS, DIMENSIONS)\n",
|
|
|
|
|
"# print(TENSOR)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Code taken from\n",
|
|
|
|
|
"# https://github.com/jalammar/jalammar.github.io/blob/master/notebookes/transformer/transformer_positional_encoding_graph.ipynb\n",
|
|
|
|
|
"# to test for correctness of custom code\n",
|
|
|
|
|
"print (TENSOR.shape)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.figure(figsize=(12,8))\n",
|
|
|
|
|
"plt.pcolormesh(TENSOR, cmap='viridis')\n",
|
|
|
|
|
"plt.xlabel('Embedding Dimensions')\n",
|
|
|
|
|
"plt.xlim((0, DIMENSIONS))\n",
|
|
|
|
|
"plt.ylim((TOKENS,0))\n",
|
|
|
|
|
"plt.ylabel('Token Position')\n",
|
|
|
|
|
"plt.colorbar()\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2025-10-05 16:30:23 +02:00
|
|
|
"execution_count": 2,
|
2025-10-04 19:43:42 +02:00
|
|
|
"id": "c7ad6593",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"[7706, 290, 756, 4270, 7357, 115, 351, 1507, 1213, 410, 3382, 317, 497, 4740, 2784, 7700]\n",
|
|
|
|
|
"16\n",
|
|
|
|
|
"torch.Size([16, 256])\n",
|
2025-10-05 16:30:23 +02:00
|
|
|
"tensor([[ 0.9070, 0.6699, 0.2960, ..., 0.3870, 0.8941, 1.3026],\n",
|
|
|
|
|
" [-0.9618, -1.9069, 1.6814, ..., 0.0977, 0.0829, 2.6063],\n",
|
|
|
|
|
" [ 0.8923, 0.2553, 1.9925, ..., 1.8682, -0.1530, 0.7194],\n",
|
2025-10-04 19:43:42 +02:00
|
|
|
" ...,\n",
|
2025-10-05 16:30:23 +02:00
|
|
|
" [ 0.7546, 1.0222, -0.7065, ..., 4.0476, 0.5369, 1.5168],\n",
|
|
|
|
|
" [ 1.4984, 0.9595, 0.5984, ..., 0.7721, -0.6914, 1.1631],\n",
|
|
|
|
|
" [ 1.2913, -1.0850, 2.0208, ..., 0.1654, 0.9178, 0.5116]],\n",
|
2025-10-04 19:43:42 +02:00
|
|
|
" grad_fn=<AddBackward0>)\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2025-10-05 16:30:23 +02:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5YAAAKsCAYAAACXnSIBAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAl3xJREFUeJzs3Xl8VNX9+P/3zCQzkz1kT4AQ9n0HEVQEQRZxr1attWj7sxtaK9Yq7ce1n5bWtn7sp/pRu3yli3sV14oiCogCyib7EggkLEmA7Hsyc39/TDjvmwaUYWAywdfz8ZgHb07uvWe55547J+dmxmFZliUAAAAAAJwiZ0cXAAAAAADQuTGxBAAAAACEhIklAAAAACAkTCwBAAAAACFhYgkAAAAACAkTSwAAAABASJhYAgAAAABCwsQSAAAAABASJpYAAAAAgJAwsQQAAAAAhKRTTCyfeOIJycvLE6/XK+PGjZNPP/20o4sEAAAAAGgV8RPLF198UebOnSsPPPCArFu3ToYPHy7Tp0+X0tLSji4aAAAAAEBEHJZlWR1diC8ybtw4GTt2rDz++OMiIuL3+6V79+5y++23y7333tvBpQMAAAAARHV0Ab5IU1OTrF27VubNm2fSnE6nTJ06VVauXHncfRobG6WxsdH83+/3S1lZmaSmporD4TjjZQYAAABwYpZlSXV1teTk5IjTGfEPULbT0NAgTU1NYc/X7XaL1+sNe74nK6InlkeOHBGfzyeZmZlt0jMzM2X79u3H3Wf+/Pny0EMPhaN4AAAAAE5RUVGRdOvWraOLEZSGhgbp2SNeikt9Yc87KytLCgoKInZyGdETy1Mxb948mTt3rvl/ZWWl5ObmStff/EycXq84GvS3IlG1GjentpjYXaLNkpwfeFK4oo+udsYc0fws2yLooK/rZHftsgEmbspoNrHnQLSm5+nKavKnHhO3xOgxXa2/DBl9w0aT9tEHw0zcZcRhE8c9Gm/i/bf6NU+P1q2mUg8et8Vt4qgLyk1cvSdZRES8h7Rydd314omqdWlZYzWf6Mw6Eycs0bK4LtMGO7w/WfPfre3ss10fMeeUiYhIxR7dduCwQhPvOJRh4v7Z+re2hW/mmdivzSxObX6JPazlreir578pU9tI/IF6O5q1/slbddv+N+l5Xve+nueWRH2qPGmX7lt9Qa3mvyrOxLlXFZh4y+c9AoGtP1lRejz3EW3ztE16Lg7O0jjlEz2flf1s+5brQe3t0pip+zqaAtsk7NF6VvfStsobdNDEVS92NfHRsbZB1a3bS5MeJ3Gbnmd3tZarKlfLFdUQ+Lc+W4/hSNHrwxurJzH+tQQTHx6tx4sr0jy9k7TPVW5IM7FlG/Fc9YF/c87fb9L2HtJto4q0U3qO6n51o+pN7Dyo2zhabCevT43m49Q6Odcm6nH6BS7uhI163hq0a0vfc/eaeM+HeVqWCltZbNt7yzSuHqq/Rc16Tyt9aLKWZcDvA9fOzp+naB2c2p6X9N9i4k/+MsbEflsbVo5rMLGzVMewmGJti16X7THx9mW9AsHAapNm5ev59Nv6vKe3btOyWdsttkTzr+6p28cc1Dxb9DKT5F1aZ/ctxSIicujTHJPmsP0xSI/frdf830g28Z53e5q4WYsrXbbpsZNu1n5UUR9r4tJDSVr2gsAF2KzVaXOd+zO0z8ds1b5V29s2iNm4S/WCbumu5+L8PvkmXrFmUCCfBB3jog9qn3P003Z2r9bKXXvTByZefvs4Ey/85/MmPveJ/89WDy1XQpHWyfp64OI5slv72ZRxej/7/OnhJk7/1j4TH3opz8Tlg3WcSd6uY2F1nuaT85Fuk/BjPRdHngmMrSPmfG7SPnlxhInrc2x9yNZvnbZbQv0EvZ6bq7Wfi8t2/zuk6c1JgbKk5ep91feWji3uy/W+dWRLupZ7r+bfkKrZJJyj41nDEj1Og+7aph9ZXQN9Ie0dLVOfH+h9a9fTA018dIaOZ74GPYk5ObYB5a+a0ZGh2v4u7XLSmGkb51rfZ/m9tvHcNj5GVWnsrtJjRF2keVZv62Li6ErdPmu1ZlrwTR3zneWBayGhQLet6q35R9Xotva+ai9jVIXtHmKrfmOybXvb6U8apucl9WeBf/fdbxvPS3Ugctruia4aLWP6mGITl5TpwND7riIT757bT/et032nzNDxavX/GykiIvFX6726MF8XZezvIRILta9U9LX1edviW1OujkWOKh1nosu1Hi3JrW3ns73H8Nja0/YeuyVJr09XfGA889c3SuFtj0hCgm1Q7SSampqkuNQn+9bmSWJC+FZbq6r90mP0XmlqamJieSrS0tLE5XJJSUlJm/SSkhLJyso67j4ej0c8Hk+7dKfXK84YrzgctoHIZ4tj9C7i8mqzREVbrWl64bh03BDL1p+i4/QHTtsJd8bYBmJvtC3dfkwts2W/b7X+647XY7tsx46K042jomx52iZ8Lo++KXE2eW3ptmPG6nGOld3lsQ04MTooOH0uW7otH3uebls+tjI6Y+z527qfvc6tZbFv26ZtY4+f7vLY3uTbJ5a2cxQVbSuj9/jn30wso+znR7dtk6ftXPi9lm17W9vF2gZUzwnqdKyuJ5hYurza5lHRtnNhOy8ut73/2fa1nUeHre/a93W4HK3HsLeJ7QZhO4f2c2s/RpuJpct2E7WdZ1ejvU629rXa5+mItf081na8Nvnb62nbxtaf25wj+8SyNSt73ex9y76fy9Y/nbGap/06t79xcsTaxhP7xNJjL7uz9dj286b5nKhv28cfl237NmWMsf0CLTrKlm47p87W68xWZ/vE0hOvF5G9zR22NnTafgnm9Nra3Nbnjnu92H5R4LffHO19Plbf5Vje49f/RP3cPobar/lj59p+bu0TyyjbwHGi9rf/Eux4xxYRcTlPNOZFtzuG/ToX+z2hTV/R69/Ofj/x61y2zf3iWP5Wm3ucbWJpOxf2PL2282+/tyQmuI67fZtf2rhtk5y49uN5m/uZ+wTj+QnGGZfbdv+xnX/7uGg/TlS0tzVPW3+2t+0J+pDT1uQu2/Xsazn+xNLe/4+V1z4Oia0+bcacNn3bfv5t2dj7Vpuy6zZtJpatfSEqWvezt/mxNhFpO55ZtovbXkaJto+LtvNv67pO2wTNeexzIU8wsXQ1nqCex3kfIiLiarBNStuMP7b7VX3g/La599rfn7Totvb7gL2M9vcEbcZZ+1tK+5hrK2+U6zh1sPV5+6OeLvsk294XGmx9xHn895Muv21SfpwxOupE77fs581tv4fZzoXtvZL9/amj2ZZPva0dve0nlvb2bPse2/a+JbbteNaZ/0wtPsEh8QnhK79fIr+tIvqhZrfbLaNHj5YlS5aYNL/fL0uWLJHx48d3YMkAAAAAAMdE9IqliMjcuXNl9uzZMmbMGDnnnHPksccek9raWrnllls6umgAAAAAAOkEE8vrrrtODh8+LPfff78UFxfLiBEjZNGiRe0+0AcAAAAAwsFn+cUXxi9t9Fn+L9+og0X8xFJE5LbbbpPbbruto4sBAAAAADiOTjGxBAAAAIBI4RdL/BK+Jctw5nWqIvrDewAAAAAAkY8VSwAAAAAIgl/8Es6/egxvbqeGFUsAAAAAQEiYWAIAAAAAQsKjsAAAAAAQBJ9lic8K3wfqhDOvU8WKJQAAAAAgJKxYAgAAAEAQ+LqR9lixBAAAAACEhIklAAAAACAkPAoLAAAAAEHwiyU+HoVtgxVLAAAAAEBIWLEEAAAAgCDw4T3tsWIJAAAAAAgJK5YAAAAAEASfZYnPCt8qYjjzOlWsWAIAAAAAQsLEEgAAAADOYr/+9a/F4XDIj3/84zOWB4/CAgAAAEAQ/K2vcOZ3qj777DN5+umnZdiwYaetPMfDiiUAAAAAnIVqamrkxhtvlD//+c/SpUuXM5oXE0sAAAAACIJPrLC/RESqqqravBobG7+wnHPmzJFZs2bJ1KlTz3ibMLEEAAAAgE6ge/fukpSUZF7z588/4bYvvPCCrFu37gu3OZ34G0sAAAAA6ASKiookMTHR/N/j8ZxwuzvuuEMWL14sXq83LGVjYgk
|
2025-10-04 19:43:42 +02:00
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1200x800 with 2 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"from pathlib import Path\n",
|
|
|
|
|
"import Project_Model.Libs.BPE as BPE\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"TEXT = \"<ABS>The Dark Knight is a 2008 superhero film directed by Christopher Nolan,<SOTL>\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"VOCABULARY_PATH = Path(\"Assets/Model/toy_10/toy_dictionary.json\")\n",
|
|
|
|
|
"SPECIAL_VOC = BPE.default_special_tokens()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"VOCABULARY = BPE.load_nanos_vocabulary(VOCABULARY_PATH)\n",
|
|
|
|
|
"TOKENANO = BPE.TokeNanoCore(\n",
|
|
|
|
|
" VOCABULARY,\n",
|
|
|
|
|
" SPECIAL_VOC\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"TOKENIZATION = TOKENANO.encode(TEXT)\n",
|
|
|
|
|
"print(TOKENIZATION)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size\n",
|
|
|
|
|
"EMBEDDED_SIZE = 256\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"EMBEDDER = Embedder.NanoSocratesEmbedder(TOKEN_SPACE_SIZE, EMBEDDED_SIZE)\n",
|
|
|
|
|
"TENSOR: torch.Tensor = EMBEDDER(TOKENIZATION)\n",
|
|
|
|
|
"print(len(TOKENIZATION))\n",
|
|
|
|
|
"print(TENSOR.shape)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(TENSOR)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"TOKENS, DIMENSIONS = TENSOR.shape\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.figure(figsize=(12,8))\n",
|
|
|
|
|
"plt.pcolormesh(TENSOR.detach().numpy(), cmap='viridis')\n",
|
|
|
|
|
"plt.xlabel('Embedding Dimensions')\n",
|
|
|
|
|
"plt.xlim((0, DIMENSIONS))\n",
|
|
|
|
|
"plt.ylim((TOKENS,0))\n",
|
|
|
|
|
"plt.ylabel('Token Position')\n",
|
|
|
|
|
"plt.colorbar()\n",
|
|
|
|
|
"plt.show()\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "deep_learning",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.13.7"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|