diff --git a/Playgrounds/nanosocrates-train-toy.ipynb b/Playgrounds/nanosocrates-train-toy.ipynb index 262ea7e..44593d0 100644 --- a/Playgrounds/nanosocrates-train-toy.ipynb +++ b/Playgrounds/nanosocrates-train-toy.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "adbd9598", "metadata": {}, "outputs": [ @@ -8171,6 +8171,26 @@ "\tdbp-dbr:List_of_Honest_Trailers_episodesdbp-dbp:titledbp-dbr:Knives_Outdbp-dbr:Knives_Outdbp-dbp:captionTheatrical release posterdbp-dbr:Knives_Outdbp-dbp:directordbp-dbr:Rian_Johnsondbp-dbr:Knives_Outdbp-dbp:distributordbp-dbr:Lionsgate_Filmsdbp-dbr:Knives_Outdbp-dbp:producerRian Johnsondbp-dbr:Knives_Outdbp-dbp:producerRam Bergmandbp-dbr:Knives_Outdbp-dbp:starringChris Evansdbp-dbr:Knives_Outdbp-dbp:starringDaniel Craigdbp-dbr:Knives_Outdbp-dbp:starringMichael Shannondbp-dbr:Knives_Outdbp-dbp:starringChristopher Plummerdbp-dbr:Knives_Outdbp-dbp:starringJamie Lee Curtisdbp-dbr:Knives_Outdbp-dbp:starringToni Collettedbp-dbr:Knives_Outdbp-dbp:starringAna de Armasdbp-dbr:Knives_Outdbp-dbp:starringDon Johnsondbp-dbr:Knives_Outdbp-dbp:starringLaKeith Stanfielddbp-dbr:Knives_Outdbp-dbp:starringJaeden Martelldbp-dbr:Knives_Outdbp-dbp:starringKatherine Langforddbp-dbr:Knives_Outdbp-dbp:studioMRCdbp-dbr:Knives_Outdbp-dbp:studioT-Streetdbp-dbr:Knives_Outdbp-dbp:titleAwards for Knives Outdbp-dbr:Knives_Outdbp-dbp:writerRian Johnsondbp-dbr:Knives_Outpurl:linguistics/gold/hypernymdbp-dbr:Songdbp-dbr:Knives_Outdbp-dbo:directordbp-dbr:Rian_Johnsondbp-dbr:Knives_Outdbp-dbo:distributor\n", "\n" ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mKeyboardInterrupt\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 142\u001b[39m\n\u001b[32m 140\u001b[39m last_loss = loss\n\u001b[32m 141\u001b[39m loss.backward()\n\u001b[32m--> \u001b[39m\u001b[32m142\u001b[39m \u001b[43moptimizer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 143\u001b[39m scheduler.step()\n\u001b[32m 145\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m i < SENTENCE_LENGTH - \u001b[32m1\u001b[39m:\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\lr_scheduler.py:133\u001b[39m, in \u001b[36mLRScheduler.__init__..patch_track_step_called..wrap_step..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 131\u001b[39m opt = opt_ref()\n\u001b[32m 132\u001b[39m opt._opt_called = \u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;66;03m# type: ignore[union-attr]\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m133\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__get__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mopt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopt\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__class__\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\optimizer.py:516\u001b[39m, in \u001b[36mOptimizer.profile_hook_step..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 511\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 512\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[32m 513\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m must return None or a tuple of (new_args, new_kwargs), but got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 514\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m516\u001b[39m out = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 517\u001b[39m \u001b[38;5;28mself\u001b[39m._optimizer_step_code()\n\u001b[32m 519\u001b[39m \u001b[38;5;66;03m# call optimizer step post hooks\u001b[39;00m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\optimizer.py:81\u001b[39m, in \u001b[36m_use_grad_for_differentiable.._use_grad\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 79\u001b[39m torch.set_grad_enabled(\u001b[38;5;28mself\u001b[39m.defaults[\u001b[33m\"\u001b[39m\u001b[33mdifferentiable\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 80\u001b[39m torch._dynamo.graph_break()\n\u001b[32m---> \u001b[39m\u001b[32m81\u001b[39m ret = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 82\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 83\u001b[39m torch._dynamo.graph_break()\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\adam.py:247\u001b[39m, in \u001b[36mAdam.step\u001b[39m\u001b[34m(self, closure)\u001b[39m\n\u001b[32m 235\u001b[39m beta1, beta2 = group[\u001b[33m\"\u001b[39m\u001b[33mbetas\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 237\u001b[39m has_complex = \u001b[38;5;28mself\u001b[39m._init_group(\n\u001b[32m 238\u001b[39m group,\n\u001b[32m 239\u001b[39m params_with_grad,\n\u001b[32m (...)\u001b[39m\u001b[32m 244\u001b[39m state_steps,\n\u001b[32m 245\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m247\u001b[39m \u001b[43madam\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 248\u001b[39m \u001b[43m \u001b[49m\u001b[43mparams_with_grad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 249\u001b[39m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 250\u001b[39m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 251\u001b[39m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 252\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 253\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 254\u001b[39m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mamsgrad\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 255\u001b[39m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 256\u001b[39m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 257\u001b[39m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 258\u001b[39m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 259\u001b[39m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mweight_decay\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 260\u001b[39m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43meps\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 261\u001b[39m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmaximize\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 262\u001b[39m \u001b[43m \u001b[49m\u001b[43mforeach\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mforeach\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 263\u001b[39m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcapturable\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 264\u001b[39m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdifferentiable\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 265\u001b[39m \u001b[43m \u001b[49m\u001b[43mfused\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfused\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 266\u001b[39m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgrad_scale\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 267\u001b[39m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfound_inf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 268\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgroup\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdecoupled_weight_decay\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 269\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 271\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m loss\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\optimizer.py:149\u001b[39m, in \u001b[36m_disable_dynamo_if_unsupported..wrapper..maybe_fallback\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 147\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m disabled_func(*args, **kwargs)\n\u001b[32m 148\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m149\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\adam.py:949\u001b[39m, in \u001b[36madam\u001b[39m\u001b[34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, has_complex, decoupled_weight_decay, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[39m\n\u001b[32m 946\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 947\u001b[39m func = _single_tensor_adam\n\u001b[32m--> \u001b[39m\u001b[32m949\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 950\u001b[39m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 951\u001b[39m \u001b[43m \u001b[49m\u001b[43mgrads\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 952\u001b[39m \u001b[43m \u001b[49m\u001b[43mexp_avgs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[43m \u001b[49m\u001b[43mexp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 954\u001b[39m \u001b[43m \u001b[49m\u001b[43mmax_exp_avg_sqs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 955\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 956\u001b[39m \u001b[43m \u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[43m=\u001b[49m\u001b[43mamsgrad\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 957\u001b[39m \u001b[43m \u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[43m=\u001b[49m\u001b[43mhas_complex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 958\u001b[39m \u001b[43m \u001b[49m\u001b[43mbeta1\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbeta1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 959\u001b[39m \u001b[43m \u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m=\u001b[49m\u001b[43mbeta2\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 960\u001b[39m \u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlr\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 961\u001b[39m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[43m=\u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 962\u001b[39m \u001b[43m \u001b[49m\u001b[43meps\u001b[49m\u001b[43m=\u001b[49m\u001b[43meps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 963\u001b[39m \u001b[43m \u001b[49m\u001b[43mmaximize\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmaximize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 964\u001b[39m \u001b[43m \u001b[49m\u001b[43mcapturable\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcapturable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 965\u001b[39m \u001b[43m \u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdifferentiable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 966\u001b[39m \u001b[43m \u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[43m=\u001b[49m\u001b[43mgrad_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 967\u001b[39m \u001b[43m \u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfound_inf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 968\u001b[39m \u001b[43m \u001b[49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdecoupled_weight_decay\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 969\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\adam.py:755\u001b[39m, in \u001b[36m_multi_tensor_adam\u001b[39m\u001b[34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, has_complex, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable, decoupled_weight_decay)\u001b[39m\n\u001b[32m 752\u001b[39m torch._foreach_addcdiv_(device_params, device_exp_avgs, exp_avg_sq_sqrt)\n\u001b[32m 753\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 754\u001b[39m bias_correction1 = [\n\u001b[32m--> \u001b[39m\u001b[32m755\u001b[39m \u001b[32m1\u001b[39m - beta1 ** \u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m step \u001b[38;5;129;01min\u001b[39;00m device_state_steps\n\u001b[32m 756\u001b[39m ]\n\u001b[32m 757\u001b[39m bias_correction2 = [\n\u001b[32m 758\u001b[39m \u001b[32m1\u001b[39m - beta2 ** _get_value(step) \u001b[38;5;28;01mfor\u001b[39;00m step \u001b[38;5;129;01min\u001b[39;00m device_state_steps\n\u001b[32m 759\u001b[39m ]\n\u001b[32m 761\u001b[39m step_size = _stack_if_compiling([(lr / bc) * -\u001b[32m1\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m bc \u001b[38;5;129;01min\u001b[39;00m bias_correction1])\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\optim\\optimizer.py:96\u001b[39m, in \u001b[36m_get_value\u001b[39m\u001b[34m(x)\u001b[39m\n\u001b[32m 94\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m x\n\u001b[32m 95\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m96\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mx\u001b[49m\u001b[43m.\u001b[49m\u001b[43mitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(x, torch.Tensor) \u001b[38;5;28;01melse\u001b[39;00m x\n", + "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\Chris\\miniconda3\\envs\\deep_learning\\Lib\\site-packages\\torch\\utils\\_device.py:103\u001b[39m, in \u001b[36mDeviceContext.__torch_function__\u001b[39m\u001b[34m(self, func, types, args, kwargs)\u001b[39m\n\u001b[32m 101\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m func \u001b[38;5;129;01min\u001b[39;00m _device_constructors() \u001b[38;5;129;01mand\u001b[39;00m kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mdevice\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 102\u001b[39m kwargs[\u001b[33m\"\u001b[39m\u001b[33mdevice\u001b[39m\u001b[33m\"\u001b[39m] = \u001b[38;5;28mself\u001b[39m.device\n\u001b[32m--> \u001b[39m\u001b[32m103\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[31mKeyboardInterrupt\u001b[39m: " + ] } ], "source": [ diff --git a/Playgrounds/prova.py b/Playgrounds/prova.py index b81e5fc..39ee7f3 100644 --- a/Playgrounds/prova.py +++ b/Playgrounds/prova.py @@ -147,7 +147,7 @@ while current_epoch < MAX_EPOCHS: ): decoded_sentence = TOKENANO.decode(encoded_sentence) decoded_target = TOKENANO.decode(expected_sentence) - print(f"ACTUAL:\n\t{decoded_sentence}\nEXPECTED:\n\t{decoded_target}") + print(f"ACTUAL:\n\t\t{decoded_sentence}\nEXPECTED:\n\t\t{decoded_target}")