diff --git a/Playgrounds/nanosocrates-train-toy.ipynb b/Playgrounds/nanosocrates-train-toy.ipynb index 44593d0..53dcaa7 100644 --- a/Playgrounds/nanosocrates-train-toy.ipynb +++ b/Playgrounds/nanosocrates-train-toy.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "adbd9598", "metadata": {}, "outputs": [ @@ -8357,7 +8357,7 @@ " ):\n", " decoded_sentence = TOKENANO.decode(encoded_sentence)\n", " decoded_target = TOKENANO.decode(expected_sentence)\n", - " print(f\"\\tACTUAL:\\n\\t{decoded_sentence}\\n\\tEXPECTED:\\n\\t{decoded_target}\\n\")\n", + " print(f\"\\tACTUAL:\\n\\t\\t{decoded_sentence}\\n\\tEXPECTED:\\n\\t\\t{decoded_target}\\n\")\n", "\n", "\n", "\n", diff --git a/Playgrounds/prova.py b/Playgrounds/prova.py index 39ee7f3..f05f239 100644 --- a/Playgrounds/prova.py +++ b/Playgrounds/prova.py @@ -27,9 +27,9 @@ TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC) TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + 1 EMBEDDED_SIZE = 256 FEED_FORWARD_MULTIPLIER = 4 -ATTENTION_HEADS = 4 +ATTENTION_HEADS = 8 SENTENCE_LENGTH = 256 -NUMBER_OF_BLOCKS = 2 +NUMBER_OF_BLOCKS = 4 MAX_EPOCHS = int(1e3) @@ -72,6 +72,25 @@ for index, row in TOY_DATASET.iterrows(): TOY_BATCH_TARGET_LIST.append(output_tokens) TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens) + output_tokens = TOKENANO.encode(RDFs) + input_tokens = TOKENANO.encode(Abstract)[1:] + decoder_default_tokens = TOKENANO.encode("") + + input_tokens, padding = Transformer.normalize_sequence( + input_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN + ) + output_tokens, _ = Transformer.normalize_sequence( + output_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN + ) + decoder_default_tokens, _ = Transformer.normalize_sequence( + decoder_default_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN, False + ) + + TOY_BATCH_INPUT_LIST.append(input_tokens) + TOY_BATCH_PADDING_LIST.append(padding) + TOY_BATCH_TARGET_LIST.append(output_tokens) + TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens) + # Training loop LOSS_HISTORY = [] NANOSOCRATES = Transformer.TrainingModel( @@ -91,15 +110,14 @@ while current_epoch < MAX_EPOCHS: optimizer.zero_grad() - encoder_list = torch.tensor([TOY_BATCH_INPUT_LIST[0]]) - decoder_list = torch.tensor([TOY_BATCH_DECODER_DEFAULT[0]]) - src_padding = torch.tensor([TOY_BATCH_PADDING_LIST[0]], dtype=torch.bool) + encoder_list = torch.tensor(TOY_BATCH_INPUT_LIST[:]) + decoder_list = torch.tensor(TOY_BATCH_DECODER_DEFAULT[:]) + src_padding = torch.tensor(TOY_BATCH_PADDING_LIST[:], dtype=torch.bool) # Transform target into logits - target_logits = torch.tensor([TOY_BATCH_TARGET_LIST[0]]) + target_logits = torch.tensor(TOY_BATCH_TARGET_LIST[:]) last_loss = 0 - loss_list = [] last_prediction: torch.Tensor for i in range(0, SENTENCE_LENGTH): @@ -116,11 +134,10 @@ while current_epoch < MAX_EPOCHS: logits = logits[:,:i,:] logits = logits.permute(0, 2, 1) - loss : torch.Tensor = cross_entropy(logits, target_logits[:, :i]) + loss : torch.Tensor = cross_entropy(logits, target_logits[:, 0:i]) # loss : torch.Tensor = cross_entropy(logits, target_logits) last_loss = loss - loss_list.append(loss.item()) loss.backward() optimizer.step() scheduler.step() @@ -136,10 +153,7 @@ while current_epoch < MAX_EPOCHS: current_epoch += 1 if current_epoch % 1 == 0: - loss_list = loss_list[1:] print(f"EPOCH {current_epoch}\n\tLoss: {last_loss}") - print(f"ALL LOSS HISTORY:{loss_list}") - print(f"Max loss:{max(loss_list)}, Min loss: {min(loss_list)}") for encoded_sentence, expected_sentence in zip( Transformer.tensor2token(last_prediction[:,:], END_TOKEN), # type: ignore @@ -147,7 +161,7 @@ while current_epoch < MAX_EPOCHS: ): decoded_sentence = TOKENANO.decode(encoded_sentence) decoded_target = TOKENANO.decode(expected_sentence) - print(f"ACTUAL:\n\t\t{decoded_sentence}\nEXPECTED:\n\t\t{decoded_target}") + print(f"\tACTUAL:\n\t\t{decoded_sentence}\n\tEXPECTED:\n\t\t{decoded_target}\n")