diff --git a/Playgrounds/prova.py b/Playgrounds/prova.py
index f05f239..ef42ac9 100644
--- a/Playgrounds/prova.py
+++ b/Playgrounds/prova.py
@@ -1,4 +1,5 @@
 import random
+import time
 import torch
 import pandas as pd
 from pathlib import Path
@@ -26,7 +27,7 @@ TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)
 # Constants
 TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + 1
 EMBEDDED_SIZE = 256
-FEED_FORWARD_MULTIPLIER = 4 
+FEED_FORWARD_MULTIPLIER = 4
 ATTENTION_HEADS = 8
 SENTENCE_LENGTH = 256
 NUMBER_OF_BLOCKS = 4
@@ -45,7 +46,7 @@ TOY_DATASET = pd.read_csv(TOY_DATASET_PATH)
 TOY_BATCH_INPUT_LIST: list[list[int]] = []
 TOY_BATCH_PADDING_LIST: list[list[bool]] = []
 TOY_BATCH_TARGET_LIST: list[list[int]] = []
-TOY_BATCH_DECODER_DEFAULT: list[list[int]]= []
+TOY_BATCH_DECODER_DEFAULT: list[list[int]] = []
 
 
 for index, row in TOY_DATASET.iterrows():
@@ -98,7 +99,7 @@ NANOSOCRATES = Transformer.TrainingModel(
     EMBEDDED_SIZE,
     FEED_FORWARD_MULTIPLIER,
     ATTENTION_HEADS,
-    NUMBER_OF_BLOCKS
+    NUMBER_OF_BLOCKS,
 )
 cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)
 optimizer = torch.optim.AdamW(NANOSOCRATES.parameters())
@@ -120,21 +121,31 @@ while current_epoch < MAX_EPOCHS:
     last_loss = 0
     last_prediction: torch.Tensor
 
+    LOSS_HISTORY = []
+
+    start = time.time_ns()
+
+
     for i in range(0, SENTENCE_LENGTH):
+
         optimizer.zero_grad()
 
         tgt_padding = decoder_list.eq(PAD_TOKEN)
 
-        logits: torch.Tensor = NANOSOCRATES((encoder_list, src_padding, decoder_list, tgt_padding))
+        logits: torch.Tensor = NANOSOCRATES(
+            (encoder_list, src_padding, decoder_list, tgt_padding)
+        )
 
         prob = torch.softmax(logits, 2)
         most_probable_tokens = torch.argmax(prob, 2)
         last_prediction = most_probable_tokens
 
-        logits = logits[:,:i,:]
-        logits = logits.permute(0, 2, 1)
+        logits = logits[:, i, :]
+        # logits = logits.permute(0, 2, 1)
 
-        loss : torch.Tensor = cross_entropy(logits, target_logits[:, 0:i])
+        loss: torch.Tensor = cross_entropy(logits, target_logits[:, i])
+        LOSS_HISTORY.append(loss.item())
+        # loss : torch.Tensor = cross_entropy(logits, target_logits[:, 0:i])
         # loss : torch.Tensor = cross_entropy(logits, target_logits)
 
         last_loss = loss
@@ -143,28 +154,24 @@ while current_epoch < MAX_EPOCHS:
         scheduler.step()
 
         if i < SENTENCE_LENGTH - 1:
-            decoder_list[:,i+1] = target_logits[:,i]
-
-
-
-
-
+            decoder_list[:, i + 1] = target_logits[:, i]
 
     current_epoch += 1
 
+    end = time.time_ns()
+
     if current_epoch % 1 == 0:
-        print(f"EPOCH {current_epoch}\n\tLoss: {last_loss}")
-
-        for encoded_sentence, expected_sentence in zip(
-            Transformer.tensor2token(last_prediction[:,:], END_TOKEN), # type: ignore
-            Transformer.tensor2token(target_logits[:,:], END_TOKEN)
-        ):
-            decoded_sentence = TOKENANO.decode(encoded_sentence)
-            decoded_target = TOKENANO.decode(expected_sentence)
-            print(f"\tACTUAL:\n\t\t{decoded_sentence}\n\tEXPECTED:\n\t\t{decoded_target}\n")
-
-
-
-
-
-
+        MIN_LOSS = min(LOSS_HISTORY)
+        MAX_LOSS = max(LOSS_HISTORY)
+        AVERAGE_LOSS = sum(LOSS_HISTORY)/len(LOSS_HISTORY)
+        print(f"EPOCH {current_epoch}\n\tTime: {(end-start)/1E9}s\n\tLoss: {last_loss}")
+        print(f"\tMin Loss: {MIN_LOSS}\tAvg Loss: {AVERAGE_LOSS}\tMax Loss: {MAX_LOSS}\n")
+        # for encoded_sentence, expected_sentence in zip(
+        #     Transformer.tensor2token(last_prediction[:, :], END_TOKEN), # type: ignore
+        #     Transformer.tensor2token(target_logits[:, :], END_TOKEN),
+        # ):
+        #     decoded_sentence = TOKENANO.decode(encoded_sentence)
+        #     decoded_target = TOKENANO.decode(expected_sentence)
+        #     print(
+        #         f"\tACTUAL:\n\t\t{decoded_sentence}\n\tEXPECTED:\n\t\t{decoded_target}\n"
+        #     )
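
Side note on the loss change, not part of the patch: the loop now scores a single decoder position per step instead of a growing prefix, so the logits no longer need to be permuted into the (batch, classes, positions) layout that `torch.nn.CrossEntropyLoss` expects for sequence targets. A minimal sketch of the two shapes, with made-up sizes and random tensors standing in for the model output and targets:

```python
# Illustrative only: stand-in tensors, not the NANOSOCRATES model or its dataset.
import torch

BATCH, LENGTH, VOCAB = 2, 8, 50
PAD_TOKEN = 0  # assumed padding id, purely for the example
cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)

logits = torch.randn(BATCH, LENGTH, VOCAB)                # model output (B, T, V)
target_logits = torch.randint(1, VOCAB, (BATCH, LENGTH))  # target token ids (B, T)

i = 4

# Old variant: loss over the prefix of positions 0..i-1. CrossEntropyLoss wants
# the class dimension second, hence the permute to (B, V, i).
prefix_logits = logits[:, :i, :].permute(0, 2, 1)
loss_prefix = cross_entropy(prefix_logits, target_logits[:, :i])

# New variant: loss at the single position i. (B, V) logits and (B,) targets
# already match the expected layout, so no permute is needed.
loss_step = cross_entropy(logits[:, i, :], target_logits[:, i])

print(loss_prefix.item(), loss_step.item())
```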