import random
from pathlib import Path

import pandas as pd
import torch

import Project_Model.Libs.Embedder as Embedder
import Project_Model.Libs.BPE as BPE
import Project_Model.Libs.Transformer as Transformer
import Project_Model.Libs.TorchShims as torch_shims

# set a fixed seed
torch.manual_seed(0)
random.seed(0)

# set a default device
DEVICE = torch_shims.get_default_device()
torch.set_default_device(DEVICE)

# BPE init
VOCABULARY_PATH = Path("Assets/Model/toy_10/toy_dictionary.json")
SPECIAL_VOC = BPE.default_special_tokens()
VOCABULARY = BPE.load_nanos_vocabulary(VOCABULARY_PATH)
TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)

# Constants
TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + 1
EMBEDDED_SIZE = 256
FEED_FORWARD_MULTIPLIER = 4
ATTENTION_HEADS = 8
SENTENCE_LENGTH = 256
NUMBER_OF_BLOCKS = 4
MAX_EPOCHS = int(1e3)
PAD_TOKEN = TOKENANO.encode("")[0]
END_TOKEN = TOKENANO.encode("")[0]

# Load CSV
TOY_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
TOY_DATASET = pd.read_csv(TOY_DATASET_PATH)

TOY_BATCH_INPUT_LIST: list[list[int]] = []
TOY_BATCH_PADDING_LIST: list[list[bool]] = []
TOY_BATCH_TARGET_LIST: list[list[int]] = []
TOY_BATCH_DECODER_DEFAULT: list[list[int]] = []

for index, row in TOY_DATASET.iterrows():

    RDFs: str = row["RDFs"]
    Abstract: str = row["Abstract"]

    # Forward task: RDF triples in, abstract text out
    input_tokens = TOKENANO.encode(RDFs)
    output_tokens = TOKENANO.encode(Abstract)[1:]
    decoder_default_tokens = TOKENANO.encode("")

    input_tokens, padding = Transformer.normalize_sequence(
        input_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
    )
    output_tokens, _ = Transformer.normalize_sequence(
        output_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
    )
    decoder_default_tokens, _ = Transformer.normalize_sequence(
        decoder_default_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN, False
    )

    TOY_BATCH_INPUT_LIST.append(input_tokens)
    TOY_BATCH_PADDING_LIST.append(padding)
    TOY_BATCH_TARGET_LIST.append(output_tokens)
    TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens)

    # Reverse task: abstract text in, RDF triples out
    output_tokens = TOKENANO.encode(RDFs)
    input_tokens = TOKENANO.encode(Abstract)[1:]
    decoder_default_tokens = TOKENANO.encode("")

    input_tokens, padding = Transformer.normalize_sequence(
        input_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
    )
    output_tokens, _ = Transformer.normalize_sequence(
        output_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
    )
    decoder_default_tokens, _ = Transformer.normalize_sequence(
        decoder_default_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN, False
    )

    TOY_BATCH_INPUT_LIST.append(input_tokens)
    TOY_BATCH_PADDING_LIST.append(padding)
    TOY_BATCH_TARGET_LIST.append(output_tokens)
    TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens)

# Training loop
LOSS_HISTORY = []

NANOSOCRATES = Transformer.TrainingModel(
    TOKEN_SPACE_SIZE,
    EMBEDDED_SIZE,
    FEED_FORWARD_MULTIPLIER,
    ATTENTION_HEADS,
    NUMBER_OF_BLOCKS,
)

cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN)
optimizer = torch.optim.AdamW(NANOSOCRATES.parameters())
scheduler = Transformer.WarmupLR(optimizer, 4000, EMBEDDED_SIZE)

last_loss = 0
current_epoch = 0

while current_epoch < MAX_EPOCHS:

    optimizer.zero_grad()

    encoder_list = torch.tensor(TOY_BATCH_INPUT_LIST[:])
    decoder_list = torch.tensor(TOY_BATCH_DECODER_DEFAULT[:])
    src_padding = torch.tensor(TOY_BATCH_PADDING_LIST[:], dtype=torch.bool)

    # Target token ids for the loss (class indices, not logits)
    target_logits = torch.tensor(TOY_BATCH_TARGET_LIST[:])

    last_loss = 0
    last_prediction: torch.Tensor

    # Teacher-forced decoding, one target position per inner step
    for i in range(0, SENTENCE_LENGTH):

        optimizer.zero_grad()

        tgt_padding = decoder_list.eq(PAD_TOKEN)
        logits: torch.Tensor = NANOSOCRATES(
            (encoder_list, src_padding, decoder_list, tgt_padding)
        )

        prob = torch.softmax(logits, 2)
        most_probable_tokens = torch.argmax(prob, 2)
        last_prediction = most_probable_tokens

        # Keep only the positions decoded so far; slicing with :i would be empty
        # at i == 0 and make the loss NaN, so include position i as well.
        logits = logits[:, : i + 1, :]
        logits = logits.permute(0, 2, 1)

        loss: torch.Tensor = cross_entropy(logits, target_logits[:, : i + 1])
        # loss: torch.Tensor = cross_entropy(logits, target_logits)

        last_loss = loss
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Teacher forcing: feed the ground-truth token into the next decoder slot
        if i < SENTENCE_LENGTH - 1:
            decoder_list[:, i + 1] = target_logits[:, i]

    current_epoch += 1

    if current_epoch % 1 == 0:
        print(f"EPOCH {current_epoch}\n\tLoss: {last_loss}")

        for encoded_sentence, expected_sentence in zip(
            Transformer.tensor2token(last_prediction[:, :], END_TOKEN),  # type: ignore
            Transformer.tensor2token(target_logits[:, :], END_TOKEN),
        ):
            decoded_sentence = TOKENANO.decode(encoded_sentence)
            decoded_target = TOKENANO.decode(expected_sentence)

            print(f"\tACTUAL:\n\t\t{decoded_sentence}\n\tEXPECTED:\n\t\t{decoded_target}\n")