Last fixes
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import random
|
||||
import torch
|
||||
from pathlib import Path
|
||||
import Project_Model.Libs.BPE as BPE
|
||||
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
|
||||
DEVICE = torch_shims.get_default_device()
|
||||
torch.set_default_device(DEVICE)
|
||||
|
||||
# Seed both torch and Python's `random` module with a fixed value (0)
|
||||
torch.manual_seed(0)
|
||||
random.seed(0)
|
||||
|
||||
# Get paths
|
||||
# MODEL_DIR = "Assets/Model/curated"
|
||||
MODEL_DIR= "Assets/Dataset/Tmp"
|
||||
MODEL_DIR = "Assets/Model/curated"
|
||||
# MODEL_DIR= "Assets/Dataset/Tmp"
|
||||
VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
|
||||
TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
|
||||
VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
|
||||
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
|
||||
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
|
||||
# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
|
||||
MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
|
||||
|
||||
|
||||
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
|
||||
REL_TOKEN = TOKENANO.encode("<PRED>")[0]
|
||||
OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
|
||||
MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
|
||||
CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
|
||||
|
||||
SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
|
||||
# Special tokens that are subtracted from SPECIAL_TOKENS when FORBIDDEN_TOKENS is built.
ALLOWED_TOKENS = {SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN}
|
||||
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
|
||||
|
||||
|
||||
# Spanned_Masker
|
||||
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
|
||||
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
|
||||
|
||||
TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
|
||||
VALIDATION_BATCHER = Batch.Batcher(
|
||||
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
|
||||
SOS_TOKEN,
|
||||
PAD_TOKEN,
|
||||
END_TOKEN,
|
||||
CONTINUTE_TOKEN,
|
||||
EMBEDDED_SIZE,
|
||||
FEED_FORWARD_MULTIPLIER,
|
||||
ATTENTION_HEADS,
|
||||
|
||||
@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
|
||||
|
||||
# Training constants
|
||||
nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
|
||||
encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
|
||||
encoder_ce = torch.nn.CrossEntropyLoss( label_smoothing=LABEL_SMOOTHING)
|
||||
decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
|
||||
nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
|
||||
encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
|
||||
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
|
||||
pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
|
||||
pred_logits = pred_logits.permute(0, 2, 1)
|
||||
# print(torch.max(tgt))
|
||||
|
||||
loss: torch.Tensor = encoder_ce(pred_logits, tgt)
|
||||
|
||||
loss.backward()
|
||||
encoder_only_optim.step()
|
||||
|
||||
exp_tokens: list[int] = tgt_y[0]
|
||||
# Replace every id strictly greater than TOKENANO.vocabulary_size with MASK_TOKEN.
# NOTE(review): if vocabulary_size is a count of ids starting at 0, `>=` may have been intended - confirm.
exp_tokens = [
    MASK_TOKEN if token_id > TOKENANO.vocabulary_size else token_id
    for token_id in exp_tokens
]
|
||||
exp_string = TOKENANO.decode(exp_tokens)
|
||||
|
||||
enc_tokens: list[int] = src_x[0]
|
||||
# Replace every id strictly greater than TOKENANO.vocabulary_size with MASK_TOKEN.
# NOTE(review): if vocabulary_size is a count of ids starting at 0, `>=` may have been intended - confirm.
enc_tokens = [
    MASK_TOKEN if token_id > TOKENANO.vocabulary_size else token_id
    for token_id in enc_tokens
]
|
||||
enc_string = TOKENANO.decode(enc_tokens)
|
||||
|
||||
print(f"PROMPT:\n{enc_string}")
|
||||
print(f"EXPECTED:\n{exp_string}")
|
||||
|
||||
|
||||
encoder_batch_losses.append(loss.item())
|
||||
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user