Last fixes
parent 540b78204c
commit b79521995c
Binary files (Stored with Git LFS; binary content not shown):
  Assets/Model/curated/NanoSocrates.zip
  Assets/Model/curated/dec_optim(5).zip
  Assets/Model/curated/dec_optim.zip       (Normal file)
  Assets/Model/curated/enc_optim(5).zip
  Assets/Model/curated/enc_optim.zip       (Normal file)
  Assets/Model/curated/last_epoch(5).txt
  Assets/Model/curated/last_epoch.txt      (Normal file)
  Assets/Model/curated/log_loss.csv        (Normal file)
  Assets/Model/curated/nano_optim(5).zip
  Assets/Model/curated/nano_optim.zip      (Normal file)
  Assets/Model/curated/settings.txt
@@ -1,3 +1,4 @@
+import random
 import torch
 from pathlib import Path
 import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
 DEVICE = torch_shims.get_default_device()
 torch.set_default_device(DEVICE)
 
+# set a fixed seed
+torch.manual_seed(0)
+random.seed(0)
+
 # Get paths
-# MODEL_DIR = "Assets/Model/curated"
-MODEL_DIR= "Assets/Dataset/Tmp"
+MODEL_DIR = "Assets/Model/curated"
+# MODEL_DIR= "Assets/Dataset/Tmp"
 VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
 TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
 VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
 TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
-TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
+# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
 MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
 
 
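Note on the seeding added above: pinning both the Python and the PyTorch RNGs makes masking and batching reproducible across runs. A minimal sketch of the same idea gathered into one helper (the seed_everything name is ours, not part of this repo; the explicit CUDA call matters only on older PyTorch versions, recent torch.manual_seed already covers all devices):

import random
import torch

def seed_everything(seed: int = 0) -> None:
    # Hypothetical helper: pin every RNG the training script touches in one place.
    random.seed(seed)                     # Python stdlib RNG (span sampling, shuffles)
    torch.manual_seed(seed)               # CPU and, on recent PyTorch, all CUDA generators
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # explicit CUDA seeding for older versions

seed_everything(0)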
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
 REL_TOKEN = TOKENANO.encode("<PRED>")[0]
 OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
 MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
+CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
 
 SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
 ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
 
 
 # Spanned_Masker
-MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
+MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
 
 TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
 VALIDATION_BATCHER = Batch.Batcher(
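The masker now receives average_span=4. The SpannedMasker implementation is not part of this diff, so the following is only a generic illustration of what an average-span knob usually controls in span masking (contiguous spans with lengths drawn around the requested mean); the function name and the exponential sampling are assumptions, and the real class also filters FORBIDDEN_TOKENS, which this sketch ignores:

import random

def sample_spans(length: int, mask_rate: float = 0.15, average_span: int = 4) -> list[tuple[int, int]]:
    # Illustrative only: pick contiguous (start, end) spans whose lengths average
    # roughly `average_span` until about `mask_rate` of the sequence is covered.
    spans: list[tuple[int, int]] = []
    budget = int(length * mask_rate)
    while budget > 0:
        span_len = max(1, min(budget, int(random.expovariate(1 / average_span))))
        start = random.randrange(0, max(1, length - span_len))
        spans.append((start, start + span_len))
        budget -= span_len
    return spans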
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
     SOS_TOKEN,
     PAD_TOKEN,
     END_TOKEN,
+    CONTINUTE_TOKEN,
     EMBEDDED_SIZE,
     FEED_FORWARD_MULTIPLIER,
     ATTENTION_HEADS,
@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
 
 # Training constants
 nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
-encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
+encoder_ce = torch.nn.CrossEntropyLoss( label_smoothing=LABEL_SMOOTHING)
 decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
 nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
 encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
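Only the encoder-only criterion loses ignore_index=PAD_TOKEN here; the full-model and decoder-only losses keep it. In practice this means target positions equal to PAD_TOKEN are no longer excluded from the encoder objective. A tiny self-contained check of the difference (shapes and ids are made up, PAD is 0):

import torch

PAD = 0
logits = torch.randn(2, 7, 5)                       # (batch, num_classes, positions)
target = torch.tensor([[3, 1, PAD, PAD, PAD],
                       [2, 4, 6, PAD, PAD]])

with_ignore = torch.nn.CrossEntropyLoss(ignore_index=PAD)(logits, target)
without_ignore = torch.nn.CrossEntropyLoss()(logits, target)
print(with_ignore.item(), without_ignore.item())    # the second averages over PAD slots too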
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
     pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
     pred_logits = pred_logits.permute(0, 2, 1)
     # print(torch.max(tgt))
 
     loss: torch.Tensor = encoder_ce(pred_logits, tgt)
 
     loss.backward()
     encoder_only_optim.step()
 
+    exp_tokens: list[int] = tgt_y[0]
+    exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
+    exp_string = TOKENANO.decode(exp_tokens)
+
+    enc_tokens: list[int] = src_x[0]
+    enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
+    enc_string = TOKENANO.decode(enc_tokens)
+
+    print(f"PROMPT:\n{enc_string}")
+    print(f"EXPECTED:\n{exp_string}")
+
+
     encoder_batch_losses.append(loss.item())
+
     continue
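The new debug block replaces any id larger than the tokenizer's vocabulary size with MASK_TOKEN before calling decode(), so span-mask sentinel ids outside the real vocabulary still print as readable text. The same clamp pulled out into a standalone helper for clarity (the helper name is ours, not part of the repo):

def printable_ids(ids: list[int], vocab_size: int, mask_id: int) -> list[int]:
    # Ids beyond the tokenizer's vocabulary (e.g. masking sentinels) become <MASK>,
    # so decoding for the PROMPT/EXPECTED debug prints cannot hit an unknown id.
    return [mask_id if i > vocab_size else i for i in ids]

# printable_ids([12, 9001, 7], vocab_size=8000, mask_id=5)  ->  [12, 5, 7]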
@@ -1,10 +1,14 @@
 import os
+from pathlib import Path
 
 class Log:
     def __init__(self, path):
         self.path = path
         header = ["epoch","avg_txt","avg_enc","avg_dec","txt_loss","masking_loss","prediction_loss"]
 
+        if Path(path).is_file():
+            return
+
         with open(self.path, "w", encoding="utf-8", newline="") as f:
             f.write(",".join(header) + "\n")
 
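Read as a whole, this hunk makes the logger idempotent: the CSV header is written only when the file does not exist yet, so a resumed training run keeps the previous log_loss.csv instead of overwriting it with a fresh header. The constructor as it reads after the patch, assuming nothing else in the file changed:

from pathlib import Path

class Log:
    def __init__(self, path):
        self.path = path
        header = ["epoch", "avg_txt", "avg_enc", "avg_dec",
                  "txt_loss", "masking_loss", "prediction_loss"]

        # An existing file means a resumed run: keep its rows and header.
        if Path(path).is_file():
            return

        with open(self.path, "w", encoding="utf-8", newline="") as f:
            f.write(",".join(header) + "\n")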
@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
         sos: int,
         pad: int,
         eos: int,
+        continuerdf: int,
         latent_space: int = 256,
         feed_forward_multiplier: int = 4,
         attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
         self.__sos = sos
         self.__pad = pad
         self.__eos = eos
+        self.__continuerdf = continuerdf
         self.__sentence_len = sentence_max_length
 
         feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
         decoder_in_pad_mask = decoder_in.eq(self.__pad)
 
         continue_generating = True
-        token_idx = 0
+        token_idx: int= int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
+
+
 
         while continue_generating:
 
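Generation no longer starts at index 0: the new token_idx finds the first <CONTINUERDF> token in the primed decoder input and resumes generating in the slot right after it. A tiny check of the indexing expression with made-up ids (9 stands in for the real <CONTINUERDF> id):

import torch

CONTINUERDF = 9
decoder_in = torch.tensor([[1, 4, 4, CONTINUERDF, 0, 0]])   # SOS, primed tokens, marker, padding

token_idx = int((decoder_in[0] == CONTINUERDF).nonzero()[0].item()) + 1
print(token_idx)  # -> 4, the position right after the continuation marker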
File diff suppressed because it is too large
BIN  requirements.txt  (Binary file not shown)