Last fixes

This commit is contained in:
Christian Risi
2025-10-17 22:17:24 +02:00
parent 540b78204c
commit b79521995c
17 changed files with 50 additions and 6041 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
Assets/Model/curated/log_loss.csv LFS Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:203b6cb364cf95cbb6cc0ebbff9e8b80e80dda73ff210ad91edeedf6024f6ab1
3 size 2876

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,3 +1,4 @@
import random
import torch
from pathlib import Path
import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
DEVICE = torch_shims.get_default_device()
torch.set_default_device(DEVICE)
# set a fixed seed
torch.manual_seed(0)
random.seed(0)
# Get paths
# MODEL_DIR = "Assets/Model/curated"
MODEL_DIR= "Assets/Dataset/Tmp"
MODEL_DIR = "Assets/Model/curated"
# MODEL_DIR= "Assets/Dataset/Tmp"
VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
REL_TOKEN = TOKENANO.encode("<PRED>")[0]
OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
# Spanned_Masker
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
VALIDATION_BATCHER = Batch.Batcher(
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
SOS_TOKEN,
PAD_TOKEN,
END_TOKEN,
CONTINUTE_TOKEN,
EMBEDDED_SIZE,
FEED_FORWARD_MULTIPLIER,
ATTENTION_HEADS,

View File

@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
# Training constants
nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
encoder_ce = torch.nn.CrossEntropyLoss( label_smoothing=LABEL_SMOOTHING)
decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
pred_logits = pred_logits.permute(0, 2, 1)
# print(torch.max(tgt))
loss: torch.Tensor = encoder_ce(pred_logits, tgt)
loss.backward()
encoder_only_optim.step()
exp_tokens: list[int] = tgt_y[0]
exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
exp_string = TOKENANO.decode(exp_tokens)
enc_tokens: list[int] = src_x[0]
enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
enc_string = TOKENANO.decode(enc_tokens)
print(f"PROMPT:\n{enc_string}")
print(f"EXPECTED:\n{exp_string}")
encoder_batch_losses.append(loss.item())
continue

View File

@@ -1,10 +1,14 @@
import os
from pathlib import Path
class Log:
def __init__(self, path):
self.path = path
header = ["epoch","avg_txt","avg_enc","avg_dec","txt_loss","masking_loss","prediction_loss"]
if Path(path).is_file():
return
with open(self.path, "w", encoding="utf-8", newline="") as f:
f.write(",".join(header) + "\n")

View File

@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
sos: int,
pad: int,
eos: int,
continuerdf: int,
latent_space: int = 256,
feed_forward_multiplier: int = 4,
attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
self.__sos = sos
self.__pad = pad
self.__eos = eos
self.__continuerdf = continuerdf
self.__sentence_len = sentence_max_length
feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
decoder_in_pad_mask = decoder_in.eq(self.__pad)
continue_generating = True
token_idx = 0
token_idx: int= int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
while continue_generating:

File diff suppressed because it is too large Load Diff

Binary file not shown.