Last fixes

Christian Risi
2025-10-17 22:17:24 +02:00
parent 540b78204c
commit b79521995c
17 changed files with 50 additions and 6041 deletions

(Binary files changed; contents not shown.)

BIN  Assets/Model/curated/log_loss.csv (LFS, new file)
version https://git-lfs.github.com/spec/v1
oid sha256:203b6cb364cf95cbb6cc0ebbff9e8b80e80dda73ff210ad91edeedf6024f6ab1
size 2876


View File

@@ -1,3 +1,4 @@
+import random
 import torch
 from pathlib import Path
 import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
 DEVICE = torch_shims.get_default_device()
 torch.set_default_device(DEVICE)
+
+# set a fixed seed
+torch.manual_seed(0)
+random.seed(0)

 # Get paths
-# MODEL_DIR = "Assets/Model/curated"
-MODEL_DIR= "Assets/Dataset/Tmp"
+MODEL_DIR = "Assets/Model/curated"
+# MODEL_DIR= "Assets/Dataset/Tmp"
 VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
 TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
 VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
 TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
-TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
+# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
 MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
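Note on the seeding change above: torch.manual_seed(0) also seeds CUDA generators when a GPU is present, but full run-to-run determinism usually needs a little more. A minimal sketch (the set_seed helper is hypothetical, not part of this repo):

import random
import torch

def set_seed(seed: int = 0) -> None:
    # Seed Python's RNG and all torch generators (CPU and CUDA, if present).
    random.seed(seed)
    torch.manual_seed(seed)
    # Prefer deterministic kernels; warn instead of erroring where none exist.
    torch.use_deterministic_algorithms(True, warn_only=True)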
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
 REL_TOKEN = TOKENANO.encode("<PRED>")[0]
 OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
 MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
+CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
 SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
 ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
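The new CONTINUTE_TOKEN only works if <CONTINUERDF> is registered as a single vocabulary entry; otherwise encode(...)[0] silently grabs the first sub-token. An illustrative sanity check, assuming the TOKENANO API shown above:

for marker in ("<SUBJ>", "<PRED>", "<OBJ>", "<MASK>", "<CONTINUERDF>"):
    ids = TOKENANO.encode(marker)
    assert len(ids) == 1, f"{marker} splits into {ids}; expected one special id"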
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
 # Spanned_Masker
-MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
+MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
 TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
 VALIDATION_BATCHER = Batch.Batcher(
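For context, average_span=4 sets how long the masked spans are on average. A rough sketch of span masking in the SpanBERT style — not the project's SpannedMasker, just the idea it names:

import random

def span_mask(tokens: list[int], mask_id: int, forbidden: set[int],
              average_span: int = 4, mask_rate: float = 0.15) -> list[int]:
    # Walk the sequence; occasionally mask a geometric-length span,
    # never touching forbidden (special) tokens.
    out = tokens[:]
    i = 0
    while i < len(out):
        if out[i] not in forbidden and random.random() < mask_rate / average_span:
            span = max(1, round(random.expovariate(1 / average_span)))
            for j in range(i, min(i + span, len(out))):
                if out[j] in forbidden:
                    break
                out[j] = mask_id
            i += span
        else:
            i += 1
    return out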
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
     SOS_TOKEN,
     PAD_TOKEN,
     END_TOKEN,
+    CONTINUTE_TOKEN,
     EMBEDDED_SIZE,
     FEED_FORWARD_MULTIPLIER,
     ATTENTION_HEADS,

View File

@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
 # Training constants
 nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
-encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
+encoder_ce = torch.nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTHING)
 decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
 nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
 encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
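Dropping ignore_index=PAD_TOKEN from encoder_ce means padded positions now contribute to the encoder loss, which changes its scale. A small standalone illustration of the difference, with 0 standing in for the pad id:

import torch

logits = torch.randn(1, 4, 5)              # (batch, classes, positions)
target = torch.tensor([[2, 0, 0, 1, 3]])   # two pad (0) positions

ce_all = torch.nn.CrossEntropyLoss()
ce_skip = torch.nn.CrossEntropyLoss(ignore_index=0)
print(ce_all(logits, target))   # mean over all 5 positions
print(ce_skip(logits, target))  # mean over the 3 non-pad positions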
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
         pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
         pred_logits = pred_logits.permute(0, 2, 1)

         # print(torch.max(tgt))
         loss: torch.Tensor = encoder_ce(pred_logits, tgt)

         loss.backward()
         encoder_only_optim.step()

+        exp_tokens: list[int] = tgt_y[0]
+        exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
+        exp_string = TOKENANO.decode(exp_tokens)
+
+        enc_tokens: list[int] = src_x[0]
+        enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
+        enc_string = TOKENANO.decode(enc_tokens)
+
+        print(f"PROMPT:\n{enc_string}")
+        print(f"EXPECTED:\n{exp_string}")
+
         encoder_batch_losses.append(loss.item())
         continue
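The debug block above clamps ids beyond the tokenizer's vocabulary (the masker's sentinel ids) to <MASK> before decoding. The same pattern as a reusable helper — hypothetical, assuming the TOKENANO-style API:

def safe_decode(token_ids: list[int], tokenizer, mask_id: int) -> str:
    # Map any id outside the real vocabulary back to the mask id so
    # decode() never sees a sentinel it does not know.
    clamped = [mask_id if t > tokenizer.vocabulary_size else t for t in token_ids]
    return tokenizer.decode(clamped)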

View File

@@ -1,10 +1,14 @@
 import os
+from pathlib import Path

 class Log:
     def __init__(self, path):
         self.path = path
         header = ["epoch","avg_txt","avg_enc","avg_dec","txt_loss","masking_loss","prediction_loss"]

+        if Path(path).is_file():
+            return
+
         with open(self.path, "w", encoding="utf-8", newline="") as f:
             f.write(",".join(header) + "\n")
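With the guard above, constructing Log twice on the same path no longer truncates an existing CSV. Illustrative usage, path taken from this commit's assets:

log = Log("Assets/Model/curated/log_loss.csv")  # first run: writes the header
log = Log("Assets/Model/curated/log_loss.csv")  # later runs: file kept as-is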

View File

@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
         sos: int,
         pad: int,
         eos: int,
+        continuerdf: int,
         latent_space: int = 256,
         feed_forward_multiplier: int = 4,
         attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
         self.__sos = sos
         self.__pad = pad
         self.__eos = eos
+        self.__continuerdf = continuerdf
         self.__sentence_len = sentence_max_length

         feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
         decoder_in_pad_mask = decoder_in.eq(self.__pad)

         continue_generating = True
-        token_idx = 0
+        token_idx: int = int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
+
         while continue_generating:
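The new token_idx starts generation just after the first <CONTINUERDF> marker in the primed decoder input rather than at position 0. A self-contained illustration of the indexing, with 7 standing in for the marker id:

import torch

decoder_in = torch.tensor([[5, 9, 9, 7, 3, 3]])  # 7 = <CONTINUERDF> (stand-in)
continuerdf = 7
token_idx = int((decoder_in[0] == continuerdf).nonzero()[0].item()) + 1
print(token_idx)  # 4: the first position after the marker

Note that this indexing assumes the marker is present in the input; nonzero() on a sequence without it would yield an empty tensor and the [0] lookup would raise an IndexError.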

File diff suppressed because it is too large.

Binary file not shown.