Last fixes

Christian Risi 2025-10-17 22:17:24 +02:00
parent 540b78204c
commit b79521995c
17 changed files with 50 additions and 6041 deletions

BIN
Assets/Model/curated/NanoSocrates.zip (Stored with Git LFS)

Binary file not shown.

BIN
Assets/Model/curated/dec_optim(5).zip (Stored with Git LFS)

Binary file not shown.

BIN
Assets/Model/curated/dec_optim.zip (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Model/curated/enc_optim(5).zip (Stored with Git LFS)

Binary file not shown.

BIN
Assets/Model/curated/enc_optim.zip (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Model/curated/last_epoch(5).txt (Stored with Git LFS)

Binary file not shown.

BIN
Assets/Model/curated/last_epoch.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Model/curated/log_loss.csv (Stored with Git LFS) Normal file

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:203b6cb364cf95cbb6cc0ebbff9e8b80e80dda73ff210ad91edeedf6024f6ab1
size 2876

BIN
Assets/Model/curated/nano_optim(5).zip (Stored with Git LFS)

Binary file not shown.

BIN
Assets/Model/curated/nano_optim.zip (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Model/curated/settings.txt (Stored with Git LFS)

Binary file not shown.

View File

@@ -1,3 +1,4 @@
+import random
 import torch
 from pathlib import Path
 import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
 DEVICE = torch_shims.get_default_device()
 torch.set_default_device(DEVICE)
+# set a fixed seed
+torch.manual_seed(0)
+random.seed(0)
 # Get paths
-# MODEL_DIR = "Assets/Model/curated"
-MODEL_DIR= "Assets/Dataset/Tmp"
+MODEL_DIR = "Assets/Model/curated"
+# MODEL_DIR= "Assets/Dataset/Tmp"
 VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
 TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
 VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
 TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
-TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
+# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
 MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
 REL_TOKEN = TOKENANO.encode("<PRED>")[0]
 OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
 MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
+CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
 SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
 ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
 # Spanned_Masker
-MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
+MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
 TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
 VALIDATION_BATCHER = Batch.Batcher(
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
     SOS_TOKEN,
     PAD_TOKEN,
     END_TOKEN,
+    CONTINUTE_TOKEN,
     EMBEDDED_SIZE,
     FEED_FORWARD_MULTIPLIER,
     ATTENTION_HEADS,
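
For context on the new average_span argument: SpannedMasker's implementation is not part of this diff, so the following is only a minimal sketch of span-based masking in the SpanBERT/T5 style, where average_span sets the expected length of each masked run. The function name and the geometric span-length sampling are assumptions, not the project's code.

import random

def span_mask(tokens: list[int], mask_token: int, forbidden: set[int],
              mask_rate: float = 0.15, average_span: int = 4) -> list[int]:
    """Replace random spans with mask_token, skipping forbidden ids (a sketch)."""
    out = list(tokens)
    budget = int(len(out) * mask_rate)  # rough number of positions to mask
    while budget > 0:
        # Sample a geometric span length with mean ~= average_span.
        span = 1
        while random.random() > 1.0 / average_span:
            span += 1
        start = random.randrange(len(out))
        for i in range(start, min(start + span, len(out))):
            if out[i] not in forbidden:
                out[i] = mask_token
        budget -= span
    return out

Note that the debug code added to the training loop below maps ids above TOKENANO.vocabulary_size back to <MASK> before decoding, which hints that the real masker emits out-of-vocabulary sentinel ids per span rather than a single mask id; the sketch uses one mask id for brevity.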

View File

@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
 # Training constants
 nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
-encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
+encoder_ce = torch.nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTHING)
 decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
 nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
 encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
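
Dropping ignore_index=PAD_TOKEN from encoder_ce means padded positions now contribute to the encoder-only loss. A self-contained toy illustration of the difference (the tensors here are made up, not the project's):

import torch

PAD = 0
logits = torch.randn(1, 5, 7)                              # (batch, classes, seq)
targets = torch.tensor([[3, 1, PAD, PAD, PAD, PAD, PAD]])  # mostly padding

with_ignore = torch.nn.CrossEntropyLoss(ignore_index=PAD)
without_ignore = torch.nn.CrossEntropyLoss()

# with_ignore averages over the 2 non-pad targets only; without_ignore
# averages over all 7 positions, so on short sequences the padding
# dominates the gradient signal.
print(with_ignore(logits, targets).item())
print(without_ignore(logits, targets).item())

For a masked-reconstruction objective where pad positions are themselves legitimate targets this can be intentional, but it does change what the reported encoder loss measures.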
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
         pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
         pred_logits = pred_logits.permute(0, 2, 1)
         # print(torch.max(tgt))
         loss: torch.Tensor = encoder_ce(pred_logits, tgt)
         loss.backward()
         encoder_only_optim.step()
+        exp_tokens: list[int] = tgt_y[0]
+        exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
+        exp_string = TOKENANO.decode(exp_tokens)
+        enc_tokens: list[int] = src_x[0]
+        enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
+        enc_string = TOKENANO.decode(enc_tokens)
+        print(f"PROMPT:\n{enc_string}")
+        print(f"EXPECTED:\n{exp_string}")
         encoder_batch_losses.append(loss.item())
         continue
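
The added debug prints clamp any id above the tokenizer's vocabulary size to <MASK> before decoding, since out-of-vocabulary sentinel ids would otherwise be undecodable. Factored into a helper, the pattern looks like this (safe_decode is a hypothetical name; only vocabulary_size and decode mirror calls used above):

def safe_decode(tokenizer, token_ids, mask_token: int) -> str:
    """Decode ids after mapping out-of-vocabulary sentinels to <MASK>."""
    clamped = [mask_token if int(t) > tokenizer.vocabulary_size else int(t)
               for t in token_ids]
    return tokenizer.decode(clamped)

# Usage mirroring the loop above:
#   print(f"PROMPT:\n{safe_decode(TOKENANO, src_x[0], MASK_TOKEN)}")
#   print(f"EXPECTED:\n{safe_decode(TOKENANO, tgt_y[0], MASK_TOKEN)}")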

View File

@@ -1,10 +1,14 @@
 import os
+from pathlib import Path

 class Log:
     def __init__(self, path):
         self.path = path
         header = ["epoch","avg_txt","avg_enc","avg_dec","txt_loss","masking_loss","prediction_loss"]
+        if Path(path).is_file():
+            return
         with open(self.path, "w", encoding="utf-8", newline="") as f:
             f.write(",".join(header) + "\n")
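
The early return makes header creation idempotent: a resumed training run finds the existing CSV and leaves its accumulated rows in place instead of truncating the file with a fresh header. A quick usage sketch under that assumption (the resume scenario is illustrative; the path is the log_loss.csv tracked in this commit):

log = Log("Assets/Model/curated/log_loss.csv")  # first run: creates file, writes header
# ... training is interrupted and restarted ...
log = Log("Assets/Model/curated/log_loss.csv")  # resume: existing CSV kept as-is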

View File

@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
         sos: int,
         pad: int,
         eos: int,
+        continuerdf: int,
         latent_space: int = 256,
         feed_forward_multiplier: int = 4,
         attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
         self.__sos = sos
         self.__pad = pad
         self.__eos = eos
+        self.__continuerdf = continuerdf
         self.__sentence_len = sentence_max_length
         feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
         decoder_in_pad_mask = decoder_in.eq(self.__pad)
         continue_generating = True
-        token_idx = 0
+        token_idx: int = int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
         while continue_generating:
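
The new token_idx seeds autoregressive generation just past the <CONTINUERDF> marker instead of position 0, so decoding resumes after an already-present prefix. The indexing idiom in isolation, on a toy tensor (values assumed for illustration):

import torch

CONTINUERDF = 9
decoder_in = torch.tensor([[5, 2, 7, CONTINUERDF, 0, 0]])  # row 0 of a batch

# (row == token) gives a bool mask; nonzero() lists matching indices
# with shape (n_matches, 1); [0] takes the first match.
marker_pos = (decoder_in[0] == CONTINUERDF).nonzero()[0]
token_idx = int(marker_pos.item()) + 1  # first position to generate

print(token_idx)  # -> 4: generation resumes right after the marker

Note this assumes the marker is always present in row 0; if it were absent, nonzero() would return an empty tensor and the [0] lookup would raise an IndexError.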

File diff suppressed because it is too large

Binary file not shown.