dev.train #8

Merged
gape_01 merged 50 commits from dev.train into dev 2025-10-17 22:20:14 +02:00
17 changed files with 50 additions and 6041 deletions
Showing only changes of commit b79521995c - Show all commits

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
Assets/Model/curated/log_loss.csv LFS Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:203b6cb364cf95cbb6cc0ebbff9e8b80e80dda73ff210ad91edeedf6024f6ab1
3 size 2876

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,3 +1,4 @@
import random
import torch
from pathlib import Path
import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
DEVICE = torch_shims.get_default_device()
torch.set_default_device(DEVICE)
# set a fixed seed
torch.manual_seed(0)
random.seed(0)
# Get paths
# MODEL_DIR = "Assets/Model/curated"
MODEL_DIR= "Assets/Dataset/Tmp"
MODEL_DIR = "Assets/Model/curated"
# MODEL_DIR= "Assets/Dataset/Tmp"
VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
REL_TOKEN = TOKENANO.encode("<PRED>")[0]
OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
# Spanned_Masker
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
VALIDATION_BATCHER = Batch.Batcher(
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
SOS_TOKEN,
PAD_TOKEN,
END_TOKEN,
CONTINUTE_TOKEN,
EMBEDDED_SIZE,
FEED_FORWARD_MULTIPLIER,
ATTENTION_HEADS,

View File

@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
# Training constants
nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
encoder_ce = torch.nn.CrossEntropyLoss( label_smoothing=LABEL_SMOOTHING)
decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
pred_logits = pred_logits.permute(0, 2, 1)
# print(torch.max(tgt))
loss: torch.Tensor = encoder_ce(pred_logits, tgt)
loss.backward()
encoder_only_optim.step()
exp_tokens: list[int] = tgt_y[0]
exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
exp_string = TOKENANO.decode(exp_tokens)
enc_tokens: list[int] = src_x[0]
enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
enc_string = TOKENANO.decode(enc_tokens)
print(f"PROMPT:\n{enc_string}")
print(f"EXPECTED:\n{exp_string}")
encoder_batch_losses.append(loss.item())
continue

View File

@@ -1,10 +1,14 @@
import os
from pathlib import Path
class Log:
    """CSV log file that is initialised with a header row."""

    def __init__(self, path):
        """Remember ``path`` and make sure the log file starts with the header.

        A file that already holds data is left untouched, so rows written by
        an earlier run are kept. A missing or zero-byte file is (re)written
        with just the header row.

        Args:
            path: Location of the CSV log file (string or path-like).

        Raises:
            OSError: If the file cannot be inspected or opened for writing.
        """
        self.path = path

        log_file = Path(path)
        # Only a non-empty file counts as initialised: a zero-byte leftover
        # (e.g. from an interrupted run) would otherwise never get a header.
        if log_file.is_file() and log_file.stat().st_size > 0:
            return

        header = [
            "epoch",
            "avg_txt",
            "avg_enc",
            "avg_dec",
            "txt_loss",
            "masking_loss",
            "prediction_loss",
        ]
        # newline="" disables newline translation, so the row ends in "\n"
        # on every platform.
        with open(self.path, "w", encoding="utf-8", newline="") as f:
            f.write(",".join(header) + "\n")

View File

@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
sos: int,
pad: int,
eos: int,
continuerdf: int,
latent_space: int = 256,
feed_forward_multiplier: int = 4,
attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
self.__sos = sos
self.__pad = pad
self.__eos = eos
self.__continuerdf = continuerdf
self.__sentence_len = sentence_max_length
feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
decoder_in_pad_mask = decoder_in.eq(self.__pad)
continue_generating = True
token_idx = 0
token_idx: int= int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
while continue_generating:

File diff suppressed because it is too large Load Diff

Binary file not shown.