dev.train #8

Merged
gape_01 merged 50 commits from dev.train into dev 2025-10-17 22:20:14 +02:00
17 changed files with 50 additions and 6041 deletions
Showing only changes of commit b79521995c

BIN Assets/Model/curated/log_loss.csv (LFS) Normal file

version https://git-lfs.github.com/spec/v1
oid sha256:203b6cb364cf95cbb6cc0ebbff9e8b80e80dda73ff210ad91edeedf6024f6ab1
size 2876

View File

@@ -1,3 +1,4 @@
+import random
 import torch
 from pathlib import Path
 import Project_Model.Libs.BPE as BPE
@@ -10,14 +11,18 @@ import Project_Model.Libs.Batch as Batch
 DEVICE = torch_shims.get_default_device()
 torch.set_default_device(DEVICE)
 
+# set a fixed seed
+torch.manual_seed(0)
+random.seed(0)
+
 # Get paths
-# MODEL_DIR = "Assets/Model/curated"
-MODEL_DIR= "Assets/Dataset/Tmp"
+MODEL_DIR = "Assets/Model/curated"
+# MODEL_DIR= "Assets/Dataset/Tmp"
 VOCABULARY_PATH = Path("Assets/Model/small/bpe-small-16.json")
 TRAIN_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/train.csv")
 VALIDATION_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/evaluation.csv")
 TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/small/holdout/test.csv")
-TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
+# TEST_DATASET_PATH = Path("Assets/Dataset/1-hop/toy/rdf_text.csv")
 
 MODEL_PATH = Path(f"{MODEL_DIR}/NanoSocrates.zip")
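For context: `torch.manual_seed(0)` also seeds all CUDA devices, so together with `random.seed(0)` this pins both tensor-level and Python-stdlib randomness (e.g. the masker's span sampling). A minimal sketch of going one step stricter, assuming nondeterministic CUDA kernels are a concern for this project:

import random
import torch

torch.manual_seed(0)  # seeds CPU and all CUDA devices
random.seed(0)        # seeds Python's stdlib RNG
# Optional and slower; raises on ops without a deterministic implementation:
# torch.use_deterministic_algorithms(True)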
@@ -44,6 +49,7 @@ SUBJ_TOKEN = TOKENANO.encode("<SUBJ>")[0]
 REL_TOKEN = TOKENANO.encode("<PRED>")[0]
 OBJ_TOKEN = TOKENANO.encode("<OBJ>")[0]
 MASK_TOKEN = TOKENANO.encode("<MASK>")[0]
+CONTINUTE_TOKEN = TOKENANO.encode("<CONTINUERDF>")[0]
 SPECIAL_TOKENS: set[int] = set(TOKENANO.encode("".join(BPE.default_special_tokens())))
 ALLOWED_TOKENS = set([SUBJ_TOKEN, REL_TOKEN, OBJ_TOKEN])
@@ -51,7 +57,7 @@ FORBIDDEN_TOKENS = SPECIAL_TOKENS - ALLOWED_TOKENS
 # Spanned_Masker
-MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS)
+MASKER = Transformer.SpannedMasker(REAL_TOKEN_SPACE_SIZE, FORBIDDEN_TOKENS, average_span=4)
 TRAIN_BATCHER = Batch.Batcher(TRAIN_DATASET_PATH, SENTENCE_LENGTH, TOKENANO, MASKER)
 VALIDATION_BATCHER = Batch.Batcher(
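The new `average_span=4` argument presumably sets the mean length of masked spans. A hypothetical sketch of SpanBERT-style span-length sampling (not the repo's `SpannedMasker`, whose internals are not shown in this diff), where a geometric distribution with success probability 1/average_span yields the requested mean before truncation:

import random

def sample_span_length(average_span: float, max_span: int = 10) -> int:
    # Geometric distribution: mean is 1/p, so p = 1/average_span.
    p = 1.0 / average_span
    length = 1
    while random.random() > p and length < max_span:
        length += 1
    return length

random.seed(0)
lengths = [sample_span_length(4.0) for _ in range(10_000)]
print(sum(lengths) / len(lengths))  # near 4; truncation at max_span pulls it slightly lower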
@@ -75,6 +81,7 @@ NANOSOCRATES = Transformer.NanoSocratesCore(
     SOS_TOKEN,
     PAD_TOKEN,
     END_TOKEN,
+    CONTINUTE_TOKEN,
     EMBEDDED_SIZE,
     FEED_FORWARD_MULTIPLIER,
     ATTENTION_HEADS,

View File

@@ -110,7 +110,7 @@ _, ENCODER_ONLY, DECODER_ONLY = TUtils.decompose_nano_socrates(
 # Training constants
 nano_cross_entropy = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
-encoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
+encoder_ce = torch.nn.CrossEntropyLoss( label_smoothing=LABEL_SMOOTHING)
 decoder_ce = torch.nn.CrossEntropyLoss(ignore_index=PAD_TOKEN, label_smoothing=LABEL_SMOOTHING)
 nano_optim = torch.optim.AdamW(NANOSOCRATES.parameters(), LEARNING_RATE)
 encoder_only_optim = torch.optim.AdamW(ENCODER_ONLY.parameters(), LEARNING_RATE)
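Note the behavioral change here: dropping `ignore_index=PAD_TOKEN` from `encoder_ce` means padded positions now contribute to the encoder loss, unlike `nano_cross_entropy` and `decoder_ce`. Whether that is intended is not stated in the diff. A self-contained example of the difference:

import torch

PAD = 0
ce_all = torch.nn.CrossEntropyLoss()                   # pads count toward the loss
ce_skip = torch.nn.CrossEntropyLoss(ignore_index=PAD)  # pads are skipped

logits = torch.randn(1, 5, 4)              # (batch, classes, sequence)
target = torch.tensor([[2, 1, PAD, PAD]])  # two real tokens, two pads

print(ce_all(logits, target).item())   # mean over all 4 positions
print(ce_skip(logits, target).item())  # mean over the 2 non-pad positions only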
@@ -236,11 +236,24 @@ while current_epoch < MAX_EPOCHS:
         pred_logits = ENCODER_ONLY((enc_x, enc_x_pad))
         pred_logits = pred_logits.permute(0, 2, 1)
         # print(torch.max(tgt))
         loss: torch.Tensor = encoder_ce(pred_logits, tgt)
         loss.backward()
         encoder_only_optim.step()
+        exp_tokens: list[int] = tgt_y[0]
+        exp_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, exp_tokens))
+        exp_string = TOKENANO.decode(exp_tokens)
+        enc_tokens: list[int] = src_x[0]
+        enc_tokens = list(map(lambda x: MASK_TOKEN if x > TOKENANO.vocabulary_size else x, enc_tokens))
+        enc_string = TOKENANO.decode(enc_tokens)
+        print(f"PROMPT:\n{enc_string}")
+        print(f"EXPECTED:\n{exp_string}")
         encoder_batch_losses.append(loss.item())
         continue
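The two `map()` calls replace token ids above the tokenizer's range (such as span-mask sentinel ids) with `<MASK>` so `decode()` does not fail. One detail worth double-checking: assuming valid ids run from 0 to vocabulary_size - 1, the strict `x > TOKENANO.vocabulary_size` lets the id equal to `vocabulary_size` through. A hypothetical helper with the `>=` boundary instead:

def clamp_to_vocab(tokens: list[int], vocab_size: int, mask_token: int) -> list[int]:
    # Replace any id outside [0, vocab_size) with the mask sentinel.
    return [mask_token if t >= vocab_size else t for t in tokens]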

View File

@@ -1,10 +1,14 @@
 import os
+from pathlib import Path
 
 
 class Log:
     def __init__(self, path):
         self.path = path
 
         header = ["epoch","avg_txt","avg_enc","avg_dec","txt_loss","masking_loss","prediction_loss"]
+        if Path(path).is_file():
+            return
+
         with open(self.path, "w", encoding="utf-8", newline="") as f:
             f.write(",".join(header) + "\n")
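The added guard makes header creation idempotent: a resumed run that reuses an existing CSV no longer truncates it (the old code opened the file with mode "w" and rewrote the header on every construction). A standalone sketch of the same pattern:

from pathlib import Path

path = Path("log_loss.csv")  # hypothetical location
header = ["epoch", "avg_txt", "avg_enc", "avg_dec"]
if not path.is_file():  # write the header only when the file is first created
    path.write_text(",".join(header) + "\n", encoding="utf-8")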

View File

@@ -14,6 +14,7 @@ class NanoSocratesCore(torch.nn.Module):
         sos: int,
         pad: int,
         eos: int,
+        continuerdf: int,
         latent_space: int = 256,
         feed_forward_multiplier: int = 4,
         attention_heads: int = 4,
@@ -24,6 +25,7 @@ class NanoSocratesCore(torch.nn.Module):
         self.__sos = sos
         self.__pad = pad
         self.__eos = eos
+        self.__continuerdf = continuerdf
         self.__sentence_len = sentence_max_length
 
         feed_forward_latent_space = latent_space * feed_forward_multiplier
@@ -156,7 +158,9 @@ class NanoSocratesCore(torch.nn.Module):
         decoder_in_pad_mask = decoder_in.eq(self.__pad)
 
         continue_generating = True
-        token_idx = 0
+        token_idx: int = int((decoder_in[0] == self.__continuerdf).nonzero()[0].item()) + 1
 
         while continue_generating:
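Instead of always starting at 0, decoding now resumes one position past the first `<CONTINUERDF>` token in the prompt. A small standalone illustration of the indexing with made-up ids; note that if the token is absent, `.nonzero()` returns an empty tensor and the `[0]` raises IndexError, which the patch appears to accept as an invariant:

import torch

CONTINUERDF = 7  # made-up token id
decoder_in = torch.tensor([[5, 3, CONTINUERDF, 9, 0, 0]])

# Index of the first match, then resume generation right after it.
token_idx = int((decoder_in[0] == CONTINUERDF).nonzero()[0].item()) + 1
print(token_idx)  # 3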

File diff suppressed because it is too large.
