2025-10-16 19:20:09 +02:00

20 lines
575 B (Stored with Git LFS)
Plaintext

# Constants
# Module-level hyperparameters and run settings.
# Descriptions marked "presumably" are inferred from the names only —
# confirm them against the code that consumes these values.

# --- Token space ---
# Vocabulary size as reported by the TOKENANO object.
REAL_TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size
# Number of extra ids added on top of the vocabulary
# (presumably reserved for mask tokens — TODO confirm).
MASK_EXTRA_SPACE = 100
# Vocabulary size plus the extra ids above.
TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + MASK_EXTRA_SPACE

# --- Model shape (presumably transformer dimensions — TODO confirm) ---
EMBEDDED_SIZE = 256
FEED_FORWARD_MULTIPLIER = 4
ATTENTION_HEADS = 4
SENTENCE_LENGTH = 512
NUMBER_OF_BLOCKS = 2

# --- Training schedule ---
MAX_EPOCHS = 2000
PRETRAIN_EPOCHS = 800
# NOTE(review): WARMUP_EPOCHS is larger than PRETRAIN_EPOCHS — confirm this is intended.
WARMUP_EPOCHS = 1400
MINI_BATCH_SIZE = 150
VALIDATION_STEPS = 10
# Checkpoint interval is tied to the validation interval.
CHECKPOINT_STEPS = VALIDATION_STEPS
PATIENCE = 4

# --- Resume state ---
# If LAST_EPOCH_PATH exists as a file, its text content is parsed as an integer;
# otherwise -1 is used as the sentinel (presumably "no previous run" — confirm).
if LAST_EPOCH_PATH.is_file():
    CURRENT_EPOCH = int(LAST_EPOCH_PATH.read_text())
else:
    CURRENT_EPOCH = -1

# --- Miscellaneous ---
VERBOSE = False
LEARNING_RATE = 0.1
LABEL_SMOOTHING = 0.01