# Constants
# NOTE(review): the roles described below are inferred from the names and from
# how the values relate to each other in this block -- confirm against the code
# that consumes them.

# --- Token space ---
# Number of extra ids added on top of the tokenizer vocabulary
# (see TOKEN_SPACE_SIZE); presumably reserved for mask tokens -- TODO confirm.
MASK_EXTRA_SPACE = 100
# Vocabulary size as reported by the tokenizer object.
REAL_TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size
# Tokenizer vocabulary plus the extra ids above.
TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + MASK_EXTRA_SPACE

# --- Model dimensions ---
EMBEDDED_SIZE = 256
FEED_FORWARD_MULTIPLIER = 4
ATTENTION_HEADS = 4
SENTENCE_LENGTH = 512
NUMBER_OF_BLOCKS = 2

# --- Training schedule ---
MAX_EPOCHS = 2000
PRETRAIN_EPOCHS = 800
# NOTE(review): WARMUP_EPOCHS is larger than PRETRAIN_EPOCHS -- confirm this
# is intended.
WARMUP_EPOCHS = 1400
MINI_BATCH_SIZE = 150
VALIDATION_STEPS = 10
# Checkpoint cadence is tied to the validation cadence.
CHECKPOINT_STEPS = VALIDATION_STEPS
PATIENCE = 4

# --- Resume state ---
# -1 when LAST_EPOCH_PATH is not an existing file; otherwise the integer parsed
# from that file's text (int() raises ValueError if the content is not an
# integer).
if LAST_EPOCH_PATH.is_file():
    CURRENT_EPOCH = int(LAST_EPOCH_PATH.read_text())
else:
    CURRENT_EPOCH = -1

# --- Miscellaneous ---
VERBOSE = False
LEARNING_RATE = 0.1
LABEL_SMOOTHING = 0.01