diff --git a/Playgrounds/nanosocrates-train-experiment-2.py b/Playgrounds/nanosocrates-train-experiment-2.py index 450630f..71844f4 100644 --- a/Playgrounds/nanosocrates-train-experiment-2.py +++ b/Playgrounds/nanosocrates-train-experiment-2.py @@ -46,7 +46,7 @@ NUMBER_OF_BLOCKS = 4 MAX_EPOCHS = int(3e3) PRETRAIN_EPOCHS = int(300) WARMUP_EPOCHS = int(1e3) -MINI_BATCH_SIZE = 300 +MINI_BATCH_SIZE = 80 VALIDATION_STEPS = 5 CHECKPOINT_STEPS = VALIDATION_STEPS * 4 PATIENCE = 4 @@ -185,7 +185,7 @@ while current_epoch < MAX_EPOCHS: pred_logits = ENCODER_ONLY((enc_x, enc_x_pad)) pred_logits = pred_logits.permute(0, 2, 1) - print(torch.max(tgt)) + # print(torch.max(tgt)) loss: torch.Tensor = encoder_ce(pred_logits, tgt) loss.backward()