From 17d82f0a4ece9560600a1f65d46cb70c7c4ed72c Mon Sep 17 00:00:00 2001 From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com> Date: Thu, 2 Oct 2025 08:48:28 +0200 Subject: [PATCH] Added support to resume workload --- Scripts/Training/bpe_trainer_pool.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Scripts/Training/bpe_trainer_pool.py b/Scripts/Training/bpe_trainer_pool.py index 5c7ab6e..966816d 100644 --- a/Scripts/Training/bpe_trainer_pool.py +++ b/Scripts/Training/bpe_trainer_pool.py @@ -72,11 +72,17 @@ def train(args: ProgramArgs): VOCABULARY_PATH = Path(args.output_file) CACHE_PATH = Path(args.cache_file) + start_bpe = BPE.NanoSocratesBPE() + if CACHE_PATH.is_file(): + voc = BPE.load_nanos_vocabulary(CACHE_PATH) + start_bpe = BPE.NanoSocratesBPE(voc) + print(f"Training BPE") BPE_ENCODER = TRAINER.trainBPE( DATASET_PATH, - CACHE_PATH + CACHE_PATH, + start_bpe ) VOCABULARY = BPE_ENCODER.vocabulary