From 7cfaf601b411ea4e6ad5c929793f5aad7b8b127a Mon Sep 17 00:00:00 2001
From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com>
Date: Wed, 1 Oct 2025 19:42:22 +0200
Subject: [PATCH] Refactored to remove token sequences that can't be compressed anymore

---
 .../Libs/BPE/Classes/NanoSocraTraineRam.py    | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/Project_Model/Libs/BPE/Classes/NanoSocraTraineRam.py b/Project_Model/Libs/BPE/Classes/NanoSocraTraineRam.py
index 9c4f444..aca820e 100644
--- a/Project_Model/Libs/BPE/Classes/NanoSocraTraineRam.py
+++ b/Project_Model/Libs/BPE/Classes/NanoSocraTraineRam.py
@@ -105,18 +105,29 @@ class NanoSocraTraineRam:
         return BPE
 
     def __round_train(self, bpe: NanoSocratesBPE, data: list[list[int]]):
+
         DATA_LEN = len(data)
+        NEW_DATA = []
 
+        counter = 0
         memory = NanoSocratesBatchMemoryBPE({}, 0)
-        for piece, index in zip(data, range(0, DATA_LEN)):
+        while len(data) > 0:
+            counter += 1
+            last_batch = len(data) == 1
 
-            last_batch = index == DATA_LEN - 1
+            piece = data.pop()
 
             bpe, memory, output = bpe.fit(piece, memory, last_batch)
 
-            data[index] = output
+            if counter % int(1E6) == 0:
+                print(f"Fitted: {counter}/{DATA_LEN}")
 
-        return (bpe, data, memory)
+            if len(output) < 2:
+                continue
+
+            NEW_DATA.append(output)
+
+        return (bpe, NEW_DATA, memory)
 
     def __gather_data_from_file(self, path: Path) -> list[list[int]]: