diff --git a/Project_Model/Libs/BPE/Classes/NanoSocratesSplitter.py b/Project_Model/Libs/BPE/Classes/NanoSocratesSplitter.py
index ccca300..399fa77 100644
--- a/Project_Model/Libs/BPE/Classes/NanoSocratesSplitter.py
+++ b/Project_Model/Libs/BPE/Classes/NanoSocratesSplitter.py
@@ -1,40 +1,82 @@
 import re
 from typing import Generator
 from ..Enums import TokenType
+
 class NanoSocratesSplitter:
 
     def __init__(
         self,
-        special_token_regex: re.Pattern
+        special_token_regex: re.Pattern,
+        max_bpe_token_id: int = 255
     ) -> None:
+        # note: the regex must already be compiled (re.Pattern)
         self.__special_token_regex = special_token_regex
+        self.__max_bpe_token_id: int = max_bpe_token_id  # used for decoding
 
     def split_text(self, corpus: str) -> Generator[tuple[str, TokenType]]:
+        """Split a text using the given special-token regex.
+
+        Args:
+            corpus (str): the full corpus string to split
+
+        Yields:
+            Generator[tuple[str, TokenType]]: each piece of the split text as a
+            (string, TokenType) pair; TokenType marks whether the piece belongs
+            to the BPE stream or is a special token [BPE, SPECIAL]
+        """
         bpe_start = 0
-        bpe_end = len(corpus)
 
-        for bound_start, bound_end in self.__find_boundaries(corpus):
+        for special_token_start, special_token_end in self.__find_boundaries(corpus):
 
-            bpe_end = bound_start
+            # find the BPE chunk before this special token
+            bpe_end = special_token_start
             BPE_TOKEN_TEXT = corpus[bpe_start:bpe_end]
-
             if BPE_TOKEN_TEXT != "":
                 yield (BPE_TOKEN_TEXT, TokenType.BPE)
 
-            bpe_start = bound_end
-            SPECIAL_TOKEN_TEXT = corpus[bound_start:bound_end]
-
+            # find the special token itself
+            SPECIAL_TOKEN_TEXT = corpus[special_token_start:special_token_end]
             if SPECIAL_TOKEN_TEXT != "":
                 yield (SPECIAL_TOKEN_TEXT, TokenType.SPECIAL)
 
-    def __find_boundaries(self, corpus: str) -> Generator[tuple[int, int]]:
+            # save the new BPE start point; it is used in the next iteration
+            bpe_start = special_token_end
+
+    def __find_boundaries(self, corpus: str) -> Generator[tuple[int, int]]:
+        """Find the start and end (exclusive) of each special token.
+
+        Args:
+            corpus (str): the string to search for special tokens
+
+        Yields:
+            Generator[tuple[int, int]]: (start, end) offsets; end is exclusive
+        """
         for match in self.__special_token_regex.finditer(corpus):
             start = match.start()
             end = match.end()
             yield (start, end)
+
+        # make the last boundary be the end of the corpus, so that
+        # split_text flushes the trailing BPE text after the last special token
+        eof = len(corpus)
+        yield (eof, eof)
+
+    def split_tokens(self, corpus: list[int]) -> Generator[tuple[list[int], TokenType]]:
+        """Split a token-id sequence into BPE runs and special tokens.
+
+        Ids above max_bpe_token_id are treated as special tokens.
+        """
+        not_special_token_list: list[int] = []
+        for token in corpus:
+            if token > self.__max_bpe_token_id:
+
+                if len(not_special_token_list) > 0:
+                    yield (not_special_token_list, TokenType.BPE)
+                    not_special_token_list = []
+
+                # wrap the id in a list to match the declared yield type
+                yield ([token], TokenType.SPECIAL)
+                continue
+
+            not_special_token_list.append(token)
+
+        # flush any trailing BPE run
+        if len(not_special_token_list) > 0:
+            yield (not_special_token_list, TokenType.BPE)
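
Reviewer note: a minimal usage sketch of the patched API, assuming TokenType exposes BPE and SPECIAL members; the special-token pattern, import paths, and token ids below are illustrative, not part of this patch.

    import re

    from Project_Model.Libs.BPE.Classes.NanoSocratesSplitter import NanoSocratesSplitter  # assumed path
    from Project_Model.Libs.BPE.Enums import TokenType  # assumed path

    # hypothetical special-token pattern; the real one is supplied by the caller
    splitter = NanoSocratesSplitter(re.compile(r"<\|[a-z_]+\|>"), max_bpe_token_id=255)

    # text path: BPE chunks and special tokens alternate, and the
    # trailing chunk after the last special token is now flushed
    print(list(splitter.split_text("Hello<|eot|>World")))
    # [('Hello', TokenType.BPE), ('<|eot|>', TokenType.SPECIAL), ('World', TokenType.BPE)]

    # id path: ids above max_bpe_token_id are yielded as single-element SPECIAL runs
    print(list(splitter.split_tokens([72, 101, 300, 33])))
    # [([72, 101], TokenType.BPE), ([300], TokenType.SPECIAL), ([33], TokenType.BPE)]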