# Utilities introduced by the patch "Added utils to make regexps and
# iterators that check for last element". The patch creates two modules,
# reproduced here in order:
#   Project_Model/Libs/BPE/Utils/lag_checker_iterator.py
#   Project_Model/Libs/BPE/Utils/special_regex_maker.py
import re
from collections import deque  # not used below; kept from the original module
from typing import Generator, TypeVar

T1 = TypeVar("T1")
T2 = TypeVar("T2")
T3 = TypeVar("T3")


def iterator_with_checks(
    generator: Generator[T1, T2, T3],
) -> Generator[tuple[T1, bool], T2, T3]:
    """Yield ``(element, is_last)`` pairs for every element of *generator*.

    The wrapper reads one element ahead so that it can tell, at the moment
    an element is yielded, whether another one follows.

    Args:
        generator: The source of elements. It is passed through ``iter()``,
            so any iterable is accepted at runtime, not only generators.

    Yields:
        ``(element, False)`` for every element except the final one, and
        ``(element, True)`` for the final one. An empty source yields
        nothing.

    Returns:
        The value carried by the source's ``StopIteration`` (the wrapped
        generator's ``return`` value; ``None`` for ordinary iterators).
    """
    source = iter(generator)

    # A StopIteration escaping a generator body is converted into a
    # RuntimeError (PEP 479), so an empty source must be handled here.
    try:
        current = next(source)
    except StopIteration as stop:
        return stop.value

    while True:
        # Only the look-ahead is guarded: the yield stays outside the try
        # so that exceptions thrown in by the consumer are not mistaken
        # for exhaustion of the source.
        try:
            upcoming = next(source)
        except StopIteration as stop:
            result = stop.value
            break
        yield (current, False)
        current = upcoming

    yield (current, True)
    return result


def special_regex_maker(special_tokens: list[str]) -> re.Pattern[str]:
    """Compile a pattern that matches any of *special_tokens* literally.

    Args:
        special_tokens: Token strings to match verbatim. Regex
            metacharacters inside a token (for example the ``|`` in
            ``<|endoftext|>``) are escaped. Empty strings are ignored.

    Returns:
        A compiled alternation of the escaped tokens. Longer tokens are
        tried first, so a token that is a prefix of another cannot shadow
        it. If no non-empty token is given, the returned pattern never
        matches.
    """
    # sorted() is stable, so tokens of equal length keep their given order.
    longest_first = sorted(special_tokens, key=len, reverse=True)
    escaped = [re.escape(token) for token in longest_first if token]

    if not escaped:
        # An empty alternation would match the empty string at every
        # position; an empty negative lookahead can never succeed.
        return re.compile(r"(?!)")

    return re.compile("|".join(escaped))