# ---------------------------------------------------------------------------
# Project_Model/Libs/BPE/Utils/lag_checker_iterator.py
# ---------------------------------------------------------------------------
from collections import deque
from typing import Generator, TypeVar

T1 = TypeVar("T1")
T2 = TypeVar("T2")
T3 = TypeVar("T3")


def iterator_with_checks(
    generator: Generator[T1, T2, T3],
) -> Generator[tuple[T1, bool], T2, T3]:
    """Wrap *generator* so each item is paired with an "is last" flag.

    The wrapper stays one element ahead of what it yields: an item is only
    emitted once the wrapper knows whether another one follows it.

    Args:
        generator: The source to consume. It is advanced with ``next()``
            only, so values sent into the wrapper are not forwarded to it.

    Yields:
        ``(item, is_last)`` tuples, where ``is_last`` is ``True`` only for
        the final item produced by ``generator``.

    Returns:
        The return value of ``generator`` (the ``value`` carried by its
        ``StopIteration``). An empty source yields nothing.
    """
    # A StopIteration escaping a generator body is turned into RuntimeError
    # (PEP 479), so the empty-source case must be handled explicitly.
    try:
        pending = next(generator)
    except StopIteration as stop:
        return stop.value

    while True:
        # Keep the try body minimal: only the look-ahead can legitimately
        # signal exhaustion of the source.
        try:
            upcoming = next(generator)
        except StopIteration as stop:
            yield (pending, True)
            return stop.value

        yield (pending, False)
        pending = upcoming


# ---------------------------------------------------------------------------
# Project_Model/Libs/BPE/Utils/special_regex_maker.py
# ---------------------------------------------------------------------------
import re


def special_regex_maker(special_tokens: list[str]) -> re.Pattern:
    """Compile a pattern that matches any of *special_tokens* literally.

    Each token is escaped before being joined, so regex metacharacters in a
    token (for example the ``|`` in ``<|endoftext|>``) are matched as plain
    text instead of being interpreted as regex syntax.

    Alternatives are tried in list order; if one token is a prefix of
    another, list the longer token first so it is preferred.

    Args:
        special_tokens: Literal token strings to recognise.

    Returns:
        A compiled pattern matching any one of the tokens. For an empty
        list the pattern never matches.
    """
    if not special_tokens:
        # An empty alternation would match the empty string at every
        # position; "(?!)" is a negative lookahead that can never succeed.
        return re.compile(r"(?!)")

    regex_source = "|".join(re.escape(token) for token in special_tokens)

    return re.compile(regex_source)