Added Chunker
This commit is contained in:
parent
ed0255e99b
commit
b071145f6e
@ -10,6 +10,10 @@ class NanoSocratesChunker:
|
||||
self.__special_token_regex: re.Pattern = special_token_regex
|
||||
self.__residual: str = ""
|
||||
|
||||
# max theoretical size of chars
|
||||
# between special tokens:
|
||||
# - min: size - len(longest_token)
|
||||
# - MAX: size - len(shortest_token)
|
||||
def chunk(self, file_path: Path):
|
||||
# read_file
|
||||
FILE = open(file_path, "r", encoding="utf-8")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user