Added Chunker
This commit is contained in:
parent
ed0255e99b
commit
b071145f6e
@ -10,6 +10,10 @@ class NanoSocratesChunker:
|
|||||||
self.__special_token_regex: re.Pattern = special_token_regex
|
self.__special_token_regex: re.Pattern = special_token_regex
|
||||||
self.__residual: str = ""
|
self.__residual: str = ""
|
||||||
|
|
||||||
|
# max theoretical size of chars
|
||||||
|
# between special tokens:
|
||||||
|
# - min: size - len(longest_token)
|
||||||
|
# - MAX: size - len(shortest_token)
|
||||||
def chunk(self, file_path: Path):
|
def chunk(self, file_path: Path):
|
||||||
# read_file
|
# read_file
|
||||||
FILE = open(file_path, "r", encoding="utf-8")
|
FILE = open(file_path, "r", encoding="utf-8")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user