diff --git a/Project_Model/Libs/BPE/Utils/json_utils.py b/Project_Model/Libs/BPE/Utils/json_utils.py
new file mode 100644
index 0000000..716e93a
--- /dev/null
+++ b/Project_Model/Libs/BPE/Utils/json_utils.py
@@ -0,0 +1,25 @@
+import json
+from pathlib import Path
+
+
+def save_json(vocabulary: dict, path: Path):
+    """Serialize ``vocabulary`` to JSON and write it to ``path``.
+
+    Keys must be ``str``, ``int``, ``float``, ``bool`` or ``None``;
+    ``json.dumps`` raises ``TypeError`` for any other key type.
+    """
+    # Serialize before opening so a failed dump never truncates the file.
+    json_string = json.dumps(vocabulary)
+    with open(path, "w", encoding="utf-8") as handle:
+        handle.write(json_string)
+
+
+def load_json(path: Path) -> dict:
+    """Read ``path`` and return the decoded JSON document.
+
+    JSON object keys are always decoded as ``str``.
+    """
+    with open(path, "r", encoding="utf-8") as handle:
+        json_string = handle.read()
+
+    return json.loads(json_string)
diff --git a/Project_Model/Libs/BPE/Utils/vocabulary.py b/Project_Model/Libs/BPE/Utils/vocabulary.py
new file mode 100644
index 0000000..fa245d5
--- /dev/null
+++ b/Project_Model/Libs/BPE/Utils/vocabulary.py
@@ -0,0 +1,52 @@
+import json
+from pathlib import Path
+from ..Errors import OutOfDictionaryException
+
+
+def nanos_vocabulary2json_str(vocabulary: dict[tuple[int, int], int]) -> str:
+    """Return ``vocabulary`` as a JSON object string.
+
+    JSON object keys must be strings, so every pair key is stored as its
+    ``str()`` form, e.g. ``(1, 2)`` becomes ``"(1, 2)"``.
+    """
+    return json.dumps({str(key): item for key, item in vocabulary.items()})
+
+
+def nanos_json_str2vocabulary(json_string: str) -> dict[tuple[int, int], int]:
+    """Rebuild a vocabulary from a ``nanos_vocabulary2json_str`` string.
+
+    Raises:
+        OutOfDictionaryException: a key does not hold exactly two integers.
+        ValueError: a key component cannot be parsed by ``int``.
+    """
+    json_object: dict[str, int] = json.loads(json_string)
+    vocabulary: dict[tuple[int, int], int] = {}
+
+    for key, item in json_object.items():
+        # Drop the first and last character (the tuple's parentheses),
+        # then parse the comma-separated integers in between.
+        pair = tuple(int(part) for part in key[1:-1].split(","))
+
+        if len(pair) != 2:
+            raise OutOfDictionaryException()
+
+        first, second = pair
+        vocabulary[(first, second)] = item
+
+    return vocabulary
+
+
+def save_nanos_vocabulary(vocabulary: dict[tuple[int, int], int], path: Path):
+    """Write ``vocabulary`` to ``path`` in its string-keyed JSON form."""
+    # Serialize before opening so a failed dump never truncates the file.
+    json_string = nanos_vocabulary2json_str(vocabulary)
+    with open(path, "w", encoding="utf-8") as handle:
+        handle.write(json_string)
+
+
+def load_nanos_vocabulary(path: Path) -> dict[tuple[int, int], int]:
+    """Read ``path`` and return the vocabulary stored in it."""
+    with open(path, "r", encoding="utf-8") as handle:
+        json_string = handle.read()
+
+    return nanos_json_str2vocabulary(json_string)