# Cut the mad trained dictionary into a shorter one.
#
# Loads the vocabulary stored at DICTIONARY_PATH, keeps only its first
# VOCABULARY_SIZE entries (in the order the loaded mapping yields them) and
# writes the result to OUTPUT_PATH.
from itertools import islice
from pathlib import Path

from Project_Model.Libs.BPE.Utils.vocabulary import (
    load_nanos_vocabulary,
    save_nanos_vocabulary,
)

DICTIONARY_PATH = "Assets/Dataset/Tmp/mad_cache.json"
OUTPUT_PATH = "Assets/Dataset/Tmp/trimmed.json"

# Number of leading entries kept in the trimmed vocabulary.
# NOTE(review): 31744 == 31 * 1024; presumably the target vocabulary size
# expected downstream -- confirm before changing.
VOCABULARY_SIZE = 31744

big_dict = load_nanos_vocabulary(Path(DICTIONARY_PATH))
# islice takes the prefix lazily, so the full list of items is never copied
# just to be sliced and thrown away.
big_dict = dict(islice(big_dict.items(), VOCABULARY_SIZE))
save_nanos_vocabulary(big_dict, Path(OUTPUT_PATH))