# NanoSocrates/Scripts/Training/dictionary_adjuster.py
#
# 12 lines
# 429 B
# Python
# Raw Normal View History

# Cuts the "mad" trained dictionary down to a shorter one.
from itertools import islice
from pathlib import Path

from Project_Model.Libs.BPE.Utils.vocabulary import load_nanos_vocabulary, save_nanos_vocabulary
# Input file holding the full trained vocabulary, and the file that receives
# its truncated copy.
DICTIONARY_PATH = "Assets/Dataset/Tmp/mad_cache.json"
OUTPUT_PATH = "Assets/Dataset/Tmp/trimmed.json"

# Number of leading vocabulary entries kept in the trimmed output.
# NOTE(review): 31744 == 31 * 1024; presumably the target vocabulary size
# expected downstream -- confirm before changing.
MAX_ENTRIES = 31744

big_dict = load_nanos_vocabulary(Path(DICTIONARY_PATH))

# Keep only the first MAX_ENTRIES items, in the vocabulary's own iteration
# order. islice stops after the requested prefix, so the whole item list is
# never copied into a temporary list just to be sliced.
big_dict = dict(islice(big_dict.items(), MAX_ENTRIES))

save_nanos_vocabulary(big_dict, Path(OUTPUT_PATH))