12 lines
429 B
Python
12 lines
429 B
Python
# Trim the trained "mad" vocabulary dictionary (mad_cache.json) down to a shorter one.
|
|
from itertools import islice
from pathlib import Path

from Project_Model.Libs.BPE.Utils.vocabulary import load_nanos_vocabulary, save_nanos_vocabulary
|
|
|
|
# Source file holding the full vocabulary to be trimmed.
DICTIONARY_PATH = "Assets/Dataset/Tmp/mad_cache.json"
# Destination file for the trimmed vocabulary.
OUTPUT_PATH = "Assets/Dataset/Tmp/trimmed.json"
# Number of leading entries kept in the trimmed vocabulary.
MAX_ENTRIES = 31744

# Load the full vocabulary from disk.
# NOTE(review): assumes load_nanos_vocabulary returns a dict-like mapping
# (it must expose .items()) -- confirm against the helper's definition.
big_dict = load_nanos_vocabulary(Path(DICTIONARY_PATH))

# Keep only the first MAX_ENTRIES pairs, in the mapping's iteration order.
# islice stops after MAX_ENTRIES pairs, so no intermediate list of every
# item is built; a vocabulary with fewer entries is kept whole.
big_dict = dict(islice(big_dict.items(), MAX_ENTRIES))

# Write the trimmed vocabulary to the output path.
save_nanos_vocabulary(big_dict, Path(OUTPUT_PATH))