little snippet to trim big dictionaries
This commit is contained in:
parent
165290162c
commit
1d23b9cc8b
12
Scripts/Training/dictionary_adjuster.py
Normal file
12
Scripts/Training/dictionary_adjuster.py
Normal file
@ -0,0 +1,12 @@
|
||||
# Cut the oversized trained dictionary (the "mad" cache) down to a shorter one.
|
||||
from Project_Model.Libs.BPE.Utils.vocabulary import load_nanos_vocabulary, save_nanos_vocabulary
|
||||
from pathlib import Path
|
||||
|
||||
from itertools import islice

# Input: the large trained vocabulary file. Output: where the trimmed copy goes.
DICTIONARY_PATH = "Assets/Dataset/Tmp/mad_cache.json"
OUTPUT_PATH = "Assets/Dataset/Tmp/trimmed.json"

# Number of leading vocabulary entries to keep in the trimmed file.
MAX_ENTRIES = 31744

big_dict = load_nanos_vocabulary(Path(DICTIONARY_PATH))

# Keep only the first MAX_ENTRIES items, in the order the loaded mapping
# yields them. islice stops after MAX_ENTRIES items, so no list holding
# every entry of the big dictionary is built just to be sliced.
big_dict = dict(islice(big_dict.items(), MAX_ENTRIES))

save_nanos_vocabulary(big_dict, Path(OUTPUT_PATH))
|
||||
Loading…
x
Reference in New Issue
Block a user