Updated NanoSocratesSpecial to work with TokeNano
This commit is contained in:
parent
7c935d2700
commit
a5b8692a77
@ -5,34 +5,43 @@ class NanoSocratesSpecial(Encoder):
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
initial_vocabulary: list[str] | None = None
|
vocabulary_index: int ,
|
||||||
) -> None:
|
vocabulary: dict[str, int] | None = None
|
||||||
|
) -> None:
|
||||||
|
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.__vocabulary: dict[str, int] = {}
|
if vocabulary is None:
|
||||||
|
self.__vocabulary: dict[str, int] = {}
|
||||||
|
else:
|
||||||
|
self.__vocabulary: dict[str, int] = vocabulary
|
||||||
|
|
||||||
self.__reverse_vocabulary: dict[int, str] = {}
|
self.__reverse_vocabulary: dict[int, str] = {}
|
||||||
self.__current_index = 0
|
|
||||||
|
|
||||||
if initial_vocabulary is None:
|
if vocabulary_index is None:
|
||||||
return
|
self.__vocabulary_index = 0
|
||||||
|
else:
|
||||||
|
self.__vocabulary_index = vocabulary_index
|
||||||
|
|
||||||
for word in initial_vocabulary:
|
# self.__build_reverse_vocabulary()
|
||||||
|
|
||||||
CURRENT_INDEX = self.__current_index
|
|
||||||
self.__vocabulary[word] = CURRENT_INDEX
|
|
||||||
self.__reverse_vocabulary[CURRENT_INDEX] = word
|
|
||||||
|
|
||||||
self.__current_index += 1
|
|
||||||
|
|
||||||
@property
|
def build_reverse_vocabulary(self):
|
||||||
def vocabulary_size(self):
|
self.__reverse_vocabulary = {v: k for k, v in self.__vocabulary.items()}
|
||||||
return self.__current_index
|
|
||||||
|
|
||||||
def add_special_word(self, word:str):
|
# @property
|
||||||
CURRENT_INDEX = self.__current_index
|
# def vocabulary_size(self):
|
||||||
|
# return self.__current_index
|
||||||
|
|
||||||
|
def set_vocabulary_index(self, vocabulary_index: int):
|
||||||
|
self.__vocabulary_index = vocabulary_index
|
||||||
|
|
||||||
|
def add_special_word_to_vocabulary(self, word:str):
|
||||||
|
self.__vocabulary_index = self.__vocabulary_index + 1
|
||||||
|
CURRENT_INDEX = self.__vocabulary_index
|
||||||
self.__vocabulary[word] = CURRENT_INDEX
|
self.__vocabulary[word] = CURRENT_INDEX
|
||||||
self.__reverse_vocabulary[CURRENT_INDEX] = word
|
self.__reverse_vocabulary[CURRENT_INDEX] = word
|
||||||
self.__current_index += 1
|
|
||||||
|
|
||||||
def encode(self, word: str) -> list[int]:
|
def encode(self, word: str) -> list[int]:
|
||||||
ID = self.__vocabulary.get(word)
|
ID = self.__vocabulary.get(word)
|
||||||
@ -52,3 +61,5 @@ class NanoSocratesSpecial(Encoder):
|
|||||||
|
|
||||||
return WORD
|
return WORD
|
||||||
|
|
||||||
|
def get_reverse_vocabulary(self)-> dict[int, str]:
|
||||||
|
return self.__reverse_vocabulary
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user