Added Special Encoder
This commit is contained in:
parent
d179e01971
commit
b46df4f91a
4
Project_Model/Libs/BPE/Classes/Encoder.py
Normal file
4
Project_Model/Libs/BPE/Classes/Encoder.py
Normal file
@ -0,0 +1,4 @@
|
||||
from abc import ABC
|
||||
|
||||
class Encoder(ABC):
    """Common base class for the encoders of the BPE library.

    The class declares no abstract methods, so it only serves as a shared
    ancestor that concrete encoders inherit from.
    """
|
||||
54
Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py
Normal file
54
Project_Model/Libs/BPE/Classes/NanoSocratesSpecial.py
Normal file
@ -0,0 +1,54 @@
|
||||
from .Encoder import Encoder
|
||||
from ..Errors import OutOfDictionaryException
|
||||
|
||||
class NanoSocratesSpecial(Encoder):
    """Encoder mapping whole special words to single integer ids.

    Ids are handed out sequentially, starting from 0, in the order in which
    words are registered. Registering a word that is already known is a
    no-op, so every word has exactly one id and every id exactly one word.
    """

    def __init__(
        self,
        initial_vocabulary: list[str] | None = None
    ) -> None:
        """Create the encoder, optionally pre-registering some words.

        Args:
            initial_vocabulary: Words to register, in order. ``None`` leaves
                the vocabulary empty.
        """
        super().__init__()

        self.__vocabulary: dict[str, int] = {}
        self.__reverse_vocabulary: dict[int, str] = {}
        self.__current_index = 0

        if initial_vocabulary is None:
            return

        for word in initial_vocabulary:
            self.__register(word)

    @property
    def vocabulary_size(self) -> int:
        """Number of ids assigned so far (also the next id to be assigned)."""
        return self.__current_index

    def add_special_word(self, word: str) -> None:
        """Register ``word`` under the next free id.

        Args:
            word: Special word to add. If it is already registered, its
                existing id is kept and nothing changes.
        """
        self.__register(word)

    def encode(self, word: str) -> list[int]:
        """Return the id of ``word`` wrapped in a one-element list.

        Args:
            word: A previously registered special word.

        Raises:
            OutOfDictionaryException: If ``word`` was never registered.
        """
        ID = self.__vocabulary.get(word)

        if ID is None:
            raise OutOfDictionaryException(
                f"{word!r} is not a registered special word"
            )

        return [ID]

    def decode(self, token_id: int) -> str:
        """Return the special word registered under ``token_id``.

        Args:
            token_id: An id previously produced by this encoder.

        Raises:
            OutOfDictionaryException: If no word is registered under
                ``token_id``.
        """
        WORD = self.__reverse_vocabulary.get(token_id)

        if WORD is None:
            raise OutOfDictionaryException(
                f"{token_id!r} is not a registered special token id"
            )

        return WORD

    def __register(self, word: str) -> None:
        """Insert ``word`` into both lookup tables unless already present."""
        # Re-registering must not allocate a second id: the old id would stay
        # in the reverse table and decode/encode would no longer round-trip.
        if word in self.__vocabulary:
            return

        CURRENT_INDEX = self.__current_index
        self.__vocabulary[word] = CURRENT_INDEX
        self.__reverse_vocabulary[CURRENT_INDEX] = word
        self.__current_index += 1
|
||||
|
||||
@ -0,0 +1,4 @@
|
||||
class OutOfDictionaryException(Exception):
    """Raised when a word or token id is not present in a vocabulary.

    Accepts the same positional arguments as ``Exception``; they are stored
    unchanged in ``args``.
    """
|
||||
Loading…
x
Reference in New Issue
Block a user