Added Special Encoder

This commit is contained in:
Christian Risi 2025-09-28 18:03:47 +02:00
parent d179e01971
commit b46df4f91a
3 changed files with 62 additions and 0 deletions

View File

@ -0,0 +1,4 @@
from abc import ABC
class Encoder(ABC):
    """Abstract base class that concrete encoders derive from.

    It declares no methods or attributes of its own, so it acts purely as
    a shared marker type for encoder implementations.
    """

View File

@ -0,0 +1,54 @@
from .Encoder import Encoder
from ..Errors import OutOfDictionaryException
class NanoSocratesSpecial(Encoder):
def __init__(
self,
initial_vocabulary: list[str] | None = None
) -> None:
super().__init__()
self.__vocabulary: dict[str, int] = {}
self.__reverse_vocabulary: dict[int, str] = {}
self.__current_index = 0
if initial_vocabulary is None:
return
for word in initial_vocabulary:
CURRENT_INDEX = self.__current_index
self.__vocabulary[word] = CURRENT_INDEX
self.__reverse_vocabulary[CURRENT_INDEX] = word
self.__current_index += 1
@property
def vocabulary_size(self):
return self.__current_index
def add_special_word(self, word:str):
CURRENT_INDEX = self.__current_index
self.__vocabulary[word] = CURRENT_INDEX
self.__reverse_vocabulary[CURRENT_INDEX] = word
self.__current_index += 1
def encode(self, word: str) -> list[int]:
ID = self.__vocabulary.get(word)
if ID is None:
raise OutOfDictionaryException()
return [ID]
def decode(self, token_id: int) -> str:
ID = token_id
WORD = self.__reverse_vocabulary.get(ID)
if WORD is None:
raise OutOfDictionaryException()
return WORD

View File

@ -0,0 +1,4 @@
class OutOfDictionaryException(Exception):
    """Raised when a looked-up item is not present in a dictionary.

    Construction is inherited unchanged from ``Exception``: any positional
    arguments are stored on ``args``.
    """