from typing import Optional

from .Encoder import Encoder
from ..Errors import OutOfDictionaryException


class NanoSocratesSpecial(Encoder):
    """Encoder that maps whole special tokens to single IDs.

    IDs are handed out sequentially starting at ``bpe_vocabulary_size + 1``,
    in registration order. Each distinct token is registered at most once,
    which keeps the assigned IDs contiguous and guarantees that a freshly
    computed ID never collides with one already in use.
    """

    def __init__(
        self,
        bpe_vocabulary_size: int,
        special_tokens: Optional[list[str]] = None,
    ) -> None:
        """Create the encoder and register the initial special tokens.

        Args:
            bpe_vocabulary_size: Offset added to every special-token ID; the
                first registered token receives ``bpe_vocabulary_size + 1``.
            special_tokens: Tokens to register, in order. ``None`` or an
                empty list leaves the vocabulary empty. Repeated tokens are
                registered only once (first occurrence wins).
        """
        super().__init__()

        self.__bpe_offset = bpe_vocabulary_size
        self.__vocabulary: dict[str, int] = {}
        self.__reverse_vocabulary: dict[int, str] = {}

        # Go through the same registration path used after construction so
        # that duplicates cannot leave gaps or stale reverse entries.
        for token in special_tokens or ():
            self.add_special_word_to_vocabulary(token)

    @property
    def __next_id(self) -> int:
        """ID that the next newly registered token will receive."""
        # Valid only while IDs stay contiguous, i.e. while every vocabulary
        # entry was created through add_special_word_to_vocabulary.
        return self.__bpe_offset + len(self.__vocabulary) + 1

    @property
    def vocabulary_size(self) -> int:
        """Number of registered special tokens."""
        return len(self.vocabulary)

    @property
    def vocabulary(self) -> dict[str, int]:
        """Token -> ID mapping.

        This is the internal dict itself, not a copy; mutating it directly
        bypasses the ID bookkeeping.
        """
        return self.__vocabulary

    @property
    def reverse_vocabulary(self) -> dict[int, str]:
        """ID -> token mapping (the internal dict itself, not a copy)."""
        return self.__reverse_vocabulary

    def add_special_word_to_vocabulary(self, word: str) -> None:
        """Register ``word`` under the next free ID.

        Registering a word that is already known is a no-op: the word keeps
        its existing ID. Re-assigning it would leave a stale entry in the
        reverse vocabulary and make the next computed ID collide with it.
        """
        if word in self.__vocabulary:
            return

        new_id = self.__next_id
        self.__vocabulary[word] = new_id
        self.__reverse_vocabulary[new_id] = word

    def encode(self, word: str) -> list[int]:
        """Return the single-element ID list for ``word``.

        Raises:
            OutOfDictionaryException: If ``word`` is not a registered token.
        """
        token_id = self.__vocabulary.get(word)

        if token_id is None:
            raise OutOfDictionaryException()

        return [token_id]

    def decode(self, token_id: list[int]) -> str:
        """Return the token for a single-element ID list.

        Raises:
            OutOfDictionaryException: If ``token_id`` does not contain
                exactly one ID, or if that ID is not registered.
        """
        # A special token is always exactly one ID; anything else cannot
        # have been produced by encode().
        if len(token_id) != 1:
            raise OutOfDictionaryException()

        word = self.__reverse_vocabulary.get(token_id[0])

        if word is None:
            raise OutOfDictionaryException()

        return word