typo

parent 1c0ddb8753
commit 93865bee8a
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "adbd9598",
    "metadata": {},
    "outputs": [
@@ -8357,7 +8357,7 @@
     "        ):\n",
     "            decoded_sentence = TOKENANO.decode(encoded_sentence)\n",
     "            decoded_target = TOKENANO.decode(expected_sentence)\n",
-    "            print(f\"\\tACTUAL:\\n\\t{decoded_sentence}\\n\\tEXPECTED:\\n\\t{decoded_target}\\n\")\n",
+    "            print(f\"\\tACTUAL:\\n\\t\\t{decoded_sentence}\\n\\tEXPECTED:\\n\\t\\t{decoded_target}\\n\")\n",
     "\n",
     "\n",
     "\n",

@@ -27,9 +27,9 @@ TOKENANO = BPE.TokeNanoCore(VOCABULARY, SPECIAL_VOC)
 TOKEN_SPACE_SIZE = TOKENANO.vocabulary_size + 1
 EMBEDDED_SIZE = 256
 FEED_FORWARD_MULTIPLIER = 4
-ATTENTION_HEADS = 4
+ATTENTION_HEADS = 8
 SENTENCE_LENGTH = 256
-NUMBER_OF_BLOCKS = 2
+NUMBER_OF_BLOCKS = 4
 MAX_EPOCHS = int(1e3)
 
 
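A note on the updated hyperparameters: with EMBEDDED_SIZE = 256 and ATTENTION_HEADS = 8, each head works on 256 / 8 = 32 dimensions. Multi-head attention needs the embedding size to be divisible by the head count, so a small sanity check such as the hypothetical sketch below (assuming these constants feed a standard torch.nn.MultiheadAttention-style module) can catch bad combinations before training starts.

    # Hypothetical sanity check; torch.nn.MultiheadAttention requires
    # embed_dim % num_heads == 0.
    EMBEDDED_SIZE = 256
    ATTENTION_HEADS = 8

    assert EMBEDDED_SIZE % ATTENTION_HEADS == 0, (
        f"embed dim {EMBEDDED_SIZE} is not divisible by {ATTENTION_HEADS} heads"
    )
    HEAD_DIM = EMBEDDED_SIZE // ATTENTION_HEADS  # 32 dimensions per head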
@@ -72,6 +72,25 @@ for index, row in TOY_DATASET.iterrows():
     TOY_BATCH_TARGET_LIST.append(output_tokens)
     TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens)
 
+    output_tokens = TOKENANO.encode(RDFs)
+    input_tokens = TOKENANO.encode(Abstract)[1:]
+    decoder_default_tokens = TOKENANO.encode("<SOS>")
+
+    input_tokens, padding = Transformer.normalize_sequence(
+        input_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
+    )
+    output_tokens, _ = Transformer.normalize_sequence(
+        output_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN
+    )
+    decoder_default_tokens, _ = Transformer.normalize_sequence(
+        decoder_default_tokens, SENTENCE_LENGTH, PAD_TOKEN, END_TOKEN, False
+    )
+
+    TOY_BATCH_INPUT_LIST.append(input_tokens)
+    TOY_BATCH_PADDING_LIST.append(padding)
+    TOY_BATCH_TARGET_LIST.append(output_tokens)
+    TOY_BATCH_DECODER_DEFAULT.append(decoder_default_tokens)
+
 # Training loop
 LOSS_HISTORY = []
 NANOSOCRATES = Transformer.TrainingModel(
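For context on the added preprocessing: judging only from the call sites, Transformer.normalize_sequence appears to pad or truncate a token list to SENTENCE_LENGTH and return it together with a boolean padding mask, with the trailing False argument apparently suppressing the end token for the decoder seed. A rough, hypothetical sketch of a helper with that signature (the real implementation lives in the project's Transformer module and may differ):

    # Hypothetical pad/truncate helper mirroring the call signature above;
    # not the project's actual implementation.
    def normalize_sequence(tokens, length, pad_token, end_token, add_end=True):
        tokens = list(tokens)[: length - 1] if add_end else list(tokens)[:length]
        if add_end:
            tokens.append(end_token)          # terminate the sequence
        padding = [False] * len(tokens)       # real (non-padded) positions
        while len(tokens) < length:
            tokens.append(pad_token)          # pad up to the fixed length
            padding.append(True)              # mark padded positions
        return tokens, padding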
@@ -91,15 +110,14 @@ while current_epoch < MAX_EPOCHS:
 
     optimizer.zero_grad()
 
-    encoder_list = torch.tensor([TOY_BATCH_INPUT_LIST[0]])
-    decoder_list = torch.tensor([TOY_BATCH_DECODER_DEFAULT[0]])
-    src_padding = torch.tensor([TOY_BATCH_PADDING_LIST[0]], dtype=torch.bool)
+    encoder_list = torch.tensor(TOY_BATCH_INPUT_LIST[:])
+    decoder_list = torch.tensor(TOY_BATCH_DECODER_DEFAULT[:])
+    src_padding = torch.tensor(TOY_BATCH_PADDING_LIST[:], dtype=torch.bool)
 
     # Transform target into logits
-    target_logits = torch.tensor([TOY_BATCH_TARGET_LIST[0]])
+    target_logits = torch.tensor(TOY_BATCH_TARGET_LIST[:])
 
     last_loss = 0
-    loss_list = []
     last_prediction: torch.Tensor
 
     for i in range(0, SENTENCE_LENGTH):
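The change from wrapping TOY_BATCH_INPUT_LIST[0] in an extra list to passing the full TOY_BATCH_INPUT_LIST[:] moves training from a single example of shape (1, SENTENCE_LENGTH) to the whole toy batch of shape (batch_size, SENTENCE_LENGTH); torch.tensor builds the 2-D tensor directly from the list of equal-length token lists. A minimal illustration with toy values:

    # Toy values only; shows the shape difference between the two styles.
    import torch

    batch = [[5, 7, 0, 0], [9, 2, 3, 0]]   # two padded token sequences
    single = torch.tensor([batch[0]])       # old style: shape (1, 4)
    full = torch.tensor(batch[:])           # new style: shape (2, 4)
    print(single.shape, full.shape)         # torch.Size([1, 4]) torch.Size([2, 4])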
@@ -116,11 +134,10 @@ while current_epoch < MAX_EPOCHS:
         logits = logits[:,:i,:]
         logits = logits.permute(0, 2, 1)
 
-        loss : torch.Tensor = cross_entropy(logits, target_logits[:, :i])
+        loss : torch.Tensor = cross_entropy(logits, target_logits[:, 0:i])
         # loss : torch.Tensor = cross_entropy(logits, target_logits)
 
         last_loss = loss
-        loss_list.append(loss.item())
         loss.backward()
         optimizer.step()
         scheduler.step()
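The permute(0, 2, 1) above is what lets cross_entropy consume per-position token targets: torch.nn.functional.cross_entropy expects logits of shape (batch, vocab, positions) when the target is a (batch, positions) tensor of class indices, and slicing both logits and target_logits to the first i positions keeps them aligned. A small self-contained example with toy sizes:

    # Toy sizes only; demonstrates the layout cross_entropy expects.
    import torch
    from torch.nn.functional import cross_entropy

    B, L, V = 2, 5, 11                      # batch, positions, vocabulary size
    logits = torch.randn(B, L, V)           # model output: (batch, pos, vocab)
    targets = torch.randint(0, V, (B, L))   # token ids:    (batch, pos)

    loss = cross_entropy(logits.permute(0, 2, 1), targets)  # (batch, vocab, pos)
    print(loss.item())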
@@ -136,10 +153,7 @@ while current_epoch < MAX_EPOCHS:
     current_epoch += 1
 
     if current_epoch % 1 == 0:
-        loss_list = loss_list[1:]
         print(f"EPOCH {current_epoch}\n\tLoss: {last_loss}")
-        print(f"ALL LOSS HISTORY:{loss_list}")
-        print(f"Max loss:{max(loss_list)}, Min loss: {min(loss_list)}")
 
         for encoded_sentence, expected_sentence in zip(
             Transformer.tensor2token(last_prediction[:,:], END_TOKEN), # type: ignore
@@ -147,7 +161,7 @@ while current_epoch < MAX_EPOCHS:
         ):
             decoded_sentence = TOKENANO.decode(encoded_sentence)
             decoded_target = TOKENANO.decode(expected_sentence)
-            print(f"ACTUAL:\n\t\t{decoded_sentence}\nEXPECTED:\n\t\t{decoded_target}")
+            print(f"\tACTUAL:\n\t\t{decoded_sentence}\n\tEXPECTED:\n\t\t{decoded_target}\n")
 
 
 