This commit is contained in:
GassiGiuseppe 2025-10-07 23:16:20 +02:00
commit 7027414342
13 changed files with 348 additions and 6 deletions

BIN
Assets/Dataset/1-hop/curated/corpus.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Dataset/1-hop/small/corpus.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Dataset/1-hop/small/rdf_completation.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:80da574017b251c9f07ecbce837d9d36a9ee8183a2a3bdbe0a2e31e22226ab79
3 size 12773126

BIN
Assets/Dataset/1-hop/small/rdf_text.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:41b30ab739a01482036c40b6560adfe751c5905ae80aafef6ee0f1a716849c68
3 size 13222824

BIN
Assets/Dataset/1-hop/toy/corpus.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Dataset/1-hop/toy/rdf_completation.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:39012a1e59eaa740d01515aa6b9744267dbb3ae13941b28558060795a94d90e0
3 size 86122

BIN
Assets/Dataset/1-hop/toy/rdf_mask.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:8f31602eba47f7daff3b13bb243abaf429ff5900a8d26ae854ba790fda47d287
3 size 517642

BIN
Assets/Dataset/1-hop/toy/rdf_text.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:1189e04d3ba9d9138a4e216200313f5842b8a49de1745bb553ba2e3abf18d818
3 size 102533

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "f5762da9", "id": "f5762da9",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -127,6 +127,8 @@
"\n", "\n",
"\n", "\n",
"\n", "\n",
"\n",
"\n",
"\n" "\n"
] ]
} }

File diff suppressed because one or more lines are too long

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "4ae47336", "id": "4ae47336",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -15,6 +15,77 @@
"mha = torch.nn.MultiheadAttention(D, num_heads=4, batch_first=True)\n", "mha = torch.nn.MultiheadAttention(D, num_heads=4, batch_first=True)\n",
"y, _ = mha(x, x, x, attn_mask=attn_mask, key_padding_mask=pad_mask) # should work\n" "y, _ = mha(x, x, x, attn_mask=attn_mask, key_padding_mask=pad_mask) # should work\n"
] ]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e38e3fb5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],\n",
" [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]],\n",
"\n",
" [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],\n",
" [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.nn.functional.one_hot(torch.tensor([\n",
" [4, 1, 9],\n",
" [2,4,5]\n",
"]))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7119ad53",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"device(type='cpu')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.get_default_device()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "8c95691a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"xpu\n"
]
}
],
"source": [
"from Project_Model.Libs.TorchShims import get_default_device\n",
"\n",
"print(get_default_device())"
]
} }
], ],
"metadata": { "metadata": {

View File

@ -21,4 +21,7 @@ class SpecialToken(Enum):
# NanoSocrates # NanoSocrates
START = "<START>" START = "<START>"
CORPUS_END = "<END>" CORPUS_END = "<END>"
PAD = "<PAD>" START_OF_SEQUENCE = "<SOS>"
END_OF_SEQUENCE = "<EOS>"
PAD = "<PAD>"

View File

@ -45,9 +45,8 @@ def normalize_sequence(
pad_token: int, pad_token: int,
end_token: int, end_token: int,
) -> tuple[list[int], list[bool]]: ) -> tuple[list[int], list[bool]]:
new_sequence = truncate_sequence(sequence, max_length, end_token)
new_sequence = pad_sequence(sequence, max_length, pad_token) new_sequence = pad_sequence(new_sequence, max_length, pad_token)
new_sequence = truncate_sequence(new_sequence, max_length, end_token)
PADDING_MASK = create_padding_mask(new_sequence, pad_token) PADDING_MASK = create_padding_mask(new_sequence, pad_token)
return (new_sequence, PADDING_MASK) return (new_sequence, PADDING_MASK)