Added small dataset

This commit is contained in:
Christian Risi 2025-10-07 20:44:40 +02:00
parent 3f465991f0
commit d1ff88da82
5 changed files with 14 additions and 3 deletions

BIN
Assets/Dataset/1-hop/curated/corpus.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Dataset/1-hop/small/corpus.txt (Stored with Git LFS) Normal file

Binary file not shown.

BIN
Assets/Dataset/1-hop/small/rdf_completation.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:80da574017b251c9f07ecbce837d9d36a9ee8183a2a3bdbe0a2e31e22226ab79
3 size 12773126

BIN
Assets/Dataset/1-hop/small/rdf_text.csv (Stored with Git LFS) Normal file

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:41b30ab739a01482036c40b6560adfe751c5905ae80aafef6ee0f1a716849c68
3 size 13222824

View File

@@ -45,9 +45,8 @@ def normalize_sequence(
pad_token: int,
end_token: int,
) -> tuple[list[int], list[bool]]:
new_sequence = pad_sequence(sequence, max_length, pad_token)
new_sequence = truncate_sequence(new_sequence, max_length, end_token)
new_sequence = truncate_sequence(sequence, max_length, end_token)
new_sequence = pad_sequence(new_sequence, max_length, pad_token)
PADDING_MASK = create_padding_mask(new_sequence, pad_token)
return (new_sequence, PADDING_MASK)