diff --git a/.gitattributes b/.gitattributes index 9449b78..02fa4a6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,3 @@ Exam/Deep_Learning_2025_VIII.pdf filter=lfs diff=lfs merge=lfs -text Assets/** filter=lfs diff=lfs merge=lfs -text +Assets/Dataset/1-hop/dataset.csv filter=lfs diff=lfs merge=lfs -text diff --git a/Assets/Dataset/1-hop/dataset.csv b/Assets/Dataset/1-hop/dataset.csv index e723295..bdc31b7 100644 --- a/Assets/Dataset/1-hop/dataset.csv +++ b/Assets/Dataset/1-hop/dataset.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c68dcddd7cc45445504836ebd40f3dc0df1dfe4de244e09a346390f0f0dbefc5 -size 81313652 +oid sha256:331d8ef4e99c5200f1323e7149bd8aade39dc17ee5778b553bb32c593ff601cf +size 2443211793 diff --git a/Assets/Dataset/1-hop/movies.csv b/Assets/Dataset/1-hop/movies.csv index e826a3f..881e9a4 100644 --- a/Assets/Dataset/1-hop/movies.csv +++ b/Assets/Dataset/1-hop/movies.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66163b5ce1e35c685b886b6de9b662f7d2571c2397256a043f80b23507b648c6 -size 9120409 +oid sha256:8d81c8801ea79bd46747769a288cd0c507b3b94b2fb4bbb9605e282776ca5efb +size 8808636 diff --git a/Script/fetchdata.py b/Script/fetchdata.py index 7573751..e1cbf7a 100644 --- a/Script/fetchdata.py +++ b/Script/fetchdata.py @@ -1,3 +1,4 @@ +from math import floor from time import sleep import SPARQLWrapper import requests @@ -8,9 +9,11 @@ TYPE = SPARQLWrapper.CSV TIMEOUT_SECONDS = 1.5 LIMIT = int(1E4) OFFSET = LIMIT -INITIAL_OFFSET = 0 +INITIAL_OFFSET = 15200000 MAX_PAGES = int(1E9) +# Missing page 13220000 + FILE_URI = "./Assets/Dataset/1-hop/dataset.csv" QUERY = """ @@ -35,13 +38,13 @@ WHERE { def main(): exit = False - page = INITIAL_OFFSET + page = int(floor(INITIAL_OFFSET / LIMIT)) -1 while not exit: print(f"Starting to get page {page}") - CURRENT_OFFSET = OFFSET * page + CURRENT_OFFSET = int(OFFSET + (page * LIMIT)) sparql = SPARQLWrapper.SPARQLWrapper(BASE_URL) sparql.setReturnFormat(TYPE)