diff --git a/Script/fetchdata.py b/Script/fetchdata.py index 7573751..e1cbf7a 100644 --- a/Script/fetchdata.py +++ b/Script/fetchdata.py @@ -1,3 +1,4 @@ +from math import floor from time import sleep import SPARQLWrapper import requests @@ -8,9 +9,11 @@ TYPE = SPARQLWrapper.CSV TIMEOUT_SECONDS = 1.5 LIMIT = int(1E4) OFFSET = LIMIT -INITIAL_OFFSET = 0 +INITIAL_OFFSET = 15200000 MAX_PAGES = int(1E9) +# Missing page 13220000 + FILE_URI = "./Assets/Dataset/1-hop/dataset.csv" QUERY = """ @@ -35,13 +38,13 @@ WHERE { def main(): exit = False - page = INITIAL_OFFSET + page = int(floor(INITIAL_OFFSET / LIMIT)) -1 while not exit: print(f"Starting to get page {page}") - CURRENT_OFFSET = OFFSET * page + CURRENT_OFFSET = int(OFFSET + (page * LIMIT)) sparql = SPARQLWrapper.SPARQLWrapper(BASE_URL) sparql.setReturnFormat(TYPE)