From 4315d701091cbee0d69943261a46416a55618417 Mon Sep 17 00:00:00 2001 From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com> Date: Wed, 24 Sep 2025 19:29:43 +0200 Subject: [PATCH] Merged abbreviation_datawarehouse into datawarehouse --- .../abbrevietion_datawarehouse.py | 105 ----- Scripts/DatasetMerging/datawarehouse.py | 410 ++++++++++++++---- 2 files changed, 334 insertions(+), 181 deletions(-) delete mode 100644 Scripts/DataCleaning/abbrevietion_datawarehouse.py diff --git a/Scripts/DataCleaning/abbrevietion_datawarehouse.py b/Scripts/DataCleaning/abbrevietion_datawarehouse.py deleted file mode 100644 index bc88cd5..0000000 --- a/Scripts/DataCleaning/abbrevietion_datawarehouse.py +++ /dev/null @@ -1,105 +0,0 @@ -import sqlite3 -import csv -import pandas as pd - -DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db" -CSV_MAPPER = "./Assets/Dataset/1-hop/uri-abbreviations.csv" -# MAPPER_HANDLER = open(CSV_MAPPER,"r",newline='', encoding="utf-8") -mapper = pd.read_csv(CSV_MAPPER) -mapper_key_list = mapper["uri"].to_list() -mapper_value_list = mapper["abbreviation"].to_list() - -CONN = sqlite3.connect(DB_NAME) -CURS = CONN.cursor() - -def insert_abbreviation(uri, abbreviation) -> bool: - QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);" - try: - CURS.execute(QUERY,[uri, abbreviation]) - return True - except sqlite3.IntegrityError: - return False - -def inserto_object_abbreviation(object_id, abbreviation_id) -> bool: - QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbrreviationID) VALUES (?,?);" - try: - CURS.execute(QUERY,[object_id, abbreviation_id]) - return True - except sqlite3.IntegrityError: - return False - -def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool: - QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);" - try: - CURS.execute(QUERY,[relationship_id, abbreviation_id]) - return True - except sqlite3.IntegrityError: - return False - -def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool: - QUERY = "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);" - try: - CURS.execute(QUERY,[subject_id, abbreviation_id]) - return True - except sqlite3.IntegrityError: - return False - -def select_abbreviation_id(uri) -> int | None: - QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;" - CURS.execute(QUERY, [uri]) - abbreviation_id = CURS.fetchone() - if not abbreviation_id: - return None - - # in this case the real id is the first element of the tuple - return abbreviation_id[0] - -def parser(element: pd.DataFrame): - # df.replace(['Boston Celtics', 'Amir Johnson', 'R.J. Hunter'], - # ['Omega Warriors', 'Mitcell Johnson', 'Shivang Thomas']) - return element.replace(mapper_key_list, mapper_value_list) -# # map by csv - - - -def populate(): - # get subject, relationships, objects - # for index, row in df.iterrows(): - Subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN) - Objects = pd.read_sql_query('SELECT * FROM Objects;', CONN) - Relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN) - # add at each df their abbreviation - Subjects["Abbreviation"] = Subjects["SubjectURI"] - Objects["Abbreviation"] = Objects["ObjectURI"] - Relationships["Abbreviation"] = Relationships["RelationshipURI"] - - - for index, row in Subjects.iterrows(): - subject_uri = row["SubjectURI"] - subject_id = row["SubjectID"] - abbreviation = parser(subject_uri) - insert_abbreviation(subject_uri,abbreviation) - abbreviation_id = select_abbreviation_id(subject_uri) - insert_subject_abbreviation(subject_id,abbreviation_id) - - for index, row in Objects.iterrows(): - object_uri = row["ObjectURI"] - object_id = row["ObjectID"] - abbreviation = parser(object_uri) - insert_abbreviation(object_uri,abbreviation) - abbreviation_id = select_abbreviation_id(object_uri) - insert_subject_abbreviation(object_id,abbreviation_id) - - for index, row in Relationships.iterrows(): - relationship_uri = row["RelationshipURI"] - relationship_id = row["RelationshipID"] - abbreviation = parser(relationship_uri) - insert_abbreviation(relationship_uri,abbreviation) - abbreviation_id = select_abbreviation_id(relationship_uri) - insert_subject_abbreviation(relationship_id,abbreviation_id) - - -CONN.commit() -CONN.close() - -# MAPPER_HANDLER.close() \ No newline at end of file diff --git a/Scripts/DatasetMerging/datawarehouse.py b/Scripts/DatasetMerging/datawarehouse.py index f28c2e8..7bb5da9 100644 --- a/Scripts/DatasetMerging/datawarehouse.py +++ b/Scripts/DatasetMerging/datawarehouse.py @@ -8,7 +8,7 @@ import csv ##################################################################### # sometimes you may need to build a new db file, here a little snippet for you -# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql +# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql # --- Global configuration --- DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db" @@ -17,12 +17,15 @@ PAGEID_CSV = "./Assets/Dataset/1-hop/movie-pageid.csv" SUMMARY_CSV = "./Assets/Dataset/1-hop/wikipedia-summary.csv" DATASET_CSV = "./Assets/Dataset/1-hop/dataset.csv" REVERSE_CSV = "./Assets/Dataset/1-hop/reverse.csv" +URI_CSV = "./Assets/Dataset/1-hop/uri-abbreviations.csv" + +MOVIES_CSV_HANDLER = open(MOVIES_CSV, "r", newline="", encoding="utf-8") +PAGEID_CSV_HANDLER = open(PAGEID_CSV, "r", newline="", encoding="utf-8") +SUMMARY_CSV_HANDLER = open(SUMMARY_CSV, "r", newline="", encoding="utf-8") +DATASET_CSV_HANDLER = open(DATASET_CSV, "r", newline="", encoding="utf-8") +REVERSE_CSV_HANDLER = open(REVERSE_CSV, "r", newline="", encoding="utf-8") +URI_ABBR_CSV_HANDLER = open(URI_CSV, "r", newline="", encoding="utf-8") -MOVIES_CSV_HANDLER = open(MOVIES_CSV,"r",newline='', encoding="utf-8") -PAGEID_CSV_HANDLER = open(PAGEID_CSV,"r",newline='', encoding="utf-8") -SUMMARY_CSV_HANDLER = open(SUMMARY_CSV,"r",newline='', encoding="utf-8") -DATASET_CSV_HANDLER = open(DATASET_CSV,"r",newline='', encoding="utf-8") -REVERSE_CSV_HANDLER = open(REVERSE_CSV,"r",newline='', encoding="utf-8") CONN = sqlite3.connect(DB_NAME) CURS = CONN.cursor() @@ -30,7 +33,8 @@ CURS = CONN.cursor() # MARK: SQL Definitions # Insert MovieURI -def insertOrigin(curs : sqlite3.Cursor ) -> bool: + +def insertOrigin(curs: sqlite3.Cursor) -> bool: QUERY = "INSERT INTO Origins (OriginName) VALUES ('dataset.csv'),('reverse.csv');" try: @@ -38,24 +42,26 @@ def insertOrigin(curs : sqlite3.Cursor ) -> bool: return True except sqlite3.IntegrityError: return False - + + def selectOrigin(curs: sqlite3.Cursor, originName: str) -> int | None: QUERY = "SELECT OriginID FROM Origins WHERE OriginName = ?;" - + curs.execute(QUERY, [originName]) originId = curs.fetchone() if not originId: return None - + # in this case the real id is the first element of the tuple return originId[0] -def insertMovie(curs : sqlite3.Cursor , movieUri: str) -> bool: + +def insertMovie(curs: sqlite3.Cursor, movieUri: str) -> bool: QUERY = "INSERT INTO Movies (MovieURI) VALUES (?);" try: - curs.execute(QUERY,[movieUri]) + curs.execute(QUERY, [movieUri]) return True except sqlite3.IntegrityError: return False @@ -64,12 +70,12 @@ def insertMovie(curs : sqlite3.Cursor , movieUri: str) -> bool: def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None: QUERY = "SELECT MovieID FROM Movies WHERE MovieURI = ?;" - + curs.execute(QUERY, [movieUri]) movieId = curs.fetchone() if not movieId: return None - + # in this case the real id is the first element of the tuple return movieId[0] @@ -77,105 +83,164 @@ def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None: def insertWikiPageId(curs: sqlite3.Cursor, movieId: int, pageId: int) -> bool: QUERY = "INSERT INTO WikiPageIDs (MovieID, PageID) VALUES (?,?);" try: - curs.execute(QUERY,[movieId, pageId]) + curs.execute(QUERY, [movieId, pageId]) return True except sqlite3.IntegrityError: return False - -def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor,pageId: int) -> int | None: + + +def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor, pageId: int) -> int | None: QUERY = "SELECT MovieID FROM WikiPageIDs WHERE PageID = ?;" - + curs.execute(QUERY, [pageId]) movieId = curs.fetchone() if not movieId: return None - + # in this case the real id is the first element of the tuple return movieId[0] + def insertWikiAbstract(curs: sqlite3.Cursor, movieId: int, abstract: str) -> bool: QUERY = "INSERT INTO WikipediaAbstracts (MovieID, Abstract) VALUES (?,?);" try: - curs.execute(QUERY,[movieId, abstract]) + curs.execute(QUERY, [movieId, abstract]) return True except sqlite3.IntegrityError: return False + def insertSubject(curs: sqlite3.Cursor, subjectURI: str, originID: int) -> bool: QUERY = "INSERT INTO Subjects (SubjectURI, OriginID) VALUES (?,?);" try: - curs.execute(QUERY,[subjectURI, originID]) + curs.execute(QUERY, [subjectURI, originID]) return True except sqlite3.IntegrityError: return False - + + def insertRelationship(curs: sqlite3.Cursor, relationshipURI: str) -> bool: QUERY = "INSERT INTO Relationships (RelationshipURI) VALUES (?);" try: - curs.execute(QUERY,[relationshipURI]) + curs.execute(QUERY, [relationshipURI]) return True except sqlite3.IntegrityError: return False + def insertObject(curs: sqlite3.Cursor, objectURI: str, originID: int) -> bool: QUERY = "INSERT INTO objects (ObjectURI, OriginID) VALUES (?,?);" try: - curs.execute(QUERY,[objectURI, originID]) + curs.execute(QUERY, [objectURI, originID]) return True except sqlite3.IntegrityError: return False - + + def selectSubjectId(curs: sqlite3.Cursor, subjectURI: str) -> int | None: QUERY = "SELECT SubjectID FROM Subjects WHERE SubjectURI = ?;" - + curs.execute(QUERY, [subjectURI]) subjectId = curs.fetchone() if not subjectId: return None - + # in this case the real id is the first element of the tuple return subjectId[0] + def selectRelationshipId(curs: sqlite3.Cursor, relationshipURI: str) -> int | None: QUERY = "SELECT RelationshipID FROM Relationships WHERE RelationshipURI = ?;" - + curs.execute(QUERY, [relationshipURI]) relationshipId = curs.fetchone() if not relationshipId: return None - + # in this case the real id is the first element of the tuple return relationshipId[0] + def selectObjectId(curs: sqlite3.Cursor, objectURI: str) -> int | None: QUERY = "SELECT ObjectID FROM Objects WHERE ObjectURI = ?;" - + curs.execute(QUERY, [objectURI]) objectId = curs.fetchone() if not objectId: return None - + # in this case the real id is the first element of the tuple return objectId[0] - + + def insertRDF( - curs: sqlite3.Cursor, - movieId: int, + curs: sqlite3.Cursor, + movieId: int, subjectId: int, relationshipId: int, - objectId: int + objectId: int, ) -> bool: QUERY = "INSERT INTO RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);" try: - curs.execute(QUERY,[movieId,subjectId,relationshipId,objectId]) + curs.execute(QUERY, [movieId, subjectId, relationshipId, objectId]) return True except sqlite3.IntegrityError: return False - + +# UGLY: correct method to add cursor +def insert_abbreviation(uri, abbreviation) -> bool: + QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);" + try: + CURS.execute(QUERY, [uri, abbreviation]) + return True + except sqlite3.IntegrityError: + return False + +# UGLY: correct method to add cursor +def insert_object_abbreviation(object_id, abbreviation_id) -> bool: + QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);" + try: + CURS.execute(QUERY, [object_id, abbreviation_id]) + return True + except sqlite3.IntegrityError: + return False + +# UGLY: correct method to add cursor +def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool: + QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);" + try: + CURS.execute(QUERY, [relationship_id, abbreviation_id]) + return True + except sqlite3.IntegrityError: + return False + +# UGLY: correct method to add cursor +def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool: + QUERY = ( + "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);" + ) + try: + CURS.execute(QUERY, [subject_id, abbreviation_id]) + return True + except sqlite3.IntegrityError: + return False + +# UGLY: correct method to add cursor +def select_abbreviation_id(uri) -> int | None: + QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;" + CURS.execute(QUERY, [uri]) + abbreviation_id = CURS.fetchone() + if not abbreviation_id: + return None + + # in this case the real id is the first element of the tuple + return abbreviation_id[0] + + # MARK: Parsing def parseMovies(): @@ -203,12 +268,11 @@ def parseWikiPageId(): def parseAbstract(): CSV_READER = csv.DictReader(SUMMARY_CSV_HANDLER) for row in CSV_READER: - + WIKI_PAGE_ID = int(row["subject"]) ABSTRACT = row["text"] MOVIE_ID = selectMovieIdFromWikiPageId(CURS, WIKI_PAGE_ID) - if MOVIE_ID is None: print(f"The WikiPageId: {WIKI_PAGE_ID} has not a MovieId ") continue @@ -216,10 +280,24 @@ def parseAbstract(): insertWikiAbstract(CURS, MOVIE_ID, ABSTRACT) +def parseAbbreviations(): + URI_CSV = csv.DictReader(URI_ABBR_CSV_HANDLER) + for row in URI_CSV: + + URI = row["uri"] + ABBREVIATION = row["abbreviation"] + + insert_abbreviation(URI, ABBREVIATION) + + def parseRDF_Reverse(): REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER) - REVERSE_ORIGIN_ID = selectOrigin(CURS, 'reverse.csv') + REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv") + + if REVERSE_ORIGIN_ID is None: + return + total = 0 for row in REVERSE_CSV_READER: @@ -227,7 +305,7 @@ def parseRDF_Reverse(): RELATIONSHIP = row["relationship"] OBJECT = row["object"] print(f"RDF triplets:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}") - insertSubject(CURS,SUBJECT,REVERSE_ORIGIN_ID) + insertSubject(CURS, SUBJECT, REVERSE_ORIGIN_ID) insertRelationship(CURS, RELATIONSHIP) insertObject(CURS, OBJECT, REVERSE_ORIGIN_ID) @@ -236,7 +314,6 @@ def parseRDF_Reverse(): RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP) MOVIE_ID = selectMovieId(CURS, OBJECT) - skip = False # guard @@ -259,17 +336,19 @@ def parseRDF_Reverse(): if skip: continue - if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): + if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore total += 1 print(total) - def parseRDF_Dataset(): DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER) - DATASET_ORIGIN_ID = selectOrigin(CURS, 'dataset.csv') + DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv") + + if DATASET_ORIGIN_ID is None: + return total = 0 rdf_idx = 0 @@ -284,7 +363,7 @@ def parseRDF_Dataset(): if rdf_idx % 100000 == 0: print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}") - insertSubject(CURS,SUBJECT,DATASET_ORIGIN_ID) + insertSubject(CURS, SUBJECT, DATASET_ORIGIN_ID) insertRelationship(CURS, RELATIONSHIP) insertObject(CURS, OBJECT, DATASET_ORIGIN_ID) @@ -293,7 +372,6 @@ def parseRDF_Dataset(): RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP) MOVIE_ID = selectMovieId(CURS, SUBJECT) - skip = False # guard @@ -316,24 +394,203 @@ def parseRDF_Dataset(): if skip: continue - if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): + if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore total += 1 print(total) - + + +def parseAbbr_Reverse(): + + REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER) + REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv") + + if REVERSE_ORIGIN_ID is None: + return + + total = 0 + + for row in REVERSE_CSV_READER: + SUBJECT = row["subject"] + RELATIONSHIP = row["relationship"] + OBJECT = row["object"] + + SUBJECT_ID = selectSubjectId(CURS, SUBJECT) + OBJECT_ID = selectObjectId(CURS, OBJECT) + RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP) + + SUB_SECTIONS = SUBJECT.split("/") + REL_SECTIONS = RELATIONSHIP.split("/") + OBJ_SECTIONS = OBJECT.split("/") + + SUB_ABBR_ID = None + REL_ABBR_ID = None + OBJ_ABBR_ID = None + + skip = False + + # guard + if SUBJECT_ID is None: + print(f"No SubjectId for {SUBJECT}") + skip = True + + if OBJECT_ID is None: + print(f"No ObjectId for {OBJECT}") + skip = True + + if RELATIONSHIP_ID is None: + print(f"No RelationshipId for {RELATIONSHIP}") + skip = True + + + if skip: + continue + + if len(SUB_SECTIONS) > 4: + index = min(len(SUB_SECTIONS), 7) + while index > 3: + PATH = "/".join(SUB_SECTIONS[0:index]) + "%" + SUB_ABBR_ID = select_abbreviation_id(PATH) + + if SUB_ABBR_ID is not None: + if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + if len(REL_SECTIONS) > 4: + index = min(len(REL_SECTIONS), 7) + while index > 2: + PATH = "/".join(REL_SECTIONS[0:index]) + "%" + REL_ABBR_ID = select_abbreviation_id(PATH) + + + if REL_ABBR_ID is not None: + if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + if len(OBJ_SECTIONS) > 4: + index = min(len(OBJ_SECTIONS), 7) + while index > 3: + PATH = "/".join(OBJ_SECTIONS[0:index]) + "%" + OBJ_ABBR_ID = select_abbreviation_id(PATH) + + if OBJ_ABBR_ID is not None: + if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + print(total) + + +def parseAbbr_Dataset(): + + DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER) + DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv") + + if DATASET_ORIGIN_ID is None: + return + + total = 0 + rdf_idx = 0 + for row in DATASET_CSV_READER: + SUBJECT = row["subject"] + RELATIONSHIP = row["relationship"] + OBJECT = row["object"] + + rdf_idx += 1 + + if rdf_idx % 100000 == 0: + print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}") + + SUBJECT_ID = selectSubjectId(CURS, SUBJECT) + OBJECT_ID = selectObjectId(CURS, OBJECT) + RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP) + + SUB_SECTIONS = SUBJECT.split("/") + REL_SECTIONS = RELATIONSHIP.split("/") + OBJ_SECTIONS = OBJECT.split("/") + + SUB_ABBR_ID = None + REL_ABBR_ID = None + OBJ_ABBR_ID = None + + skip = False + + # guard + if SUBJECT_ID is None: + print(f"No SubjectId for {SUBJECT}") + skip = True + + if OBJECT_ID is None: + print(f"No ObjectId for {OBJECT}") + skip = True + + if RELATIONSHIP_ID is None: + print(f"No RelationshipId for {RELATIONSHIP}") + skip = True + + + if skip: + continue + + if len(SUB_SECTIONS) > 4: + index = min(len(SUB_SECTIONS), 7) + while index > 3: + PATH = "/".join(SUB_SECTIONS[0:index]) + "%" + SUB_ABBR_ID = select_abbreviation_id(PATH) + + if SUB_ABBR_ID is not None: + if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + if len(REL_SECTIONS) > 4: + index = min(len(REL_SECTIONS), 7) + while index > 2: + PATH = "/".join(REL_SECTIONS[0:index]) + "%" + REL_ABBR_ID = select_abbreviation_id(PATH) + + + if REL_ABBR_ID is not None: + if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + if len(OBJ_SECTIONS) > 4: + index = min(len(OBJ_SECTIONS), 7) + while index > 3: + PATH = "/".join(OBJ_SECTIONS[0:index]) + "%" + OBJ_ABBR_ID = select_abbreviation_id(PATH) + + if OBJ_ABBR_ID is not None: + if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID): + total += 1 + index = 0 + index -= 1 + + print(total) + # MARK: Actual Code # parseMovies() # parseWikiPageId() # parseAbstract() # insertOrigin(CURS) +# parseAbbreviations() # parseRDF_Reverse() # parseRDF_Dataset() +# parseAbbr_Reverse() +parseAbbr_Dataset() CONN.commit() CONN.close() - MOVIES_CSV_HANDLER.close() @@ -341,35 +598,36 @@ PAGEID_CSV_HANDLER.close() SUMMARY_CSV_HANDLER.close() DATASET_CSV_HANDLER.close() REVERSE_CSV_HANDLER.close() +URI_ABBR_CSV_HANDLER.close() """ -The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId -The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId -The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId -The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId -The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId -The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId -The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId -The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId -The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId -The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId -The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId -The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId +The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId +The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId +The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId +The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId +The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId +The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId +The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId +The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId +The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId +The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId +The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId +The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId """ """ -The WikiPageId: 10068850 has not a MovieId -The WikiPageId: 55069615 has not a MovieId -The WikiPageId: 49510056 has not a MovieId -The WikiPageId: 4049786 has not a MovieId -The WikiPageId: 55510238 has not a MovieId -The WikiPageId: 31239628 has not a MovieId -The WikiPageId: 34757217 has not a MovieId -The WikiPageId: 64311757 has not a MovieId -The WikiPageId: 8326198 has not a MovieId -The WikiPageId: 42162164 has not a MovieId -The WikiPageId: 18502369 has not a MovieId -The WikiPageId: 58092358 has not a MovieId -The WikiPageId: 40710250 has not a MovieId -""" \ No newline at end of file +The WikiPageId: 10068850 has not a MovieId +The WikiPageId: 55069615 has not a MovieId +The WikiPageId: 49510056 has not a MovieId +The WikiPageId: 4049786 has not a MovieId +The WikiPageId: 55510238 has not a MovieId +The WikiPageId: 31239628 has not a MovieId +The WikiPageId: 34757217 has not a MovieId +The WikiPageId: 64311757 has not a MovieId +The WikiPageId: 8326198 has not a MovieId +The WikiPageId: 42162164 has not a MovieId +The WikiPageId: 18502369 has not a MovieId +The WikiPageId: 58092358 has not a MovieId +The WikiPageId: 40710250 has not a MovieId +"""