From 4315d701091cbee0d69943261a46416a55618417 Mon Sep 17 00:00:00 2001
From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com>
Date: Wed, 24 Sep 2025 19:29:43 +0200
Subject: [PATCH] Merged abbreviation_datawarehouse into datawarehouse

---
 .../abbrevietion_datawarehouse.py             | 105 -----
 Scripts/DatasetMerging/datawarehouse.py       | 410 ++++++++++++++----
 2 files changed, 334 insertions(+), 181 deletions(-)
 delete mode 100644 Scripts/DataCleaning/abbrevietion_datawarehouse.py

diff --git a/Scripts/DataCleaning/abbrevietion_datawarehouse.py b/Scripts/DataCleaning/abbrevietion_datawarehouse.py
deleted file mode 100644
index bc88cd5..0000000
--- a/Scripts/DataCleaning/abbrevietion_datawarehouse.py
+++ /dev/null
@@ -1,105 +0,0 @@
-import sqlite3
-import csv
-import pandas as pd
-
-DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
-CSV_MAPPER = "./Assets/Dataset/1-hop/uri-abbreviations.csv"
-# MAPPER_HANDLER = open(CSV_MAPPER,"r",newline='', encoding="utf-8")
-mapper = pd.read_csv(CSV_MAPPER)
-mapper_key_list = mapper["uri"].to_list()
-mapper_value_list = mapper["abbreviation"].to_list()
-
-CONN = sqlite3.connect(DB_NAME)
-CURS = CONN.cursor()
-
-def insert_abbreviation(uri, abbreviation) -> bool:
-    QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
-    try:
-        CURS.execute(QUERY,[uri, abbreviation])
-        return True
-    except sqlite3.IntegrityError:
-        return False
-    
-def inserto_object_abbreviation(object_id, abbreviation_id) -> bool:
-    QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbrreviationID) VALUES (?,?);"
-    try:
-        CURS.execute(QUERY,[object_id, abbreviation_id])
-        return True
-    except sqlite3.IntegrityError:
-        return False
-    
-def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
-    QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
-    try:
-        CURS.execute(QUERY,[relationship_id, abbreviation_id])
-        return True
-    except sqlite3.IntegrityError:
-        return False
-    
-def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
-    QUERY = "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
-    try:
-        CURS.execute(QUERY,[subject_id, abbreviation_id])
-        return True
-    except sqlite3.IntegrityError:
-        return False
-    
-def select_abbreviation_id(uri) -> int | None:
-    QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;"
-    CURS.execute(QUERY, [uri])
-    abbreviation_id = CURS.fetchone()
-    if not abbreviation_id:
-        return None
-    
-    # in this case the real id is the first element of the tuple
-    return abbreviation_id[0]
-
-def parser(element: pd.DataFrame):
-    # df.replace(['Boston Celtics', 'Amir Johnson', 'R.J. Hunter'],
-    #            ['Omega Warriors', 'Mitcell Johnson', 'Shivang Thomas'])
-    return element.replace(mapper_key_list, mapper_value_list)
-# # map by csv
-
-
-
-def populate():
-    # get subject, relationships, objects
-    # for index, row in df.iterrows():
-    Subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN)
-    Objects = pd.read_sql_query('SELECT * FROM Objects;', CONN)
-    Relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN)
-    # add at each df their abbreviation
-    Subjects["Abbreviation"] = Subjects["SubjectURI"]
-    Objects["Abbreviation"] = Objects["ObjectURI"]
-    Relationships["Abbreviation"] = Relationships["RelationshipURI"]
-
-
-    for index, row in Subjects.iterrows():
-        subject_uri = row["SubjectURI"]
-        subject_id = row["SubjectID"]
-        abbreviation = parser(subject_uri)
-        insert_abbreviation(subject_uri,abbreviation)
-        abbreviation_id = select_abbreviation_id(subject_uri)
-        insert_subject_abbreviation(subject_id,abbreviation_id)
-
-    for index, row in Objects.iterrows():
-        object_uri = row["ObjectURI"]
-        object_id = row["ObjectID"]
-        abbreviation = parser(object_uri)
-        insert_abbreviation(object_uri,abbreviation)
-        abbreviation_id = select_abbreviation_id(object_uri)
-        insert_subject_abbreviation(object_id,abbreviation_id)
-
-    for index, row in Relationships.iterrows():
-        relationship_uri = row["RelationshipURI"]
-        relationship_id = row["RelationshipID"]
-        abbreviation = parser(relationship_uri)
-        insert_abbreviation(relationship_uri,abbreviation)
-        abbreviation_id = select_abbreviation_id(relationship_uri)
-        insert_subject_abbreviation(relationship_id,abbreviation_id)
-
-
-CONN.commit()
-CONN.close()
-
-# MAPPER_HANDLER.close()
\ No newline at end of file
diff --git a/Scripts/DatasetMerging/datawarehouse.py b/Scripts/DatasetMerging/datawarehouse.py
index f28c2e8..7bb5da9 100644
--- a/Scripts/DatasetMerging/datawarehouse.py
+++ b/Scripts/DatasetMerging/datawarehouse.py
@@ -8,7 +8,7 @@ import csv
 #####################################################################
 
 # sometimes you may need to build a new db file, here a little snippet for you
-# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql  
+# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql
 
 # --- Global configuration ---
 DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
@@ -17,12 +17,15 @@ PAGEID_CSV = "./Assets/Dataset/1-hop/movie-pageid.csv"
 SUMMARY_CSV = "./Assets/Dataset/1-hop/wikipedia-summary.csv"
 DATASET_CSV = "./Assets/Dataset/1-hop/dataset.csv"
 REVERSE_CSV = "./Assets/Dataset/1-hop/reverse.csv"
+URI_CSV = "./Assets/Dataset/1-hop/uri-abbreviations.csv"  # read by parseAbbreviations() through URI_ABBR_CSV_HANDLER
+# Every input CSV is opened once at import time (newline="" as the csv docs recommend) and closed at the end of the script.
+MOVIES_CSV_HANDLER = open(MOVIES_CSV, "r", newline="", encoding="utf-8")
+PAGEID_CSV_HANDLER = open(PAGEID_CSV, "r", newline="", encoding="utf-8")
+SUMMARY_CSV_HANDLER = open(SUMMARY_CSV, "r", newline="", encoding="utf-8")
+DATASET_CSV_HANDLER = open(DATASET_CSV, "r", newline="", encoding="utf-8")
+REVERSE_CSV_HANDLER = open(REVERSE_CSV, "r", newline="", encoding="utf-8")
+URI_ABBR_CSV_HANDLER = open(URI_CSV, "r", newline="", encoding="utf-8")  # columns used: "uri", "abbreviation"
 
-MOVIES_CSV_HANDLER = open(MOVIES_CSV,"r",newline='', encoding="utf-8")
-PAGEID_CSV_HANDLER = open(PAGEID_CSV,"r",newline='', encoding="utf-8")
-SUMMARY_CSV_HANDLER = open(SUMMARY_CSV,"r",newline='', encoding="utf-8")
-DATASET_CSV_HANDLER = open(DATASET_CSV,"r",newline='', encoding="utf-8")
-REVERSE_CSV_HANDLER = open(REVERSE_CSV,"r",newline='', encoding="utf-8")
 
 CONN = sqlite3.connect(DB_NAME)
 CURS = CONN.cursor()
@@ -30,7 +33,8 @@ CURS = CONN.cursor()
 # MARK: SQL Definitions
 # Insert MovieURI
 
-def insertOrigin(curs : sqlite3.Cursor ) -> bool:
+
+def insertOrigin(curs: sqlite3.Cursor) -> bool:
 
     QUERY = "INSERT INTO  Origins (OriginName) VALUES ('dataset.csv'),('reverse.csv');"
     try:
@@ -38,24 +42,26 @@ def insertOrigin(curs : sqlite3.Cursor ) -> bool:
         return True
     except sqlite3.IntegrityError:
         return False
-    
+
+
 def selectOrigin(curs: sqlite3.Cursor, originName: str) -> int | None:
 
     QUERY = "SELECT OriginID FROM Origins WHERE OriginName = ?;"
-    
+
     curs.execute(QUERY, [originName])
     originId = curs.fetchone()
     if not originId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return originId[0]
 
-def insertMovie(curs : sqlite3.Cursor , movieUri: str) -> bool:
+
+def insertMovie(curs: sqlite3.Cursor, movieUri: str) -> bool:
 
     QUERY = "INSERT INTO Movies (MovieURI) VALUES (?);"
     try:
-        curs.execute(QUERY,[movieUri])
+        curs.execute(QUERY, [movieUri])
         return True
     except sqlite3.IntegrityError:
         return False
@@ -64,12 +70,12 @@ def insertMovie(curs : sqlite3.Cursor , movieUri: str) -> bool:
 def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None:
 
     QUERY = "SELECT MovieID FROM Movies WHERE MovieURI = ?;"
-    
+
     curs.execute(QUERY, [movieUri])
     movieId = curs.fetchone()
     if not movieId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return movieId[0]
 
@@ -77,105 +83,164 @@ def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None:
 def insertWikiPageId(curs: sqlite3.Cursor, movieId: int, pageId: int) -> bool:
     QUERY = "INSERT INTO  WikiPageIDs (MovieID, PageID) VALUES (?,?);"
     try:
-        curs.execute(QUERY,[movieId, pageId])
+        curs.execute(QUERY, [movieId, pageId])
         return True
     except sqlite3.IntegrityError:
         return False
-    
-def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor,pageId: int) -> int | None:
+
+
+def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor, pageId: int) -> int | None:
 
     QUERY = "SELECT MovieID FROM WikiPageIDs WHERE PageID = ?;"
-    
+
     curs.execute(QUERY, [pageId])
     movieId = curs.fetchone()
     if not movieId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return movieId[0]
 
+
 def insertWikiAbstract(curs: sqlite3.Cursor, movieId: int, abstract: str) -> bool:
     QUERY = "INSERT INTO  WikipediaAbstracts (MovieID, Abstract) VALUES (?,?);"
     try:
-        curs.execute(QUERY,[movieId, abstract])
+        curs.execute(QUERY, [movieId, abstract])
         return True
     except sqlite3.IntegrityError:
         return False
 
+
 def insertSubject(curs: sqlite3.Cursor, subjectURI: str, originID: int) -> bool:
     QUERY = "INSERT INTO  Subjects (SubjectURI, OriginID) VALUES (?,?);"
     try:
-        curs.execute(QUERY,[subjectURI, originID])
+        curs.execute(QUERY, [subjectURI, originID])
         return True
     except sqlite3.IntegrityError:
         return False
-    
+
+
 def insertRelationship(curs: sqlite3.Cursor, relationshipURI: str) -> bool:
     QUERY = "INSERT INTO  Relationships (RelationshipURI) VALUES (?);"
     try:
-        curs.execute(QUERY,[relationshipURI])
+        curs.execute(QUERY, [relationshipURI])
         return True
     except sqlite3.IntegrityError:
         return False
 
+
 def insertObject(curs: sqlite3.Cursor, objectURI: str, originID: int) -> bool:
     QUERY = "INSERT INTO  objects (ObjectURI, OriginID) VALUES (?,?);"
     try:
-        curs.execute(QUERY,[objectURI, originID])
+        curs.execute(QUERY, [objectURI, originID])
         return True
     except sqlite3.IntegrityError:
         return False
-    
+
+
 def selectSubjectId(curs: sqlite3.Cursor, subjectURI: str) -> int | None:
 
     QUERY = "SELECT SubjectID FROM Subjects WHERE SubjectURI = ?;"
-    
+
     curs.execute(QUERY, [subjectURI])
     subjectId = curs.fetchone()
     if not subjectId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return subjectId[0]
 
+
 def selectRelationshipId(curs: sqlite3.Cursor, relationshipURI: str) -> int | None:
 
     QUERY = "SELECT RelationshipID FROM Relationships WHERE RelationshipURI = ?;"
-    
+
     curs.execute(QUERY, [relationshipURI])
     relationshipId = curs.fetchone()
     if not relationshipId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return relationshipId[0]
 
+
 def selectObjectId(curs: sqlite3.Cursor, objectURI: str) -> int | None:
 
     QUERY = "SELECT ObjectID FROM Objects WHERE ObjectURI = ?;"
-    
+
     curs.execute(QUERY, [objectURI])
     objectId = curs.fetchone()
     if not objectId:
         return None
-    
+
     # in this case the real id is the first element of the tuple
     return objectId[0]
-    
+
+
 def insertRDF(
-    curs: sqlite3.Cursor, 
-    movieId: int, 
+    curs: sqlite3.Cursor,
+    movieId: int,
     subjectId: int,
     relationshipId: int,
-    objectId: int 
+    objectId: int,
 ) -> bool:
     QUERY = "INSERT INTO  RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);"
     try:
-        curs.execute(QUERY,[movieId,subjectId,relationshipId,objectId])
+        curs.execute(QUERY, [movieId, subjectId, relationshipId, objectId])
         return True
     except sqlite3.IntegrityError:
         return False
-    
+
+# Insert one (URI, Abbreviation) row through the global CURS. TODO: take the cursor as a parameter like insertRDF().
+def insert_abbreviation(uri, abbreviation) -> bool:
+    QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
+    try:
+        CURS.execute(QUERY, [uri, abbreviation])
+        return True  # row inserted
+    except sqlite3.IntegrityError:
+        return False  # insert rejected by a database constraint
+
+# Link an object to an abbreviation through the global CURS. TODO: take the cursor as a parameter like insertRDF().
+def insert_object_abbreviation(object_id, abbreviation_id) -> bool:
+    QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);"
+    try:
+        CURS.execute(QUERY, [object_id, abbreviation_id])
+        return True  # link inserted
+    except sqlite3.IntegrityError:
+        return False  # insert rejected by a database constraint
+
+# Link a relationship to an abbreviation through the global CURS. TODO: take the cursor as a parameter like insertRDF().
+def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
+    QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
+    try:
+        CURS.execute(QUERY, [relationship_id, abbreviation_id])
+        return True  # link inserted
+    except sqlite3.IntegrityError:
+        return False  # insert rejected by a database constraint
+
+# Link a subject to an abbreviation through the global CURS. TODO: take the cursor as a parameter like insertRDF().
+def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
+    QUERY = (
+        "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
+    )
+    try:
+        CURS.execute(QUERY, [subject_id, abbreviation_id])
+        return True  # link inserted
+    except sqlite3.IntegrityError:
+        return False  # insert rejected by a database constraint
+
+# Return the AbbreviationID of the first row whose URI matches `uri`, or None. TODO: take the cursor as a parameter.
+def select_abbreviation_id(uri) -> int | None:
+    QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;"  # `uri` is a LIKE pattern: "%" and "_" are wildcards
+    CURS.execute(QUERY, [uri])
+    abbreviation_id = CURS.fetchone()  # only the first matching row is used
+    if not abbreviation_id:
+        return None
+
+    # fetchone() returns a row tuple; the id is its only selected column
+    return abbreviation_id[0]
+
+
 # MARK: Parsing
 def parseMovies():
 
@@ -203,12 +268,11 @@ def parseWikiPageId():
 def parseAbstract():
     CSV_READER = csv.DictReader(SUMMARY_CSV_HANDLER)
     for row in CSV_READER:
-        
+
         WIKI_PAGE_ID = int(row["subject"])
         ABSTRACT = row["text"]
         MOVIE_ID = selectMovieIdFromWikiPageId(CURS, WIKI_PAGE_ID)
 
-
         if MOVIE_ID is None:
             print(f"The WikiPageId: {WIKI_PAGE_ID} has not a MovieId ")
             continue
@@ -216,10 +280,24 @@ def parseAbstract():
         insertWikiAbstract(CURS, MOVIE_ID, ABSTRACT)
 
 
+def parseAbbreviations():
+    """Insert every (uri, abbreviation) row of the abbreviation CSV into the DB."""
+    # Named `reader` to avoid shadowing the module-level URI_CSV path constant.
+    reader = csv.DictReader(URI_ABBR_CSV_HANDLER)
+    for row in reader:
+        # insert_abbreviation() returns False when SQLite rejects the row with an
+        # IntegrityError; the result is ignored, so such rows are simply skipped.
+        insert_abbreviation(row["uri"], row["abbreviation"])
+
+
 def parseRDF_Reverse():
 
     REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER)
-    REVERSE_ORIGIN_ID = selectOrigin(CURS, 'reverse.csv')
+    REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv")
+
+    if REVERSE_ORIGIN_ID is None:
+        return
+
     total = 0
 
     for row in REVERSE_CSV_READER:
@@ -227,7 +305,7 @@ def parseRDF_Reverse():
         RELATIONSHIP = row["relationship"]
         OBJECT = row["object"]
         print(f"RDF triplets:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
-        insertSubject(CURS,SUBJECT,REVERSE_ORIGIN_ID)
+        insertSubject(CURS, SUBJECT, REVERSE_ORIGIN_ID)
         insertRelationship(CURS, RELATIONSHIP)
         insertObject(CURS, OBJECT, REVERSE_ORIGIN_ID)
 
@@ -236,7 +314,6 @@ def parseRDF_Reverse():
         RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
         MOVIE_ID = selectMovieId(CURS, OBJECT)
 
-
         skip = False
 
         # guard
@@ -259,17 +336,19 @@ def parseRDF_Reverse():
         if skip:
             continue
 
-        if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID):
+        if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID):  # type: ignore
             total += 1
 
     print(total)
 
 
-
 def parseRDF_Dataset():
 
     DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER)
-    DATASET_ORIGIN_ID = selectOrigin(CURS, 'dataset.csv')
+    DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv")
+
+    if DATASET_ORIGIN_ID is None:
+        return
 
     total = 0
     rdf_idx = 0
@@ -284,7 +363,7 @@ def parseRDF_Dataset():
         if rdf_idx % 100000 == 0:
             print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
 
-        insertSubject(CURS,SUBJECT,DATASET_ORIGIN_ID)
+        insertSubject(CURS, SUBJECT, DATASET_ORIGIN_ID)
         insertRelationship(CURS, RELATIONSHIP)
         insertObject(CURS, OBJECT, DATASET_ORIGIN_ID)
 
@@ -293,7 +372,6 @@ def parseRDF_Dataset():
         RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
         MOVIE_ID = selectMovieId(CURS, SUBJECT)
 
-
         skip = False
 
         # guard
@@ -316,24 +394,116 @@ def parseRDF_Dataset():
         if skip:
             continue
 
-        if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID):
+        if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID):  # type: ignore
             total += 1
 
     print(total)
-        
+
+
+def _link_abbreviation(uri, entity_id, min_sections, insert_link) -> bool:
+    """Link `entity_id` to the abbreviation matching the longest prefix of `uri`.
+
+    `uri` is split on "/". Only when it has more than 4 sections, its prefixes
+    of min(len, 7) down to `min_sections` sections are looked up, longest
+    first, with select_abbreviation_id(prefix + "%"). The first hit is handed
+    to `insert_link(entity_id, abbreviation_id)` and the search stops.
+
+    Returns the result of `insert_link` for the first hit, or False when the
+    URI is too short or no prefix matched.
+    """
+    sections = uri.split("/")
+    if len(sections) <= 4:
+        return False
+
+    # NOTE(review): the prefix is used as a LIKE pattern without escaping, so a
+    # "%" or "_" inside the URI also acts as a wildcard -- confirm this is fine.
+    for size in range(min(len(sections), 7), min_sections - 1, -1):
+        abbreviation_id = select_abbreviation_id("/".join(sections[:size]) + "%")
+        if abbreviation_id is not None:
+            return insert_link(entity_id, abbreviation_id)
+
+    return False
+
+
+def _parse_abbreviation_links(csv_handler, origin_name, progress_every=0):
+    """Link each subject, relationship and object of an RDF CSV to an abbreviation.
+
+    Shared body of parseAbbr_Reverse() and parseAbbr_Dataset(). Returns early,
+    printing nothing, when selectOrigin() finds no `origin_name`; otherwise
+    prints the number of links inserted. When `progress_every` is non-zero a
+    progress line is printed every `progress_every` rows.
+    """
+    if selectOrigin(CURS, origin_name) is None:
+        return
+
+    # The handler is a module-level file object that the parseRDF_* step for the
+    # same CSV also reads; rewind it so an earlier step cannot leave it at EOF.
+    csv_handler.seek(0)
+
+    total = 0
+    for rdf_idx, row in enumerate(csv.DictReader(csv_handler), start=1):
+        SUBJECT = row["subject"]
+        RELATIONSHIP = row["relationship"]
+        OBJECT = row["object"]
+
+        if progress_every and rdf_idx % progress_every == 0:
+            print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
+
+        SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
+        OBJECT_ID = selectObjectId(CURS, OBJECT)
+        RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
+
+        # guard: report every missing id (not just the first) before skipping
+        skip = False
+        for label, uri, found_id in (
+            ("SubjectId", SUBJECT, SUBJECT_ID),
+            ("ObjectId", OBJECT, OBJECT_ID),
+            ("RelationshipId", RELATIONSHIP, RELATIONSHIP_ID),
+        ):
+            if found_id is None:
+                print(f"No {label} for {uri}")
+                skip = True
+
+        if skip:
+            continue
+
+        # Relationships are searched down to 3-section prefixes, subjects and
+        # objects only down to 4. NOTE(review): confirm the asymmetry is intended.
+        for uri, entity_id, min_sections, insert_link in (
+            (SUBJECT, SUBJECT_ID, 4, insert_subject_abbreviation),
+            (RELATIONSHIP, RELATIONSHIP_ID, 3, insert_relationship_abbreviation),
+            (OBJECT, OBJECT_ID, 4, insert_object_abbreviation),
+        ):
+            if _link_abbreviation(uri, entity_id, min_sections, insert_link):
+                total += 1
+
+    print(total)
+
+
+def parseAbbr_Reverse():
+    """Link the URIs found in reverse.csv to their abbreviations."""
+    _parse_abbreviation_links(REVERSE_CSV_HANDLER, "reverse.csv")
+
+
+def parseAbbr_Dataset():
+    """Link the URIs found in dataset.csv, reporting progress every 100000 rows."""
+    _parse_abbreviation_links(DATASET_CSV_HANDLER, "dataset.csv", progress_every=100000)
+
 
 # MARK: Actual Code
 # parseMovies()
 # parseWikiPageId()
 # parseAbstract()
 # insertOrigin(CURS)
+# parseAbbreviations()
 # parseRDF_Reverse()
 # parseRDF_Dataset()
+# parseAbbr_Reverse()
+parseAbbr_Dataset()
 
 
 CONN.commit()
 CONN.close()
-    
 
 
 MOVIES_CSV_HANDLER.close()
@@ -341,35 +598,36 @@ PAGEID_CSV_HANDLER.close()
 SUMMARY_CSV_HANDLER.close()
 DATASET_CSV_HANDLER.close()
 REVERSE_CSV_HANDLER.close()
+URI_ABBR_CSV_HANDLER.close()
 
 
 """
-The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId 
-The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId 
+The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId
+The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId
+The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId
+The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId
+The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId
+The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId
+The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId
+The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId
+The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId
+The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId
+The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId
+The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId
 """
 
 """
-The WikiPageId: 10068850 has not a MovieId 
-The WikiPageId: 55069615 has not a MovieId 
-The WikiPageId: 49510056 has not a MovieId 
-The WikiPageId: 4049786 has not a MovieId 
-The WikiPageId: 55510238 has not a MovieId 
-The WikiPageId: 31239628 has not a MovieId 
-The WikiPageId: 34757217 has not a MovieId 
-The WikiPageId: 64311757 has not a MovieId 
-The WikiPageId: 8326198 has not a MovieId 
-The WikiPageId: 42162164 has not a MovieId 
-The WikiPageId: 18502369 has not a MovieId 
-The WikiPageId: 58092358 has not a MovieId 
-The WikiPageId: 40710250 has not a MovieId 
-"""
\ No newline at end of file
+The WikiPageId: 10068850 has not a MovieId
+The WikiPageId: 55069615 has not a MovieId
+The WikiPageId: 49510056 has not a MovieId
+The WikiPageId: 4049786 has not a MovieId
+The WikiPageId: 55510238 has not a MovieId
+The WikiPageId: 31239628 has not a MovieId
+The WikiPageId: 34757217 has not a MovieId
+The WikiPageId: 64311757 has not a MovieId
+The WikiPageId: 8326198 has not a MovieId
+The WikiPageId: 42162164 has not a MovieId
+The WikiPageId: 18502369 has not a MovieId
+The WikiPageId: 58092358 has not a MovieId
+The WikiPageId: 40710250 has not a MovieId
+"""