"""Fill the abbreviation tables of the dataset database.

Every URI found in the ``Subjects``, ``Objects`` and ``Relationships`` tables
is stored in ``Abbreviations`` together with its abbreviation (looked up in
the CSV mapper) and then linked to its owning row through the matching
``*_Abbreviations`` table.
"""

import csv
import sqlite3

import pandas as pd

DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
CSV_MAPPER = "./Assets/Dataset/1-hop/uri-abbreviations.csv"

# The mapper CSV provides a "uri" column and an "abbreviation" column.
mapper = pd.read_csv(CSV_MAPPER)
mapper_key_list = mapper["uri"].to_list()
mapper_value_list = mapper["abbreviation"].to_list()

# Scalar lookup table used by parser(); the first occurrence of a duplicated
# URI wins.
_ABBREVIATION_BY_URI: dict = {}
for _uri, _abbreviation in zip(mapper_key_list, mapper_value_list):
    _ABBREVIATION_BY_URI.setdefault(_uri, _abbreviation)

CONN = sqlite3.connect(DB_NAME)
CURS = CONN.cursor()


def _try_insert(query: str, params: list) -> bool:
    """Run an INSERT on the shared cursor; return False on IntegrityError."""
    try:
        CURS.execute(query, params)
    except sqlite3.IntegrityError:
        return False
    return True


def insert_abbreviation(uri, abbreviation) -> bool:
    """Insert a (URI, Abbreviation) row into ``Abbreviations``.

    Returns:
        True if the row was inserted, False if SQLite raised
        ``IntegrityError`` (a constraint rejected the row).
    """
    QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
    return _try_insert(QUERY, [uri, abbreviation])


def inserto_object_abbreviation(object_id, abbreviation_id) -> bool:
    """Link an object to an abbreviation in ``Objects_Abbreviations``.

    The misspelled function name is kept so existing callers keep working.

    Returns:
        True if the row was inserted, False on ``IntegrityError``.
    """
    # NOTE(review): the column is spelled "AbrreviationID" here but
    # "AbbreviationID" in the sibling tables - confirm against the schema
    # before changing it.
    QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbrreviationID) VALUES (?,?);"
    return _try_insert(QUERY, [object_id, abbreviation_id])


def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
    """Link a relationship to an abbreviation in ``Relationships_Abbreviations``.

    Returns:
        True if the row was inserted, False on ``IntegrityError``.
    """
    QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
    return _try_insert(QUERY, [relationship_id, abbreviation_id])


def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
    """Link a subject to an abbreviation in ``Subjects_Abbreviations``.

    Returns:
        True if the row was inserted, False on ``IntegrityError``.
    """
    QUERY = "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
    return _try_insert(QUERY, [subject_id, abbreviation_id])


def select_abbreviation_id(uri) -> int | None:
    """Return the ``AbbreviationID`` stored for ``uri``, or None if absent."""
    # Exact comparison: with LIKE, any "_" or "%" inside the URI would act as
    # a wildcard and could return the id of a different URI.
    QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI = ?;"
    CURS.execute(QUERY, [uri])
    row = CURS.fetchone()
    if not row:
        return None
    # fetchone() yields a 1-tuple; the id is its only element.
    return row[0]


def parser(element: pd.DataFrame | pd.Series | str):
    """Map URIs to their abbreviations using the CSV mapper.

    Args:
        element: a pandas DataFrame/Series, or a single URI value.

    Returns:
        For pandas objects, the result of
        ``element.replace(mapper_key_list, mapper_value_list)``.
        For a single value, its abbreviation if the value is listed in the
        mapper, otherwise the value unchanged.
    """
    if isinstance(element, (pd.DataFrame, pd.Series)):
        return element.replace(mapper_key_list, mapper_value_list)
    # A plain str cannot take list arguments in str.replace (TypeError), so
    # single values are resolved through the lookup table instead.
    # NOTE(review): this matches whole values only; if the mapper is meant to
    # hold URI prefixes, substring replacement is needed instead - confirm.
    return _ABBREVIATION_BY_URI.get(element, element)


def _link_abbreviations(frame: pd.DataFrame, uri_column: str, id_column: str, insert_link) -> None:
    """Store the abbreviation of every URI in ``frame`` and link it to its row id."""
    for uri, entity_id in zip(frame[uri_column], frame[id_column]):
        # The insert may be rejected (IntegrityError -> False); the id is
        # looked up afterwards either way.
        insert_abbreviation(uri, parser(uri))
        abbreviation_id = select_abbreviation_id(uri)
        if abbreviation_id is None:
            # Nothing to link to; skip rather than insert a NULL reference.
            continue
        insert_link(entity_id, abbreviation_id)


def populate():
    """Create abbreviations for all subjects, objects and relationships.

    Reads the three source tables, fills ``Abbreviations`` and links each row
    through the table that matches its kind. The work is committed on success
    and rolled back if an exception escapes; the shared connection is closed
    afterwards, so this function is meant to run once per process.
    """
    # NOTE(review): the original layout was ambiguous about whether the final
    # commit/close belonged to this function or to module level; they are
    # kept here so the inserts made by this function are actually persisted.
    try:
        with CONN:
            subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN)
            objects = pd.read_sql_query('SELECT * FROM Objects;', CONN)
            relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN)

            _link_abbreviations(
                subjects, "SubjectURI", "SubjectID", insert_subject_abbreviation
            )
            # Objects and relationships previously went through
            # insert_subject_abbreviation, writing them to the subjects table.
            _link_abbreviations(
                objects, "ObjectURI", "ObjectID", inserto_object_abbreviation
            )
            _link_abbreviations(
                relationships,
                "RelationshipURI",
                "RelationshipID",
                insert_relationship_abbreviation,
            )
    finally:
        CONN.close()