WIP abbrevietion_datawarehouse to create an abbreviation system

This commit is contained in:
GassiGiuseppe 2025-09-24 16:32:09 +02:00
parent 3e30489f86
commit 47197194d5

View File

@ -0,0 +1,105 @@
import sqlite3
import csv
import pandas as pd
# Locations of the SQLite data warehouse and of the CSV that maps each URI
# to its abbreviation.
DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
CSV_MAPPER = "./Assets/Dataset/1-hop/uri-abbreviations.csv"

# The mapping is read once at import time and kept as two parallel lists:
# position i of mapper_key_list is replaced by position i of
# mapper_value_list.
mapper = pd.read_csv(CSV_MAPPER)
mapper_key_list, mapper_value_list = (
    mapper[column].to_list() for column in ("uri", "abbreviation")
)

# A single connection and cursor are shared by every helper in this module.
CONN = sqlite3.connect(DB_NAME)
CURS = CONN.cursor()
def insert_abbreviation(uri, abbreviation) -> bool:
    """Insert a (URI, abbreviation) row into the Abbreviations table.

    The statement runs on the module-level cursor and is not committed here.

    Returns:
        True when the row was inserted, False when SQLite rejected it with
        an IntegrityError.
    """
    statement = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
    try:
        CURS.execute(statement, (uri, abbreviation))
    except sqlite3.IntegrityError:
        # A constraint violation is reported to the caller, not raised.
        return False
    else:
        return True
def inserto_object_abbreviation(object_id, abbreviation_id) -> bool:
    """Link an object to an abbreviation in Objects_Abbreviations.

    The statement runs on the module-level cursor and is not committed here.

    Returns:
        True when the row was inserted, False when SQLite rejected it with
        an IntegrityError.
    """
    # NOTE(review): the column is spelled "AbrreviationID" here while the
    # sibling link tables use "AbbreviationID" -- confirm against the schema.
    statement = "INSERT INTO Objects_Abbreviations(ObjectID, AbrreviationID) VALUES (?,?);"
    try:
        CURS.execute(statement, (object_id, abbreviation_id))
    except sqlite3.IntegrityError:
        # A constraint violation is reported to the caller, not raised.
        return False
    else:
        return True
def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
    """Link a relationship to an abbreviation in Relationships_Abbreviations.

    The statement runs on the module-level cursor and is not committed here.

    Returns:
        True when the row was inserted, False when SQLite rejected it with
        an IntegrityError.
    """
    statement = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
    try:
        CURS.execute(statement, (relationship_id, abbreviation_id))
    except sqlite3.IntegrityError:
        # A constraint violation is reported to the caller, not raised.
        return False
    else:
        return True
def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
    """Link a subject to an abbreviation in Subjects_Abbreviations.

    The statement runs on the module-level cursor and is not committed here.

    Returns:
        True when the row was inserted, False when SQLite rejected it with
        an IntegrityError.
    """
    statement = "INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
    try:
        CURS.execute(statement, (subject_id, abbreviation_id))
    except sqlite3.IntegrityError:
        # A constraint violation is reported to the caller, not raised.
        return False
    else:
        return True
def select_abbreviation_id(uri) -> int | None:
    """Return the AbbreviationID stored for *uri*, or None if there is none.

    The lookup runs on the module-level cursor.

    Args:
        uri: the URI to look up in the Abbreviations table.

    Returns:
        The first column of the first matching row, or None when no row
        matches.
    """
    # Compare with "=" rather than LIKE: under LIKE every "_" or "%" inside
    # the URI is a wildcard and ASCII letters match case-insensitively, so
    # the id of a different URI could be returned.
    QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI = ?;"
    CURS.execute(QUERY, [uri])
    row = CURS.fetchone()
    if row is None:
        return None
    # fetchone() yields a one-element tuple; the id is its only item.
    return row[0]
def parser(element: pd.DataFrame):
    """Return a copy of *element* with mapped URIs swapped for abbreviations.

    The i-th entry of mapper_key_list is replaced by the i-th entry of
    mapper_value_list through the argument's own ``replace`` method, e.g.
    ``df.replace(['Boston Celtics'], ['Omega Warriors'])``.

    NOTE(review): with list arguments and no ``regex`` flag, pandas replaces
    only cells whose entire value equals a key; it does not rewrite URI
    prefixes. A plain ``str`` argument is not supported, because
    ``str.replace`` rejects list arguments.
    """
    return element.replace(to_replace=mapper_key_list, value=mapper_value_list)
def _link_abbreviations(frame, id_column, uri_column, link) -> None:
    """Store the abbreviation of every row of *frame* and link it to the row.

    Args:
        frame: DataFrame holding at least *id_column* and *uri_column*.
        id_column: name of the column with the entity id.
        uri_column: name of the column with the entity URI.
        link: callable ``(entity_id, abbreviation_id)`` that inserts the row
            of the matching link table.
    """
    # parser() relies on the pandas replace() method, so it is given the URI
    # column as a one-column DataFrame instead of one str per row (calling
    # str.replace with the two mapping lists raises TypeError).
    abbreviations = parser(frame[[uri_column]])[uri_column]
    for entity_id, uri, abbreviation in zip(
        frame[id_column], frame[uri_column], abbreviations
    ):
        # The insert returns False when the row is rejected (for instance a
        # URI already stored by an earlier table); the id is then looked up
        # in either case.
        insert_abbreviation(uri, abbreviation)
        abbreviation_id = select_abbreviation_id(uri)
        if abbreviation_id is None:
            # Nothing to link to: skip rather than insert a NULL id.
            continue
        link(entity_id, abbreviation_id)


def populate():
    """Fill the abbreviation tables for all subjects, objects and relationships.

    Reads the Subjects, Objects and Relationships tables, stores one
    abbreviation per URI and links each entity to it through the link table
    of its own kind. The work is committed at the end and the module-level
    connection is then closed, so no other helper of this module can be used
    after this function returns.
    """
    subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN)
    objects = pd.read_sql_query('SELECT * FROM Objects;', CONN)
    relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN)

    # Each kind of entity goes through its own link-table insert.
    _link_abbreviations(
        subjects, "SubjectID", "SubjectURI", insert_subject_abbreviation
    )
    _link_abbreviations(
        objects, "ObjectID", "ObjectURI", inserto_object_abbreviation
    )
    _link_abbreviations(
        relationships,
        "RelationshipID",
        "RelationshipURI",
        insert_relationship_abbreviation,
    )

    CONN.commit()
    CONN.close()
# MAPPER_HANDLER.close()