30 Commits

Author SHA1 Message Date
GassiGiuseppe
9440a562f2 Merge branch 'dev.etl' of https://repositories.communitynotfound.work/PoliBa-DeepLearning/NanoSocrates into dev.etl 2025-09-25 18:33:51 +02:00
Christian Risi
5eda131aac Fixed creation query to be unique even with movieID in RDFs 2025-09-25 17:58:09 +02:00
GassiGiuseppe
57884eaf2e CSV support added to path_splitter_tree
Also fixed a minor bug so that leaf nodes are also printed
2025-09-25 17:57:46 +02:00
Christian Risi
4548a683c2 Fixed DB 2025-09-25 17:57:45 +02:00
GassiGiuseppe
3eec49ffa5 WIP: added test file: clean_relationship.jupyter
to create a first cleaning pipeline
2025-09-25 16:28:24 +02:00
Christian Risi
0bc7f4b227 Fixed Typos 2025-09-25 12:37:52 +02:00
Christian Risi
f28952b0a2 Added todo 2025-09-25 12:00:26 +02:00
Christian Risi
0b626a8e09 Modified query to take all data 2025-09-25 11:53:12 +02:00
Christian Risi
b254098532 Added views to count for subjects and objects 2025-09-25 11:40:44 +02:00
Christian Risi
ee88ffe4cf Added View to filter over relationship counts 2025-09-25 11:32:03 +02:00
Christian Risi
70b4bd8645 Added Complex query 2025-09-25 11:31:34 +02:00
Christian Risi
6316d2bfc4 Added queries to take data from SQL for dataset 2025-09-25 11:27:19 +02:00
Christian Risi
87ca748f45 Updated DB to reflect new changes 2025-09-24 19:29:57 +02:00
Christian Risi
4315d70109 Merged abbreviation_datawarehouse into datawarehouse 2025-09-24 19:29:43 +02:00
Christian Risi
9a5d633b5e Fixed Typos 2025-09-24 19:29:07 +02:00
Christian Risi
a6760cd52d Updated SQL Queries to support parsing in DB 2025-09-24 19:28:55 +02:00
GassiGiuseppe
a7eb92227d Moved all db queries file in their own folder 2025-09-24 16:44:55 +02:00
GassiGiuseppe
9f221e31cd Merge branch 'dev.etl' of https://repositories.communitynotfound.work/PoliBa-DeepLearning/NanoSocrates into dev.etl 2025-09-24 16:32:52 +02:00
GassiGiuseppe
47197194d5 WIP abbreviation_datawarehouse to create an abbreviation system 2025-09-24 16:32:09 +02:00
Christian Risi
0cdbf6f624 Added query to retrieve a dirty dataset from SQLite DB 2025-09-24 16:15:47 +02:00
Christian Risi
3e30489f86 Updated Queries for DB 2025-09-24 14:44:53 +02:00
Christian Risi
8a22e453e4 Fixed csv 2025-09-24 14:44:25 +02:00
Christian Risi
7feb4eb857 Fixed URI generation 2025-09-24 14:44:07 +02:00
Christian Risi
70af19d356 Removed unused imports and added trailing slashes 2025-09-24 14:04:48 +02:00
Christian Risi
a4b44ab2ee Fixed Typos 2025-09-24 14:04:27 +02:00
Christian Risi
74b6b609dd Fixed typos 2025-09-24 13:59:19 +02:00
Christian Risi
59796c37cb Added script to take dbpedia uris 2025-09-24 13:49:29 +02:00
Christian Risi
f696f5950b Added uri-abbreviations 2025-09-24 13:48:53 +02:00
Christian Risi
605b496da7 Added barebone UML diagram for a Cleaning Pipeline 2025-09-23 19:49:01 +02:00
Christian Risi
7d693964dd Added new directories to tree structure 2025-09-23 19:47:56 +02:00
13 changed files with 1684 additions and 152 deletions

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:c1fcb1ad61a69145145c45c639ab42b36ffc63caa0ef9832eb81491197883ff4
size 8086

Binary file not shown.

View File

@@ -0,0 +1,30 @@
-- To pass to Pandas
SELECT *
FROM RDFs
INNER JOIN Subjects USING (SubjectID)
INNER JOIN Relationships USING (RelationshipID)
INNER JOIN Objects USING (ObjectID);
-- To pass to Pandas for abstracts
SELECT *
FROM RDFs
INNER JOIN WikipediaAbstracts USING (MovieID);
-- To pass to Pandas for abbreviations
SELECT *
FROM Abbreviations;
-- More complex to have clean dataset
SELECT MovieID, GROUP_CONCAT('<SOT>' || '<SUB>' || SubjectURI || '<REL>' || RelationshipURI || '<OBJ>' || ObjectURI || '<EOT>', '') as RDF_String, Abstract
FROM RDFs
INNER JOIN SubjectsCountInRDFs USING (SubjectID)
INNER JOIN RelationshipsCountInRDFs USING(RelationshipID)
INNER JOIN ObjectsCountInRDFs USING (ObjectID)
INNER JOIN ParsedSubjects USING (SubjectID)
INNER JOIN ParsedRelationships USING (RelationshipID)
INNER JOIN ParsedObjects USING (ObjectID)
INNER JOIN WikipediaAbstracts USING (MovieID)
-- insert WHERE here
-- WHERE SubjectID = 134626
GROUP BY MovieID;
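A minimal sketch (not part of this changeset) of how the "To pass to Pandas" queries above might be consumed from Python, assuming the sqlite3/pandas setup used in the clean_relationship notebook and the dataset.db path used by db_feeding.py:

import sqlite3
import pandas as pd

# Assumed DB path; db_feeding.py uses ./Assets/Dataset/DatawareHouse/dataset.db
CONN = sqlite3.connect("./Assets/Dataset/DatawareHouse/dataset.db")

# Joined RDF rows, as in the first query above
RDF_QUERY = (
    "SELECT * FROM RDFs "
    "INNER JOIN Subjects USING (SubjectID) "
    "INNER JOIN Relationships USING (RelationshipID) "
    "INNER JOIN Objects USING (ObjectID);"
)
rdf_df = pd.read_sql_query(RDF_QUERY, CONN)

# Abstracts keyed by MovieID, as in the second query above
abstracts_df = pd.read_sql_query(
    "SELECT * FROM RDFs INNER JOIN WikipediaAbstracts USING (MovieID);", CONN
)

CONN.close()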

View File

@@ -0,0 +1,174 @@
CREATE TABLE IF NOT EXISTS Movies (
MovieID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS WikiPageIDs (
MovieID INTEGER PRIMARY KEY,
PageID INTEGER UNIQUE NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS WikipediaAbstracts (
MovieID INTEGER PRIMARY KEY,
Abstract TEXT NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS Origins (
OriginID INTEGER PRIMARY KEY AUTOINCREMENT,
OriginName TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Subjects (
SubjectID INTEGER PRIMARY KEY AUTOINCREMENT,
SubjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS Relationships (
RelationshipID INTEGER PRIMARY KEY AUTOINCREMENT,
RelationshipURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Objects (
ObjectID INTEGER PRIMARY KEY AUTOINCREMENT,
ObjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS RDFs (
RDF_ID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieID INTEGER NOT NULL,
SubjectID INTEGER NOT NULL,
RelationshipID INTEGER NOT NULL,
ObjectID INTEGER NOT NULL,
UNIQUE(MovieID, SubjectID, RelationshipID, ObjectID),
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID),
FOREIGN KEY(SubjectID) REFERENCES Subjects(SubjectID),
FOREIGN KEY(RelationshipID) REFERENCES Relationships(RelationshipID),
FOREIGN KEY(ObjectID) REFERENCES Objects(ObjectID)
);
CREATE INDEX IF NOT EXISTS idx_rdf_movie_id ON RDFs(MovieID);
CREATE INDEX IF NOT EXISTS idx_rdf_subject_id ON RDFs(SubjectID);
CREATE INDEX IF NOT EXISTS idx_rdf_relationship_id ON RDFs(RelationshipID);
CREATE INDEX IF NOT EXISTS idx_rdf_object_id ON RDFs(ObjectID);
CREATE TABLE IF NOT EXISTS Abbreviations (
AbbreviationID INTEGER PRIMARY KEY AUTOINCREMENT,
URI TEXT UNIQUE NOT NULL,
Abbreviation TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Subjects_Abbreviations (
SubjectID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(SubjectID, AbbreviationID),
FOREIGN KEY(SubjectID) REFERENCES Subjects(SubjectID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE TABLE IF NOT EXISTS Relationships_Abbreviations (
RelationshipID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(RelationshipID, AbbreviationID),
FOREIGN KEY(RelationshipID) REFERENCES Relationships(RelationshipID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE TABLE IF NOT EXISTS Objects_Abbreviations (
ObjectID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(ObjectID, AbbreviationID),
FOREIGN KEY(ObjectID) REFERENCES Objects(ObjectID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE INDEX IF NOT EXISTS idx_sub_abbr_sub_id ON Subjects_Abbreviations(SubjectID);
CREATE INDEX IF NOT EXISTS idx_sub_abbr_abbr_id ON Subjects_Abbreviations(AbbreviationID);
CREATE INDEX IF NOT EXISTS idx_rel_abbr_rel_id ON Relationships_Abbreviations(RelationshipID);
CREATE INDEX IF NOT EXISTS idx_rel_abbr_abbr_id ON Relationships_Abbreviations(AbbreviationID);
CREATE INDEX IF NOT EXISTS idx_obj_abbr_obj_id ON Objects_Abbreviations(ObjectID);
CREATE INDEX IF NOT EXISTS idx_obj_abbr_abbr_id ON Objects_Abbreviations(AbbreviationID);
-- Views
-- Subjects
CREATE VIEW IF NOT EXISTS ParsedSubjects
AS
SELECT
SubjectID,
CASE WHEN Abbreviation IS NULL
THEN SubjectURI
ELSE Abbreviation || ':' || replace(SubjectURI, URI, '') END
AS SubjectURI
FROM Subjects
LEFT JOIN Subjects_Abbreviations USING (SubjectID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Relationships
CREATE VIEW IF NOT EXISTS ParsedRelationships
AS
SELECT
RelationshipID,
CASE WHEN Abbreviation IS NULL
THEN RelationshipURI
ELSE Abbreviation || ':' || replace(RelationshipURI, URI, '') END
AS RelationshipURI
FROM Relationships
LEFT JOIN Relationships_Abbreviations USING (RelationshipID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Objects
CREATE VIEW IF NOT EXISTS ParsedObjects
AS
SELECT
ObjectID,
CASE WHEN Abbreviation IS NULL
THEN ObjectURI
ELSE Abbreviation || ':' || replace(ObjectURI, URI, '') END
AS ObjectURI
FROM Objects
LEFT JOIN Objects_Abbreviations USING (ObjectID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Subject Count
CREATE VIEW IF NOT EXISTS SubjectsCountInRDFs
AS
SELECT SubjectID, count(SubjectID) as Sub_Count
FROM RDFs
GROUP BY SubjectID;
-- Relationship Count
CREATE VIEW IF NOT EXISTS RelationshipsCountInRDFs
AS
SELECT RelationshipID, count(RelationshipID) as Rel_Count
FROM RDFs
GROUP BY RelationshipID;
-- Object Count
CREATE VIEW IF NOT EXISTS ObjectsCountInRDFs
AS
SELECT ObjectID, count(ObjectID) as Obj_Count
FROM RDFs
GROUP BY ObjectID;
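For reference, a small Python sketch (not part of this changeset) of what the Parsed* views above compute: when a prefix/abbreviation pair is joined in, the stored prefix is stripped from the URI and replaced by the abbreviation plus a colon. The example row is shaped like the output of get_dbpedia_uris.py further down in this changeset.

def parse_uri(full_uri: str, prefix: str | None, abbreviation: str | None) -> str:
    # CASE WHEN Abbreviation IS NULL THEN <URI> ...
    if abbreviation is None or prefix is None:
        return full_uri
    # ... ELSE Abbreviation || ':' || replace(<URI>, URI, '')
    return abbreviation + ":" + full_uri.replace(prefix, "")

print(parse_uri("http://dbpedia.org/resource/Nights_of_Cabiria",
                "http://dbpedia.org/resource/", "dbp-dbr"))
# -> dbp-dbr:Nights_of_Cabiria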

View File

@@ -33,3 +33,23 @@ SELECT ObjectID FROM Objects WHERE ObjectURI = ?;
INSERT INTO RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);
-- Prefixes
INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);
INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);
INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);
INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);
-- Please be sure the parameter is a URI before running this query,
-- and keep at least the domain and the first path part in the pattern
SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;
-- Query to retrieve data
SELECT MovieID, GROUP_CONCAT('<SOT>' || '<SUB>' || SubjectURI || '<REL>' || RelationshipURI || '<OBJ>' || ObjectURI || '<EOT>', '') as RDF_String, Abstract
FROM RDFs
INNER JOIN ParsedSubjects USING (SubjectID)
INNER JOIN ParsedRelationships USING (RelationshipID)
INNER JOIN ParsedObjects USING (ObjectID)
INNER JOIN WikipediaAbstracts USING (MovieID)
-- insert WHERE here
GROUP BY MovieID;
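A hedged sketch (not repo code) of how the "URI LIKE ?" lookup above is meant to be called, following the comment: the parameter should be a URI prefix that keeps at least the scheme, the domain and the first path part, with a trailing '%' wildcard.

import sqlite3

def abbreviation_id_for(curs: sqlite3.Cursor, uri: str) -> int | None:
    sections = uri.split("/")               # e.g. ['http:', '', 'dbpedia.org', 'resource', 'Cabiria']
    pattern = "/".join(sections[:4]) + "%"  # 'http://dbpedia.org/resource%'
    curs.execute("SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;", [pattern])
    row = curs.fetchone()
    return row[0] if row else None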

View File

@@ -0,0 +1,186 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b9081b7c",
"metadata": {},
"outputs": [],
"source": [
"# This file deletes in the pipeline the unwanted relationship by different rules\n",
"import pandas as pd\n",
"import sqlite3\n",
"import numpy as np\n",
"\n",
"\n",
"CONN = sqlite3.connect('../../Assets/Dataset/Tmp/dataset2.db')\n",
"\n",
"def get_RDF() -> pd.DataFrame:\n",
" \"\"\"\n",
" QUERY = \"SELECT * FROM RDFs \" \\\n",
" \"INNER JOIN Subjects USING (SubjectID) \" \\\n",
" \"INNER JOIN Relationships USING (RelationshipID) \" \\\n",
" \"INNER JOIN Objects USING (ObjectID);\"\n",
" RDF = pd.read_sql_query(QUERY, CONN)\n",
" RDF = RDF[[\"SubjectURI\", \"RelationshipURI\", \"ObjectURI\"]]\n",
" RDF = RDF.dropna()\n",
" \"\"\"\n",
" Subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN)\n",
" Objects = pd.read_sql_query('SELECT * FROM Objects;', CONN)\n",
" Relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN)\n",
" RDF = pd.read_sql_query('SELECT * FROM RDFs;', CONN)\n",
"\n",
" # drop '' values \n",
" Subjects = Subjects.replace('', np.nan)# .dropna()\n",
" Relationships = Relationships.replace('', np.nan)# .dropna()\n",
" Objects = Objects.replace('', np.nan)# .dropna()\n",
"\n",
" # join RDF with its components\n",
" RDF = RDF.merge(Subjects, left_on=\"SubjectID\", right_on=\"SubjectID\")\n",
" RDF = RDF.merge(Objects, left_on=\"ObjectID\", right_on=\"ObjectID\")\n",
" RDF = RDF.merge(Relationships, left_on=\"RelationshipID\", right_on=\"RelationshipID\")\n",
" RDF = RDF[[\"SubjectURI\", \"RelationshipURI\", \"ObjectURI\", \"MovieID\"]]\n",
" return RDF\n",
"\n",
"\n",
"#def delete_relationship_by_uri(RDF: pd.DataFrame, )\n",
"\n",
"def delete_relationship_by_uri(RDF: pd.DataFrame, uri: str) -> pd.DataFrame:\n",
" return RDF[RDF[\"RelationshipURI\"]!= uri]\n",
"\n",
"\n",
"\n",
"RDF = get_RDF()\n",
"# RDF = RDF.dropna()\n",
"# print(RDF)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "644690bb",
"metadata": {},
"outputs": [],
"source": [
"def filter_by_frequence_relationship_uri(RDF: pd.DataFrame, count_treshold) -> pd.DataFrame:\n",
" counts = RDF[\"RelationshipURI\"].value_counts() \n",
" RDF[\"RelationshipFreq\"] = RDF[\"RelationshipURI\"].map(counts)\n",
" RDF = RDF[RDF[\"RelationshipFreq\"] >= count_treshold]\n",
" # counts is a series as key: relationship, value: count\n",
" # counts = counts[counts > count_treshold]\n",
" # relationships = counts.index\n",
" # RDF = RDF[RDF[\"RelationshipURI\"].isin(relationships)]\n",
" # RDF = RDF.groupby(\"RelationshipURI\").filter(lambda x: len(x) >= count_treshold)\n",
" return RDF\n",
"\n",
"RDF = filter_by_frequence_relationship_uri(RDF, 1)\n",
"# print(new_RDF)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34525be6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" SubjectURI \\\n",
"0 http://dbpedia.org/resource/Nights_of_Cabiria \n",
"1 http://dbpedia.org/resource/California_Science... \n",
"2 http://dbpedia.org/resource/China_Captain \n",
"3 http://dbpedia.org/resource/Caravan_of_Courage... \n",
"4 http://dbpedia.org/resource/WHIH_Newsfront \n",
"... ... \n",
"12725500 http://dbpedia.org/resource/I_Will_Follow_(film) \n",
"12725501 http://dbpedia.org/resource/I_Will_Follow_(film) \n",
"12725502 http://dbpedia.org/resource/I_Witnessed_Genoci... \n",
"12725503 http://dbpedia.org/resource/I_Woke_Up_Early_th... \n",
"12725504 http://dbpedia.org/resource/I_Won't_Play \n",
"\n",
" RelationshipURI \\\n",
"0 http://www.w3.org/2002/07/owl#differentFrom \n",
"1 http://www.w3.org/2002/07/owl#differentFrom \n",
"2 http://www.w3.org/2002/07/owl#differentFrom \n",
"3 http://www.w3.org/2002/07/owl#differentFrom \n",
"4 http://www.w3.org/2000/01/rdf-schema#seeAlso \n",
"... ... \n",
"12725500 http://dbpedia.org/ontology/producer \n",
"12725501 http://dbpedia.org/ontology/producer \n",
"12725502 http://dbpedia.org/ontology/producer \n",
"12725503 http://dbpedia.org/ontology/producer \n",
"12725504 http://dbpedia.org/ontology/producer \n",
"\n",
" ObjectURI MovieID \\\n",
"0 http://dbpedia.org/resource/Cabiria 26 \n",
"1 http://dbpedia.org/resource/California_Academy... 185 \n",
"2 http://dbpedia.org/resource/Captain_China 614 \n",
"3 http://dbpedia.org/resource/Caravan_of_Courage... 740 \n",
"4 http://dbpedia.org/resource/Captain_America:_C... 594 \n",
"... ... ... \n",
"12725500 http://dbpedia.org/resource/Ava_DuVernay 145854 \n",
"12725501 http://dbpedia.org/resource/Molly_Mayeux 145854 \n",
"12725502 http://dbpedia.org/resource/Headlines_Today 145861 \n",
"12725503 http://dbpedia.org/resource/Billy_Zane 145862 \n",
"12725504 http://dbpedia.org/resource/Gordon_Hollingshead 145864 \n",
"\n",
" RelationshipFreq MovieFreq \n",
"0 2132 216 \n",
"1 2132 264 \n",
"2 2132 66 \n",
"3 2132 131 \n",
"4 1653 133 \n",
"... ... ... \n",
"12725500 80077 95 \n",
"12725501 80077 95 \n",
"12725502 80077 41 \n",
"12725503 80077 98 \n",
"12725504 80077 91 \n",
"\n",
"[12725505 rows x 6 columns]\n"
]
}
],
"source": [
"def filter_by_frequence_movie_id(RDF: pd.DataFrame, min_treshold, max_treshold) -> pd.DataFrame:\n",
" counts = RDF[\"MovieID\"].value_counts() \n",
" RDF[\"MovieFreq\"] = RDF[\"MovieID\"].map(counts)\n",
" RDF = RDF[RDF[\"MovieFreq\"] >= min_treshold]\n",
" RDF = RDF[RDF[\"MovieFreq\"] < max_treshold]\n",
" # counts is a series as key: relationship, value: count\n",
" # counts = counts[counts > count_treshold]\n",
" # relationships = counts.index\n",
" # RDF = RDF[RDF[\"RelationshipURI\"].isin(relationships)]\n",
" # RDF = RDF.groupby(\"RelationshipURI\").filter(lambda x: len(x) >= count_treshold)\n",
" return RDF\n",
"\n",
"RDF = filter_by_frequence_movie_id(RDF, 1, 1500)\n",
"print(RDF)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "deep_learning",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,77 @@
import argparse
import sys
class ProgramArgs:
def __init__(self, file: str, output: str, treshold: int):
self.file = file
self.output = output
self.treshold = treshold
def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "-i", required=True, type=str)
PARSER.add_argument("--output-file", "-o", required=True, type=str)
PARSER.add_argument("--treshold", "-t", type=int, default=1)
parsed_args, _ = PARSER.parse_known_args(args)
# print(parsed_args.input_file)
return ProgramArgs(parsed_args.input_file,parsed_args.output_file, parsed_args.treshold) # type ignore
def print_dbpedia(file: str, out: str):
FILE = open(file, "r", encoding="utf-8")
OUT = open(out, mode="w", encoding="utf-8")
DOMAIN_PART = "dbpedia"
already_parsed : set[str] = set()
for row in FILE:
sections = row.split("/")
sections = list(filter(lambda item: item != "", sections))
# print(sections)
if len(sections) < 3:
continue
URI = "/".join(sections[1:3])
URI = "//".join([sections[0], URI])
if URI in already_parsed:
continue
DOMAIN = sections[1]
SUBDOMAINS = DOMAIN.split(".")
TYPE = sections[2][0]
if DOMAIN_PART not in SUBDOMAINS:
continue
already_parsed.add(URI)
SUB_ID = SUBDOMAINS[0]
if len(SUB_ID) > 3:
SUB_ID = SUB_ID[:3]
OUT.write(f"\"{URI}/\", \"{SUB_ID}-db{TYPE}\"\n")
FILE.close()
OUT.close()
if __name__ == "__main__":
ARGS = get_args(sys.argv)
# ARGS = get_debug_args()
print_dbpedia(ARGS.file, ARGS.output)
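A worked example (not part of the commit) tracing print_dbpedia above on a DBpedia URI that appears elsewhere in this changeset:

row = "http://dbpedia.org/resource/Nights_of_Cabiria"
sections = [s for s in row.split("/") if s != ""]        # ['http:', 'dbpedia.org', 'resource', 'Nights_of_Cabiria']
uri = "//".join([sections[0], "/".join(sections[1:3])])  # 'http://dbpedia.org/resource'
sub_id = sections[1].split(".")[0][:3]                   # 'dbpedia' -> 'dbp'
type_char = sections[2][0]                               # 'r'
print(f'"{uri}/", "{sub_id}-db{type_char}"')             # "http://dbpedia.org/resource/", "dbp-dbr"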

View File

@@ -6,8 +6,16 @@ from typing import Self
class ProgramArgs:
def __init__(self, file: str, csv_uri_header: str, output: str, treshold: int):
"""
Args:
file (str):
csv_uri_header (str): The name of the CSV column from which the program reads the URIs
output (str):
treshold (int):
"""
self.file = file
self.csv_uri_header = csv_uri_header
self.output = output
self.treshold = treshold
@@ -33,11 +41,15 @@ class Node:
KEY = child[0]
if not self.children.get(KEY):
# if the key has no node yet, we are traversing this branch for the first time:
# create another node for the key
self.children[KEY] = Node(KEY, 0)
# take the node for the key
CHILD = self.children[KEY]
self.quantity += 1
# if the child list has only one element (KEY itself), no further nodes will be created
if len(child) == 1:
return
@@ -53,27 +65,32 @@ def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "-i", required=True, type=str)
PARSER.add_argument("--header-name", "-c", required=True, type=str) # c stands for column
PARSER.add_argument("--output-file", "-o", required=True, type=str)
PARSER.add_argument("--treshold", "-t", type=int, default=1)
parsed_args, _ = PARSER.parse_known_args(args)
# print(parsed_args.input_file)
return ProgramArgs(parsed_args.input_file, parsed_args.header_name, parsed_args.output_file, parsed_args.treshold) # type: ignore
def get_debug_args() -> ProgramArgs:
# -i ./Assets/Dataset/1-hop/movies.csv -c subject -o Assets/Dataset/Tmp/prova.csv -t 1
FILE = "./Assets/Dataset/1-hop/movies.csv"
CSV_HEADER = "subject"
OUTPUT = "./Assets/Dataset/Tmp/prova.csv"
TRESHOLD = 1
return ProgramArgs(
FILE,
CSV_HEADER,
OUTPUT,
TRESHOLD
)
def tree_like(file: str, csv_uri_header: str, out: str):
INDENTATION = " "
@@ -84,9 +101,12 @@ def tree_like(file: str, out: str):
FILE = open(file, "r", encoding="utf-8")
# TODO: Change here so it takes a single URI from a CSV file
# The header-name is needed for this
for row in csv.DictReader(FILE):
uri_element = row[csv_uri_header]
sections = uri_element.split("/")
sections = list(filter(lambda item: item != "", sections))
# print(sections)
@@ -115,7 +135,9 @@ def tree_like(file: str, out: str):
INDENT: str = INDENTATION * DEPTH
# Leaf nodes have quantity 0, so for them to appear the threshold has to be 0
# if NODE.quantity < ARGS.treshold:
if ARGS.treshold > NODE.quantity:
continue
OUT.write(f"{INDENT}- {NODE}\n")
@@ -133,7 +155,8 @@ def tree_like(file: str, out: str):
OUT.close()
if __name__ == "__main__":
ARGS = get_args(sys.argv)
# ARGS = get_debug_args()
tree_like(ARGS.file, ARGS.csv_uri_header, ARGS.output)
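A simplified sketch (not the repo's Node class, just the same idea) of the path-splitting tree that the script above builds: each URI is split on '/', every segment becomes a node, and quantity counts how many URIs passed through that node. Leaf nodes keep quantity 0, which is why the threshold has to be 0 for them to be printed.

class Node:
    def __init__(self, key: str):
        self.key = key
        self.quantity = 0
        self.children: dict[str, "Node"] = {}

    def add(self, segments: list[str]) -> None:
        if not segments:
            return
        key = segments[0]
        child = self.children.setdefault(key, Node(key))
        self.quantity += 1       # one more URI passed through this node
        child.add(segments[1:])  # the last segment becomes a leaf with quantity 0

root = Node("root")
for uri in ["http://dbpedia.org/resource/Cabiria", "http://dbpedia.org/resource/Captain_China"]:
    root.add([s for s in uri.split("/") if s != ""])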

View File

@@ -1,65 +0,0 @@
CREATE TABLE IF NOT EXISTS Movies (
MovieID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS WikiPageIDs (
MovieID INTEGER PRIMARY KEY,
PageID INTEGER UNIQUE NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS WikipediaAbstracts (
MovieID INTEGER PRIMARY KEY,
Abstract TEXT NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS Origins (
OriginID INTEGER PRIMARY KEY AUTOINCREMENT,
OriginName TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Subjects (
SubjectID INTEGER PRIMARY KEY AUTOINCREMENT,
SubjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS Relationships (
RelationshipID INTEGER PRIMARY KEY AUTOINCREMENT,
RelationshipURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Objects (
ObjectID INTEGER PRIMARY KEY AUTOINCREMENT,
ObjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS RDFs (
RDF_ID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieID INTEGER NOT NULL,
SubjectID INTEGER NOT NULL,
RelationshipID INTEGER NOT NULL,
ObjectID INTEGER NOT NULL,
UNIQUE(SubjectID, RelationshipID, ObjectID),
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID),
FOREIGN KEY(SubjectID) REFERENCES Subjects(SubjectID),
FOREIGN KEY(RelationshipID) REFERENCES Relationships(RelationshipID),
FOREIGN KEY(ObjectID) REFERENCES Objects(ObjectID)
);
CREATE INDEX IF NOT EXISTS idx_rdf_movie_id ON RDFs(MovieID);
CREATE INDEX IF NOT EXISTS idx_rdf_subject_id ON RDFs(SubjectID);
CREATE INDEX IF NOT EXISTS idx_rdf_relationship_id ON RDFs(RelationshipID);
CREATE INDEX IF NOT EXISTS idx_rdf_object_id ON RDFs(ObjectID);

View File

@@ -8,7 +8,7 @@ import csv
#####################################################################
# sometimes you may need to build a new db file, here is a little snippet for you
# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql
# --- Global configuration ---
DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
@@ -17,12 +17,15 @@ PAGEID_CSV = "./Assets/Dataset/1-hop/movie-pageid.csv"
SUMMARY_CSV = "./Assets/Dataset/1-hop/wikipedia-summary.csv"
DATASET_CSV = "./Assets/Dataset/1-hop/dataset.csv"
REVERSE_CSV = "./Assets/Dataset/1-hop/reverse.csv"
URI_CSV = "./Assets/Dataset/1-hop/uri-abbreviations.csv"
MOVIES_CSV_HANDLER = open(MOVIES_CSV, "r", newline="", encoding="utf-8")
PAGEID_CSV_HANDLER = open(PAGEID_CSV, "r", newline="", encoding="utf-8")
SUMMARY_CSV_HANDLER = open(SUMMARY_CSV, "r", newline="", encoding="utf-8")
DATASET_CSV_HANDLER = open(DATASET_CSV, "r", newline="", encoding="utf-8")
REVERSE_CSV_HANDLER = open(REVERSE_CSV, "r", newline="", encoding="utf-8")
URI_ABBR_CSV_HANDLER = open(URI_CSV, "r", newline="", encoding="utf-8")
CONN = sqlite3.connect(DB_NAME)
CURS = CONN.cursor()
@@ -30,7 +33,8 @@ CURS = CONN.cursor()
# MARK: SQL Definitions
# Insert MovieURI
def insertOrigin(curs: sqlite3.Cursor) -> bool:
QUERY = "INSERT INTO Origins (OriginName) VALUES ('dataset.csv'),('reverse.csv');"
try:
@@ -38,24 +42,26 @@ def insertOrigin(curs : sqlite3.Cursor ) -> bool:
return True
except sqlite3.IntegrityError:
return False
def selectOrigin(curs: sqlite3.Cursor, originName: str) -> int | None:
QUERY = "SELECT OriginID FROM Origins WHERE OriginName = ?;"
curs.execute(QUERY, [originName])
originId = curs.fetchone()
if not originId:
return None
# in this case the real id is the first element of the tuple
return originId[0]
def insertMovie(curs: sqlite3.Cursor, movieUri: str) -> bool:
QUERY = "INSERT INTO Movies (MovieURI) VALUES (?);"
try:
curs.execute(QUERY, [movieUri])
return True
except sqlite3.IntegrityError:
return False
@@ -64,12 +70,12 @@ def insertMovie(curs : sqlite3.Cursor , movieUri: str) -> bool:
def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None:
QUERY = "SELECT MovieID FROM Movies WHERE MovieURI = ?;"
curs.execute(QUERY, [movieUri])
movieId = curs.fetchone()
if not movieId:
return None
# in this case the real id is the first element of the tuple
return movieId[0]
@@ -77,105 +83,164 @@ def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None:
def insertWikiPageId(curs: sqlite3.Cursor, movieId: int, pageId: int) -> bool:
QUERY = "INSERT INTO WikiPageIDs (MovieID, PageID) VALUES (?,?);"
try:
curs.execute(QUERY, [movieId, pageId])
return True
except sqlite3.IntegrityError:
return False
def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor, pageId: int) -> int | None:
QUERY = "SELECT MovieID FROM WikiPageIDs WHERE PageID = ?;"
curs.execute(QUERY, [pageId])
movieId = curs.fetchone()
if not movieId:
return None
# in this case the real id is the first element of the tuple
return movieId[0]
def insertWikiAbstract(curs: sqlite3.Cursor, movieId: int, abstract: str) -> bool:
QUERY = "INSERT INTO WikipediaAbstracts (MovieID, Abstract) VALUES (?,?);"
try:
curs.execute(QUERY, [movieId, abstract])
return True
except sqlite3.IntegrityError:
return False
def insertSubject(curs: sqlite3.Cursor, subjectURI: str, originID: int) -> bool:
QUERY = "INSERT INTO Subjects (SubjectURI, OriginID) VALUES (?,?);"
try:
curs.execute(QUERY, [subjectURI, originID])
return True
except sqlite3.IntegrityError:
return False
def insertRelationship(curs: sqlite3.Cursor, relationshipURI: str) -> bool:
QUERY = "INSERT INTO Relationships (RelationshipURI) VALUES (?);"
try:
curs.execute(QUERY, [relationshipURI])
return True
except sqlite3.IntegrityError:
return False
def insertObject(curs: sqlite3.Cursor, objectURI: str, originID: int) -> bool:
QUERY = "INSERT INTO objects (ObjectURI, OriginID) VALUES (?,?);"
try:
curs.execute(QUERY, [objectURI, originID])
return True
except sqlite3.IntegrityError:
return False
def selectSubjectId(curs: sqlite3.Cursor, subjectURI: str) -> int | None:
QUERY = "SELECT SubjectID FROM Subjects WHERE SubjectURI = ?;"
curs.execute(QUERY, [subjectURI])
subjectId = curs.fetchone()
if not subjectId:
return None
# in this case the real id is the first element of the tuple
return subjectId[0]
def selectRelationshipId(curs: sqlite3.Cursor, relationshipURI: str) -> int | None:
QUERY = "SELECT RelationshipID FROM Relationships WHERE RelationshipURI = ?;"
curs.execute(QUERY, [relationshipURI])
relationshipId = curs.fetchone()
if not relationshipId:
return None
# in this case the real id is the first element of the tuple
return relationshipId[0]
def selectObjectId(curs: sqlite3.Cursor, objectURI: str) -> int | None:
QUERY = "SELECT ObjectID FROM Objects WHERE ObjectURI = ?;"
curs.execute(QUERY, [objectURI])
objectId = curs.fetchone()
if not objectId:
return None
# in this case the real id is the first element of the tuple
return objectId[0]
def insertRDF(
curs: sqlite3.Cursor,
movieId: int,
subjectId: int,
relationshipId: int,
objectId: int,
) -> bool:
QUERY = "INSERT INTO RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);"
try:
curs.execute(QUERY, [movieId, subjectId, relationshipId, objectId])
return True
except sqlite3.IntegrityError:
return False
# UGLY: correct method to add cursor
def insert_abbreviation(uri, abbreviation) -> bool:
QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
try:
CURS.execute(QUERY, [uri, abbreviation])
return True
except sqlite3.IntegrityError:
return False
# UGLY: correct method to add cursor
def insert_object_abbreviation(object_id, abbreviation_id) -> bool:
QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);"
try:
CURS.execute(QUERY, [object_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: correct method to add cursor
def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
try:
CURS.execute(QUERY, [relationship_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: correct method to add cursor
def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
QUERY = (
"INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
)
try:
CURS.execute(QUERY, [subject_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: correct method to add cursor
def select_abbreviation_id(uri) -> int | None:
QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;"
CURS.execute(QUERY, [uri])
abbreviation_id = CURS.fetchone()
if not abbreviation_id:
return None
# in this case the real id is the first element of the tuple
return abbreviation_id[0]
# MARK: Parsing
def parseMovies():
@@ -203,12 +268,11 @@ def parseWikiPageId():
def parseAbstract():
CSV_READER = csv.DictReader(SUMMARY_CSV_HANDLER)
for row in CSV_READER:
WIKI_PAGE_ID = int(row["subject"])
ABSTRACT = row["text"]
MOVIE_ID = selectMovieIdFromWikiPageId(CURS, WIKI_PAGE_ID)
if MOVIE_ID is None:
print(f"The WikiPageId: {WIKI_PAGE_ID} has not a MovieId ")
continue
@@ -216,10 +280,24 @@ def parseAbstract():
insertWikiAbstract(CURS, MOVIE_ID, ABSTRACT)
def parseAbbreviations():
URI_CSV = csv.DictReader(URI_ABBR_CSV_HANDLER)
for row in URI_CSV:
URI = row["uri"]
ABBREVIATION = row["abbreviation"]
insert_abbreviation(URI, ABBREVIATION)
def parseRDF_Reverse():
REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER)
REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv")
if REVERSE_ORIGIN_ID is None:
return
total = 0
for row in REVERSE_CSV_READER:
@@ -227,7 +305,7 @@ def parseRDF_Reverse():
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
print(f"RDF triplets:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
insertSubject(CURS, SUBJECT, REVERSE_ORIGIN_ID)
insertRelationship(CURS, RELATIONSHIP)
insertObject(CURS, OBJECT, REVERSE_ORIGIN_ID)
@@ -236,7 +314,6 @@ def parseRDF_Reverse():
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
MOVIE_ID = selectMovieId(CURS, OBJECT)
skip = False
# guard
@@ -259,17 +336,19 @@ def parseRDF_Reverse():
if skip:
continue
if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore
total += 1
print(total)
def parseRDF_Dataset():
DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER)
DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv")
if DATASET_ORIGIN_ID is None:
return
total = 0
rdf_idx = 0
@@ -284,7 +363,7 @@ def parseRDF_Dataset():
if rdf_idx % 100000 == 0:
print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
insertSubject(CURS, SUBJECT, DATASET_ORIGIN_ID)
insertRelationship(CURS, RELATIONSHIP)
insertObject(CURS, OBJECT, DATASET_ORIGIN_ID)
@@ -293,7 +372,6 @@ def parseRDF_Dataset():
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
MOVIE_ID = selectMovieId(CURS, SUBJECT)
skip = False
# guard
@@ -316,24 +394,203 @@ def parseRDF_Dataset():
if skip:
continue
if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore
total += 1
print(total)
def parseAbbr_Reverse():
REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER)
REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv")
if REVERSE_ORIGIN_ID is None:
return
total = 0
for row in REVERSE_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
SUB_SECTIONS = SUBJECT.split("/")
REL_SECTIONS = RELATIONSHIP.split("/")
OBJ_SECTIONS = OBJECT.split("/")
SUB_ABBR_ID = None
REL_ABBR_ID = None
OBJ_ABBR_ID = None
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if skip:
continue
if len(SUB_SECTIONS) > 4:
index = min(len(SUB_SECTIONS), 7)
while index > 3:
PATH = "/".join(SUB_SECTIONS[0:index]) + "%"
SUB_ABBR_ID = select_abbreviation_id(PATH)
if SUB_ABBR_ID is not None:
if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID):
total += 1
index = 0
index -= 1
if len(REL_SECTIONS) > 4:
index = min(len(REL_SECTIONS), 7)
while index > 2:
PATH = "/".join(REL_SECTIONS[0:index]) + "%"
REL_ABBR_ID = select_abbreviation_id(PATH)
if REL_ABBR_ID is not None:
if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID):
total += 1
index = 0
index -= 1
if len(OBJ_SECTIONS) > 4:
index = min(len(OBJ_SECTIONS), 7)
while index > 3:
PATH = "/".join(OBJ_SECTIONS[0:index]) + "%"
OBJ_ABBR_ID = select_abbreviation_id(PATH)
if OBJ_ABBR_ID is not None:
if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID):
total += 1
index = 0
index -= 1
print(total)
def parseAbbr_Dataset():
DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER)
DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv")
if DATASET_ORIGIN_ID is None:
return
total = 0
rdf_idx = 0
for row in DATASET_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
rdf_idx += 1
if rdf_idx % 100000 == 0:
print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
SUB_SECTIONS = SUBJECT.split("/")
REL_SECTIONS = RELATIONSHIP.split("/")
OBJ_SECTIONS = OBJECT.split("/")
SUB_ABBR_ID = None
REL_ABBR_ID = None
OBJ_ABBR_ID = None
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if skip:
continue
if len(SUB_SECTIONS) > 4:
index = min(len(SUB_SECTIONS), 7)
while index > 3:
PATH = "/".join(SUB_SECTIONS[0:index]) + "%"
SUB_ABBR_ID = select_abbreviation_id(PATH)
if SUB_ABBR_ID is not None:
if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID):
total += 1
index = 0
index -= 1
if len(REL_SECTIONS) > 4:
index = min(len(REL_SECTIONS), 7)
while index > 2:
PATH = "/".join(REL_SECTIONS[0:index]) + "%"
REL_ABBR_ID = select_abbreviation_id(PATH)
if REL_ABBR_ID is not None:
if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID):
total += 1
index = 0
index -= 1
if len(OBJ_SECTIONS) > 4:
index = min(len(OBJ_SECTIONS), 7)
while index > 3:
PATH = "/".join(OBJ_SECTIONS[0:index]) + "%"
OBJ_ABBR_ID = select_abbreviation_id(PATH)
if OBJ_ABBR_ID is not None:
if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID):
total += 1
index = 0
index -= 1
print(total)
# MARK: Actual Code
# parseMovies()
# parseWikiPageId()
# parseAbstract()
# insertOrigin(CURS)
# parseAbbreviations()
# parseRDF_Reverse()
# parseRDF_Dataset()
# parseAbbr_Reverse()
parseAbbr_Dataset()
CONN.commit()
CONN.close()
MOVIES_CSV_HANDLER.close()
@@ -341,35 +598,36 @@ PAGEID_CSV_HANDLER.close()
SUMMARY_CSV_HANDLER.close()
DATASET_CSV_HANDLER.close()
REVERSE_CSV_HANDLER.close()
URI_ABBR_CSV_HANDLER.close()
"""
The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId
The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId
The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId
The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId
The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId
The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId
"""
"""
The WikiPageId: 10068850 has not a MovieId
The WikiPageId: 55069615 has not a MovieId
The WikiPageId: 49510056 has not a MovieId
The WikiPageId: 4049786 has not a MovieId
The WikiPageId: 55510238 has not a MovieId
The WikiPageId: 31239628 has not a MovieId
The WikiPageId: 34757217 has not a MovieId
The WikiPageId: 64311757 has not a MovieId
The WikiPageId: 8326198 has not a MovieId
The WikiPageId: 42162164 has not a MovieId
The WikiPageId: 18502369 has not a MovieId
The WikiPageId: 58092358 has not a MovieId
The WikiPageId: 40710250 has not a MovieId
"""

View File

@@ -0,0 +1,826 @@
{
"type": "excalidraw",
"version": 2,
"source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor",
"elements": [
{
"type": "line",
"version": 4622,
"versionNonce": 1623045672,
"isDeleted": false,
"id": "twu_PiAvEuQ4l1YYtZLET",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 289.8504963515835,
"y": 91.87474806402287,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.09201683999922,
"height": 99.49948667804088,
"seed": 1975340120,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
0.2542098813493443,
75.20117273657175
],
[
0.011896425679918422,
83.76249969444815
],
[
3.970409367559332,
87.46174320643391
],
[
17.75573317066317,
90.59250103325854
],
[
41.05683533152865,
91.56737225214069
],
[
63.319497586673116,
90.01084754868091
],
[
75.14781395923075,
86.28844687220405
],
[
76.81603792670788,
83.15042405259751
],
[
77.05033394391478,
76.25776215104557
],
[
76.86643881413028,
6.3089586511537865
],
[
76.45188016352971,
-0.2999144698665015
],
[
71.50179495549581,
-3.9936571317850627
],
[
61.077971898861186,
-6.132877429442784
],
[
37.32348754161154,
-7.932114425900202
],
[
18.278415656797975,
-6.859225353587373
],
[
3.2995959613238286,
-3.2201165291205287
],
[
-0.04168289608444441,
-0.045185660461322996
],
[
0,
0
]
],
"index": "a1",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2327,
"versionNonce": 1593094440,
"isDeleted": false,
"id": "hmJk4dH9VpOsfkrCTkhvh",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 290.3744257898585,
"y": 149.00103172175278,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 637665624,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a2",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2413,
"versionNonce": 311708712,
"isDeleted": false,
"id": "X1ldVIXm4DfBal5N2Pwn9",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 289.3425684673547,
"y": 120.03697638652972,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 904402520,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a3",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 5410,
"versionNonce": 92833576,
"isDeleted": false,
"id": "CFhp5ZxSVwHYzGUj4hEn1",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 288.28461948527263,
"y": 84.74247943834126,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 76.59753601865496,
"height": 15.49127539284798,
"seed": 1782811480,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [
"bxuMGTzXLn7H-uBCptINx"
],
"index": "a4",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 820,
"versionNonce": 608002600,
"isDeleted": false,
"id": "B43R7rWwK2_vdiRHBSSPk",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 324.77660659049513,
"y": 109.21914711824485,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 1298686040,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a5",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 1108,
"versionNonce": 1839127848,
"isDeleted": false,
"id": "CkKMb9wkJfVk04T217zSs",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 325.12774837442873,
"y": 135.43576140530996,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 2133497176,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a6",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 991,
"versionNonce": 588838952,
"isDeleted": false,
"id": "SHJdKeQPkfpvzSoNH--3o",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 6.239590202363168,
"x": 325.77660659049513,
"y": 164.20448797661635,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 81668696,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a7",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "text",
"version": 489,
"versionNonce": 2023207720,
"isDeleted": false,
"id": "vUSyMBPup0jZ71CYXKyGb",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 280.1846389770508,
"y": 185.79462957545917,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 95.63072204589844,
"height": 23.595161071904883,
"seed": 425140056,
"groupIds": [
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"fontSize": 17.4778970902999,
"fontFamily": 1,
"text": "dataset.db",
"baseline": 16.595161071904883,
"textAlign": "center",
"verticalAlign": "top",
"index": "a8",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false,
"containerId": null,
"originalText": "dataset.db",
"autoResize": true,
"lineHeight": 1.350000000000001
},
{
"id": "R7pU0VP6CFKCAwuvt0xsr",
"type": "text",
"x": 295.5,
"y": 342,
"width": 374,
"height": 225,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "a9",
"roundness": null,
"seed": 705463336,
"version": 1130,
"versionNonce": 72522328,
"isDeleted": false,
"boundElements": null,
"updated": 1758648226024,
"link": null,
"locked": false,
"text": "class Extract(Action):\n # Static\n + type : ActionTypes = Extract\n \n # Properties\n - db_connection: Path\n - query: str\n - query_parameters: [str]\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Extract(Action):\n # Static\n + type : ActionTypes = Extract\n \n # Properties\n - db_connection: Path\n - query: str\n - query_parameters: [str]\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "G1xIRcJgm34_NMEWQFFlW",
"type": "text",
"x": 1419.5,
"y": 110,
"width": 253,
"height": 75,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aA",
"roundness": null,
"seed": 651981400,
"version": 256,
"versionNonce": 138082856,
"isDeleted": false,
"boundElements": null,
"updated": 1758646570344,
"link": null,
"locked": false,
"text": "class Pipeline\n - actions: [Action]\n ",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Pipeline\n - actions: [Action]\n ",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "TBVy3JbJCkbA9kjVEJ8lv",
"type": "text",
"x": 694,
"y": 100,
"width": 495,
"height": 150,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aB",
"roundness": null,
"seed": 680960040,
"version": 560,
"versionNonce": 85012520,
"isDeleted": false,
"boundElements": null,
"updated": 1758649442239,
"link": null,
"locked": false,
"text": "class Action\n + type: ActionTypes\n + name: str\n + depends_on: [str]\n\n + execute(mem) -> [Dict<str, any>] | Void",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Action\n + type: ActionTypes\n + name: str\n + depends_on: [str]\n\n + execute(mem) -> [Dict<str, any>] | Void",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "an7KRTzWpCytKNKgHftKC",
"type": "text",
"x": 1528.5,
"y": 365.5,
"width": 187,
"height": 150,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aC",
"roundness": null,
"seed": 1974317656,
"version": 306,
"versionNonce": 1574962264,
"isDeleted": false,
"boundElements": null,
"updated": 1758648154009,
"link": null,
"locked": false,
"text": "enum ActionTypes:\n + Extract\n + Aggregate\n + Filter\n + Map\n + Dump",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "enum ActionTypes:\n + Extract\n + Aggregate\n + Filter\n + Map\n + Dump",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "2pQ5EULirrWs_QZPbClhh",
"type": "text",
"x": 785,
"y": 332.5,
"width": 418,
"height": 375,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aH",
"roundness": null,
"seed": 1402251560,
"version": 742,
"versionNonce": 680432168,
"isDeleted": false,
"boundElements": null,
"updated": 1758649532881,
"link": null,
"locked": false,
"text": "class Aggregate(Action):\n # Static\n + type: ActionTypes = Aggregate\n\n # Properties\n - actionIDs: [str]\n - associations: [Association]\n - output_mapper: [str]\n\n + execute(mem):\n tables = mem.gather(actionIDs)\n\n for join in association:\n \n ",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Aggregate(Action):\n # Static\n + type: ActionTypes = Aggregate\n\n # Properties\n - actionIDs: [str]\n - associations: [Association]\n - output_mapper: [str]\n\n + execute(mem):\n tables = mem.gather(actionIDs)\n\n for join in association:\n \n ",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "O0fso8DJqFfwJEzmpUikM",
"type": "text",
"x": 1289,
"y": 195,
"width": 594,
"height": 100,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aI",
"roundness": null,
"seed": 1582329944,
"version": 459,
"versionNonce": 1080077144,
"isDeleted": false,
"boundElements": null,
"updated": 1758647067031,
"link": null,
"locked": false,
"text": "input_mapper: \n - key: ActionID (name) that produced such output\n - value: list of strings that represent the values\n to take",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "input_mapper: \n - key: ActionID (name) that produced such output\n - value: list of strings that represent the values\n to take",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "v0kzO6vlBWOdJCV3yoG69",
"type": "text",
"x": 1379.5,
"y": 718.5,
"width": 286,
"height": 175,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aL",
"roundness": null,
"seed": 1462407976,
"version": 635,
"versionNonce": 1012998696,
"isDeleted": false,
"boundElements": null,
"updated": 1758649495598,
"link": null,
"locked": false,
"text": "class Association:\n - from_actionID: str\n - from_key_name: str\n - from_value_name: str\n - to_actionID: str\n - to_value_name: str\n - type: Type",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Association:\n - from_actionID: str\n - from_key_name: str\n - from_value_name: str\n - to_actionID: str\n - to_value_name: str\n - type: Type",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "WK34n9xeVxntypCtrlK6p",
"type": "text",
"x": 256.5,
"y": 787.5,
"width": 517,
"height": 175,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aM",
"roundness": null,
"seed": 1166526296,
"version": 318,
"versionNonce": 1042162520,
"isDeleted": false,
"boundElements": null,
"updated": 1758649002604,
"link": null,
"locked": false,
"text": "class Filter(Action):\n # Static\n + type: ActionTypes = Filter\n\n # Properties\n - compare: function(Dict<str, any>) -> bool\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Filter(Action):\n # Static\n + type: ActionTypes = Filter\n\n # Properties\n - compare: function(Dict<str, any>) -> bool\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "NY9jyUFLFFCNPE2sh00SX",
"type": "text",
"x": 1639,
"y": 606.5,
"width": 407,
"height": 200,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aP",
"roundness": null,
"seed": 20345896,
"version": 168,
"versionNonce": 627282472,
"isDeleted": false,
"boundElements": null,
"updated": 1758649426380,
"link": null,
"locked": false,
"text": "class Map(Action):\n # Static\n + type: ActionTypes = Map\n\n # Properties\n - compare_mapper: [str]\n - mapper: function(any...) -> any\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Map(Action):\n # Static\n + type: ActionTypes = Map\n\n # Properties\n - compare_mapper: [str]\n - mapper: function(any...) -> any\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "SkhaoW-3TTKDZzEii3Lf6",
"type": "text",
"x": 1457.5,
"y": 955.5,
"width": 121,
"height": 50,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aQ",
"roundness": null,
"seed": 2071523672,
"version": 37,
"versionNonce": 105260376,
"isDeleted": false,
"boundElements": null,
"updated": 1758648834435,
"link": null,
"locked": false,
"text": "class Dump:\n -",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Dump:\n -",
"autoResize": true,
"lineHeight": 1.25
}
],
"appState": {
"gridSize": 20,
"gridStep": 5,
"gridModeEnabled": false,
"viewBackgroundColor": "#ffffff"
},
"files": {}
}
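The Excalidraw diagram above sketches the class model for the cleaning pipeline (Pipeline, Action, ActionTypes, Extract, Aggregate, Filter, Map, Dump, Association). As an illustration only, a minimal Python skeleton of a few of those classes, with names and fields taken from the diagram's text boxes and everything else assumed:

from dataclasses import dataclass, field
from enum import Enum, auto
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional


class ActionTypes(Enum):
    Extract = auto()
    Aggregate = auto()
    Filter = auto()
    Map = auto()
    Dump = auto()


@dataclass
class Action:
    name: str
    depends_on: List[str] = field(default_factory=list)
    type: ActionTypes = ActionTypes.Extract

    def execute(self, mem: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
        raise NotImplementedError


@dataclass
class Extract(Action):
    type: ActionTypes = ActionTypes.Extract
    db_connection: Optional[Path] = None
    query: str = ""
    query_parameters: List[str] = field(default_factory=list)
    output_mapper: List[str] = field(default_factory=list)


@dataclass
class Filter(Action):
    type: ActionTypes = ActionTypes.Filter
    compare: Callable[[Dict[str, Any]], bool] = lambda row: True
    output_mapper: List[str] = field(default_factory=list)


@dataclass
class Pipeline:
    actions: List[Action] = field(default_factory=list)

Aggregate, Map, Dump and Association would follow the same pattern.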