Compare commits


No commits in common. "dev.report" and "main" have entirely different histories.

36 changed files with 2 additions and 3002 deletions

1
.gitattributes vendored

@@ -1,3 +1,2 @@
Exam/Deep_Learning_2025_VIII.pdf filter=lfs diff=lfs merge=lfs -text
Assets/** filter=lfs diff=lfs merge=lfs -text
Assets/Dataset/1-hop/dataset.csv filter=lfs diff=lfs merge=lfs -text

6
.gitignore vendored

@@ -189,8 +189,7 @@ ipython_config.py
.LSOverride
# Icon must end with two \r
Icon
Icon
# Thumbnails
._*
@@ -252,6 +251,3 @@ $RECYCLE.BIN/
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
# ---> Custom
**/Tmp/**
!**/.gitkeep

.vscode/extensions.json

@@ -1,14 +0,0 @@
{
"recommendations": [
"bierner.github-markdown-preview",
"bierner.markdown-checkbox",
"bierner.markdown-emoji",
"bierner.markdown-footnotes",
"bierner.markdown-mermaid",
"bierner.markdown-preview-github-styles",
"bierner.markdown-yaml-preamble",
"davidanson.vscode-markdownlint",
"kejun.markdown-alert",
"yzhang.markdown-all-in-one"
]
}

BIN
Assets/Dataset/1-hop/dataset.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:331d8ef4e99c5200f1323e7149bd8aade39dc17ee5778b553bb32c593ff601cf
size 2443211793

BIN
Assets/Dataset/1-hop/movie-pageid.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:88e387ed1338bdfd34ded22f3f8bebb2be5127857bf36fcffc266b35c534587c
size 10148507

BIN
Assets/Dataset/1-hop/movies.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:8d81c8801ea79bd46747769a288cd0c507b3b94b2fb4bbb9605e282776ca5efb
size 8808636

BIN
Assets/Dataset/1-hop/reverse.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:b4878aed66c382e73982b19fa02129d5b3c3e3e8690c28e4dd662257e1d9b119
size 32343972

BIN
Assets/Dataset/1-hop/uri-abbreviations.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:c1fcb1ad61a69145145c45c639ab42b36ffc63caa0ef9832eb81491197883ff4
size 8086

BIN
Assets/Dataset/1-hop/wikipedia-movie.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:1730dc111c0290b16d094a4b6a6577d966978d97ee9ef4202e86148cc9d8e8e8
size 17445736

BIN
Assets/Dataset/1-hop/wikipedia-summary.csv (Stored with Git LFS)

Binary file not shown.
version https://git-lfs.github.com/spec/v1
oid sha256:ef7b680257f16b193a9b4ea2914564b58c676955809e6b9d58058adaab7855c1
size 73089553

BIN
Assets/Dataset/DatawareHouse/dataset.db (Stored with Git LFS)

Binary file not shown.

README.md

@@ -1,28 +1,3 @@
# NanoSocrates
This is the project work for the Deep Learning exam of 16th September 2025
## Index
- [Resources](./docs/RESOURCES.md)
## Setup
Create and activate your Conda environment with:
conda env create -f environment.yaml
conda activate deep_learning
Now install the pip dependencies:
pip install -r requirements.txt
## Troubleshooting
Sometimes, when uploading a really large batch of data, Git LFS can abort the upload because of a timeout.
The solution is to change its settings locally:
git config lfs.dialtimeout 3600
git config lfs.activitytimeout 3600
For more details, see: https://stackoverflow.com/questions/58961697/i-o-timeout-when-pushing-to-a-git-reporsitory


@@ -1,30 +0,0 @@
-- To pass to Pandas
SELECT *
FROM RDFs
INNER JOIN Subjects USING (SubjectID)
INNER JOIN Relationships USING (RelationshipID)
INNER JOIN Objects USING (ObjectID);
-- To pass to Pandas for abstracts
SELECT *
FROM RDFs
INNER JOIN WikipediaAbstracts USING (MovieID);
-- To pass to Pandas for abbreviations
SELECT *
FROM Abbreviations;
-- More complex query to produce a clean dataset
SELECT MovieID, GROUP_CONCAT('<SOT>' || '<SUB>' || SubjectURI || '<REL>' || RelationshipURI || '<OBJ>' || ObjectURI || '<EOT>', '') as RDF_String, Abstract
FROM RDFs
INNER JOIN SubjectsCountInRDFs USING (SubjectID)
INNER JOIN RelationshipsCountInRDFs USING(RelationshipID)
INNER JOIN ObjectsCountInRDFs USING (ObjectID)
INNER JOIN ParsedSubjects USING (SubjectID)
INNER JOIN ParsedRelationships USING (RelationshipID)
INNER JOIN ParsedObjects USING (ObjectID)
INNER JOIN WikipediaAbstracts USING (MovieID)
-- insert WHERE here
-- WHERE SubjectID = 134626
GROUP BY MovieID;
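
A minimal sketch of how these statements are consumed from Python with Pandas (the warehouse path is the one used elsewhere in this diff; everything else is illustrative):

```python
# Minimal sketch, assuming the warehouse at Assets/Dataset/DatawareHouse/dataset.db.
import sqlite3

import pandas as pd

conn = sqlite3.connect("./Assets/Dataset/DatawareHouse/dataset.db")

# Flat RDF triples joined with their components.
rdf = pd.read_sql_query(
    "SELECT * FROM RDFs "
    "INNER JOIN Subjects USING (SubjectID) "
    "INNER JOIN Relationships USING (RelationshipID) "
    "INNER JOIN Objects USING (ObjectID);",
    conn,
)

# One abstract per movie.
abstracts = pd.read_sql_query(
    "SELECT * FROM RDFs INNER JOIN WikipediaAbstracts USING (MovieID);",
    conn,
)
conn.close()
```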

Scripts/DataCleaning/SQL_Queries/db_creation.sql

@@ -1,174 +0,0 @@
CREATE TABLE IF NOT EXISTS Movies (
MovieID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS WikiPageIDs (
MovieID INTEGER PRIMARY KEY,
PageID INTEGER UNIQUE NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS WikipediaAbstracts (
MovieID INTEGER PRIMARY KEY,
Abstract TEXT NOT NULL,
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID)
);
CREATE TABLE IF NOT EXISTS Origins (
OriginID INTEGER PRIMARY KEY AUTOINCREMENT,
OriginName TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Subjects (
SubjectID INTEGER PRIMARY KEY AUTOINCREMENT,
SubjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS Relationships (
RelationshipID INTEGER PRIMARY KEY AUTOINCREMENT,
RelationshipURI TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Objects (
ObjectID INTEGER PRIMARY KEY AUTOINCREMENT,
ObjectURI TEXT UNIQUE NOT NULL,
OriginID BIGINT NOT NULL,
FOREIGN KEY(OriginID) REFERENCES Origins(OriginID)
);
CREATE TABLE IF NOT EXISTS RDFs (
RDF_ID INTEGER PRIMARY KEY AUTOINCREMENT,
MovieID INTEGER NOT NULL,
SubjectID INTEGER NOT NULL,
RelationshipID INTEGER NOT NULL,
ObjectID INTEGER NOT NULL,
UNIQUE(MovieID, SubjectID, RelationshipID, ObjectID),
FOREIGN KEY(MovieID) REFERENCES Movies(MovieID),
FOREIGN KEY(SubjectID) REFERENCES Subjects(SubjectID),
FOREIGN KEY(RelationshipID) REFERENCES Relationships(RelationshipID),
FOREIGN KEY(ObjectID) REFERENCES Objects(ObjectID)
);
CREATE INDEX IF NOT EXISTS idx_rdf_movie_id ON RDFs(MovieID);
CREATE INDEX IF NOT EXISTS idx_rdf_subject_id ON RDFs(SubjectID);
CREATE INDEX IF NOT EXISTS idx_rdf_relationship_id ON RDFs(RelationshipID);
CREATE INDEX IF NOT EXISTS idx_rdf_object_id ON RDFs(ObjectID);
CREATE TABLE IF NOT EXISTS Abbreviations (
AbbreviationID INTEGER PRIMARY KEY AUTOINCREMENT,
URI TEXT UNIQUE NOT NULL,
Abbreviation TEXT UNIQUE NOT NULL
);
CREATE TABLE IF NOT EXISTS Subjects_Abbreviations (
SubjectID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(SubjectID, AbbreviationID),
FOREIGN KEY(SubjectID) REFERENCES Subjects(SubjectID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE TABLE IF NOT EXISTS Relationships_Abbreviations (
RelationshipID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(RelationshipID, AbbreviationID),
FOREIGN KEY(RelationshipID) REFERENCES Relationships(RelationshipID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE TABLE IF NOT EXISTS Objects_Abbreviations (
ObjectID INTEGER NOT NULL,
AbbreviationID INTEGER NOT NULL,
PRIMARY KEY(ObjectID, AbbreviationID),
FOREIGN KEY(ObjectID) REFERENCES Objects(ObjectID),
FOREIGN KEY(AbbreviationID) REFERENCES Abbreviations(AbbreviationID)
);
CREATE INDEX IF NOT EXISTS idx_sub_abbr_sub_id ON Subjects_Abbreviations(SubjectID);
CREATE INDEX IF NOT EXISTS idx_sub_abbr_abbr_id ON Subjects_Abbreviations(AbbreviationID);
CREATE INDEX IF NOT EXISTS idx_rel_abbr_rel_id ON Relationships_Abbreviations(RelationshipID);
CREATE INDEX IF NOT EXISTS idx_rel_abbr_abbr_id ON Relationships_Abbreviations(AbbreviationID);
CREATE INDEX IF NOT EXISTS idx_obj_abbr_obj_id ON Objects_Abbreviations(ObjectID);
CREATE INDEX IF NOT EXISTS idx_obj_abbr_abbr_id ON Objects_Abbreviations(AbbreviationID);
-- Views
-- Subjects
CREATE VIEW IF NOT EXISTS ParsedSubjects
AS
SELECT
SubjectID,
CASE WHEN Abbreviation IS NULL
THEN SubjectURI
ELSE Abbreviation || ':' || replace(SubjectURI, URI, '') END
AS SubjectURI
FROM Subjects
LEFT JOIN Subjects_Abbreviations USING (SubjectID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Relationships
CREATE VIEW IF NOT EXISTS ParsedRelationships
AS
SELECT
RelationshipID,
CASE WHEN Abbreviation IS NULL
THEN RelationshipURI
ELSE Abbreviation || ':' || replace(RelationshipURI, URI, '') END
AS RelationshipURI
FROM Relationships
LEFT JOIN Relationships_Abbreviations USING (RelationshipID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Objects
CREATE VIEW IF NOT EXISTS ParsedObjects
AS
SELECT
ObjectID,
CASE WHEN Abbreviation IS NULL
THEN ObjectURI
ELSE Abbreviation || ':' || replace(ObjectURI, URI, '') END
AS ObjectURI
FROM Objects
LEFT JOIN Objects_Abbreviations USING (ObjectID)
LEFT JOIN Abbreviations USING (AbbreviationID);
-- Subject Count
CREATE VIEW IF NOT EXISTS SubjectsCountInRDFs
AS
SELECT SubjectID, count(SubjectID) as Sub_Count
FROM RDFs
GROUP BY SubjectID;
-- Relationship Count
CREATE VIEW IF NOT EXISTS RelationshipsCountInRDFs
AS
SELECT RelationshipID, count(RelationshipID) as Rel_Count
FROM RDFs
GROUP BY RelationshipID;
-- Object Count
CREATE VIEW IF NOT EXISTS ObjectsCountInRDFs
AS
SELECT ObjectID, count(ObjectID) as Obj_Count
FROM RDFs
GROUP BY ObjectID;
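
The build script further down initializes this schema through the sqlite3 shell; a minimal Python equivalent (same paths as there) would be:

```python
# Minimal sketch: bootstrap a fresh database file from the schema above.
import sqlite3

with open("./Scripts/DataCleaning/SQL_Queries/db_creation.sql", encoding="utf-8") as f:
    schema = f.read()

conn = sqlite3.connect("./Assets/Dataset/Tmp/dataset.db")
conn.executescript(schema)  # runs every CREATE TABLE / INDEX / VIEW statement
conn.commit()
conn.close()
```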

Scripts/DataCleaning/SQL_Queries/query.sql

@@ -1,55 +0,0 @@
-- Insert MovieURI into Movies; MovieID is auto-incremented
INSERT INTO Movies (MovieURI) VALUES (?);
-- Get MovieID where MovieURI equals the given value
SELECT MovieID FROM Movies WHERE MovieURI = ?;
-- SetPageId
INSERT INTO WikiPageIDs (MovieID, PageID) VALUES (?,?);
-- Get MovieID by PageID (used to create WikipediaAbstracts rows)
SELECT MovieID FROM WikiPageIDs WHERE PageID = ?;
-- SetAbstract
INSERT INTO WikipediaAbstracts (MovieID, Abstract) VALUES (?,?);
-- SetOrigin
INSERT INTO Origins (OriginName) VALUES ('dataset.csv'),('reverse.csv');
-- GetOrigin
SELECT OriginID FROM Origins WHERE OriginName = ?;
-- Subject, Relationship, Object, RDF
INSERT INTO Subjects (SubjectURI, OriginID) VALUES (?,?);
INSERT INTO Relationships (RelationshipURI) VALUES (?);
INSERT INTO Objects (ObjectURI, OriginID) VALUES (?,?);
SELECT SubjectID FROM Subjects WHERE SubjectURI = ?;
SELECT RelationshipID FROM Relationships WHERE RelationshipURI = ?;
SELECT ObjectID FROM Objects WHERE ObjectURI = ?;
INSERT INTO RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);
-- Prefixes
INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);
INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);
INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);
INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);
-- Be sure the value is a URI before running this query,
-- and include at least the domain and the first path segment
SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;
-- Query to retrieve data
SELECT MovieID, GROUP_CONCAT('<SOT>' || '<SUB>' || SubjectURI || '<REL>' || RelationshipURI || '<OBJ>' || ObjectURI || '<EOT>', '') as RDF_String, Abstract
FROM RDFs
INNER JOIN ParsedSubjects USING (SubjectID)
INNER JOIN ParsedRelationships USING (RelationshipID)
INNER JOIN ParsedObjects USING (ObjectID)
INNER JOIN WikipediaAbstracts USING (MovieID)
-- insert WHERE here
GROUP BY MovieID;
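
A short sketch of driving these parameterized statements from Python; the movie URI is hypothetical, and note how the abbreviation lookup parameter is a URI prefix (at least the domain plus the first path segment) followed by the LIKE wildcard:

```python
# Illustrative only: the build script catches sqlite3.IntegrityError on
# duplicates instead of using INSERT OR IGNORE as done here.
import sqlite3

conn = sqlite3.connect("./Assets/Dataset/DatawareHouse/dataset.db")
curs = conn.cursor()

uri = "http://dbpedia.org/resource/Pulp_Fiction"  # hypothetical movie URI
curs.execute("INSERT OR IGNORE INTO Movies (MovieURI) VALUES (?);", [uri])
curs.execute("SELECT MovieID FROM Movies WHERE MovieURI = ?;", [uri])
movie_id = curs.fetchone()[0]

# Prefix up to the first path segment, plus the '%' wildcard.
prefix = "http://dbpedia.org/resource/" + "%"
curs.execute("SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;", [prefix])
abbreviation = curs.fetchone()  # None when no abbreviation matches

conn.commit()
conn.close()
```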


@@ -1,186 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "b9081b7c",
"metadata": {},
"outputs": [],
"source": [
"# This file deletes in the pipeline the unwanted relationship by different rules\n",
"import pandas as pd\n",
"import sqlite3\n",
"import numpy as np\n",
"\n",
"\n",
"CONN = sqlite3.connect('../../Assets/Dataset/Tmp/dataset2.db')\n",
"\n",
"def get_RDF() -> pd.DataFrame:\n",
" \"\"\"\n",
" QUERY = \"SELECT * FROM RDFs \" \\\n",
" \"INNER JOIN Subjects USING (SubjectID) \" \\\n",
" \"INNER JOIN Relationships USING (RelationshipID) \" \\\n",
" \"INNER JOIN Objects USING (ObjectID);\"\n",
" RDF = pd.read_sql_query(QUERY, CONN)\n",
" RDF = RDF[[\"SubjectURI\", \"RelationshipURI\", \"ObjectURI\"]]\n",
" RDF = RDF.dropna()\n",
" \"\"\"\n",
" Subjects = pd.read_sql_query('SELECT * FROM Subjects;', CONN)\n",
" Objects = pd.read_sql_query('SELECT * FROM Objects;', CONN)\n",
" Relationships = pd.read_sql_query('SELECT * FROM Relationships;', CONN)\n",
" RDF = pd.read_sql_query('SELECT * FROM RDFs;', CONN)\n",
"\n",
" # drop '' values \n",
" Subjects = Subjects.replace('', np.nan)# .dropna()\n",
" Relationships = Relationships.replace('', np.nan)# .dropna()\n",
" Objects = Objects.replace('', np.nan)# .dropna()\n",
"\n",
" # join RDF with its components\n",
" RDF = RDF.merge(Subjects, left_on=\"SubjectID\", right_on=\"SubjectID\")\n",
" RDF = RDF.merge(Objects, left_on=\"ObjectID\", right_on=\"ObjectID\")\n",
" RDF = RDF.merge(Relationships, left_on=\"RelationshipID\", right_on=\"RelationshipID\")\n",
" RDF = RDF[[\"SubjectURI\", \"RelationshipURI\", \"ObjectURI\", \"MovieID\"]]\n",
" return RDF\n",
"\n",
"\n",
"#def delete_relationship_by_uri(RDF: pd.DataFrame, )\n",
"\n",
"def delete_relationship_by_uri(RDF: pd.DataFrame, uri: str) -> pd.DataFrame:\n",
" return RDF[RDF[\"RelationshipURI\"]!= uri]\n",
"\n",
"\n",
"\n",
"RDF = get_RDF()\n",
"# RDF = RDF.dropna()\n",
"# print(RDF)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "644690bb",
"metadata": {},
"outputs": [],
"source": [
"def filter_by_frequence_relationship_uri(RDF: pd.DataFrame, count_treshold) -> pd.DataFrame:\n",
" counts = RDF[\"RelationshipURI\"].value_counts() \n",
" RDF[\"RelationshipFreq\"] = RDF[\"RelationshipURI\"].map(counts)\n",
" RDF = RDF[RDF[\"RelationshipFreq\"] >= count_treshold]\n",
" # counts is a series as key: relationship, value: count\n",
" # counts = counts[counts > count_treshold]\n",
" # relationships = counts.index\n",
" # RDF = RDF[RDF[\"RelationshipURI\"].isin(relationships)]\n",
" # RDF = RDF.groupby(\"RelationshipURI\").filter(lambda x: len(x) >= count_treshold)\n",
" return RDF\n",
"\n",
"RDF = filter_by_frequence_relationship_uri(RDF, 1)\n",
"# print(new_RDF)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34525be6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" SubjectURI \\\n",
"0 http://dbpedia.org/resource/Nights_of_Cabiria \n",
"1 http://dbpedia.org/resource/California_Science... \n",
"2 http://dbpedia.org/resource/China_Captain \n",
"3 http://dbpedia.org/resource/Caravan_of_Courage... \n",
"4 http://dbpedia.org/resource/WHIH_Newsfront \n",
"... ... \n",
"12725500 http://dbpedia.org/resource/I_Will_Follow_(film) \n",
"12725501 http://dbpedia.org/resource/I_Will_Follow_(film) \n",
"12725502 http://dbpedia.org/resource/I_Witnessed_Genoci... \n",
"12725503 http://dbpedia.org/resource/I_Woke_Up_Early_th... \n",
"12725504 http://dbpedia.org/resource/I_Won't_Play \n",
"\n",
" RelationshipURI \\\n",
"0 http://www.w3.org/2002/07/owl#differentFrom \n",
"1 http://www.w3.org/2002/07/owl#differentFrom \n",
"2 http://www.w3.org/2002/07/owl#differentFrom \n",
"3 http://www.w3.org/2002/07/owl#differentFrom \n",
"4 http://www.w3.org/2000/01/rdf-schema#seeAlso \n",
"... ... \n",
"12725500 http://dbpedia.org/ontology/producer \n",
"12725501 http://dbpedia.org/ontology/producer \n",
"12725502 http://dbpedia.org/ontology/producer \n",
"12725503 http://dbpedia.org/ontology/producer \n",
"12725504 http://dbpedia.org/ontology/producer \n",
"\n",
" ObjectURI MovieID \\\n",
"0 http://dbpedia.org/resource/Cabiria 26 \n",
"1 http://dbpedia.org/resource/California_Academy... 185 \n",
"2 http://dbpedia.org/resource/Captain_China 614 \n",
"3 http://dbpedia.org/resource/Caravan_of_Courage... 740 \n",
"4 http://dbpedia.org/resource/Captain_America:_C... 594 \n",
"... ... ... \n",
"12725500 http://dbpedia.org/resource/Ava_DuVernay 145854 \n",
"12725501 http://dbpedia.org/resource/Molly_Mayeux 145854 \n",
"12725502 http://dbpedia.org/resource/Headlines_Today 145861 \n",
"12725503 http://dbpedia.org/resource/Billy_Zane 145862 \n",
"12725504 http://dbpedia.org/resource/Gordon_Hollingshead 145864 \n",
"\n",
" RelationshipFreq MovieFreq \n",
"0 2132 216 \n",
"1 2132 264 \n",
"2 2132 66 \n",
"3 2132 131 \n",
"4 1653 133 \n",
"... ... ... \n",
"12725500 80077 95 \n",
"12725501 80077 95 \n",
"12725502 80077 41 \n",
"12725503 80077 98 \n",
"12725504 80077 91 \n",
"\n",
"[12725505 rows x 6 columns]\n"
]
}
],
"source": [
"def filter_by_frequence_movie_id(RDF: pd.DataFrame, min_treshold, max_treshold) -> pd.DataFrame:\n",
" counts = RDF[\"MovieID\"].value_counts() \n",
" RDF[\"MovieFreq\"] = RDF[\"MovieID\"].map(counts)\n",
" RDF = RDF[RDF[\"MovieFreq\"] >= min_treshold]\n",
" RDF = RDF[RDF[\"MovieFreq\"] < max_treshold]\n",
" # counts is a series as key: relationship, value: count\n",
" # counts = counts[counts > count_treshold]\n",
" # relationships = counts.index\n",
" # RDF = RDF[RDF[\"RelationshipURI\"].isin(relationships)]\n",
" # RDF = RDF.groupby(\"RelationshipURI\").filter(lambda x: len(x) >= count_treshold)\n",
" return RDF\n",
"\n",
"RDF = filter_by_frequence_movie_id(RDF, 1, 1500)\n",
"print(RDF)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "deep_learning",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}


@@ -1,77 +0,0 @@
import argparse
import sys
class ProgramArgs:
def __init__(self, file: str, output: str, treshold: int):
self.file = file
self.output = output
self.treshold = treshold
def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "-i", required=True, type=str)
PARSER.add_argument("--output-file", "-o", required=True, type=str)
PARSER.add_argument("--treshold", "-t", type=int, default=1)
parsed_args, _ = PARSER.parse_known_args(args)
# print(parsed_args.input_file)
    return ProgramArgs(parsed_args.input_file, parsed_args.output_file, parsed_args.treshold)  # type: ignore
def print_dbpedia(file: str, out: str):
FILE = open(file, "r", encoding="utf-8")
OUT = open(out, mode="w", encoding="utf-8")
DOMAIN_PART = "dbpedia"
already_parsed : set[str] = set()
for row in FILE:
sections = row.split("/")
sections = list(filter(lambda item: item != "", sections))
# print(sections)
if len(sections) < 3:
continue
URI = "/".join(sections[1:3])
URI = "//".join([sections[0], URI])
if URI in already_parsed:
continue
DOMAIN = sections[1]
SUBDOMAINS = DOMAIN.split(".")
TYPE = sections[2][0]
if DOMAIN_PART not in SUBDOMAINS:
continue
already_parsed.add(URI)
SUB_ID = SUBDOMAINS[0]
if len(SUB_ID) > 3:
SUB_ID = SUB_ID[:3]
OUT.write(f"\"{URI}/\", \"{SUB_ID}-db{TYPE}\"\n")
FILE.close()
OUT.close()
if __name__ == "__main__":
ARGS = get_args(sys.argv)
# ARGS = get_debug_args()
print_dbpedia(ARGS.file, ARGS.output)
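
A hedged usage sketch of the script above (file contents and paths are made up; assumes print_dbpedia is in scope):

```python
# Hypothetical demo of the abbreviation rows this script emits.
import pathlib
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    src = pathlib.Path(tmp, "uris.txt")
    dst = pathlib.Path(tmp, "abbreviations.csv")
    src.write_text(
        "http://dbpedia.org/resource/Pulp_Fiction\n"
        "http://dbpedia.org/ontology/producer\n",
        encoding="utf-8",
    )
    print_dbpedia(str(src), str(dst))
    print(dst.read_text(encoding="utf-8"))
    # "http://dbpedia.org/resource/", "dbp-dbr"
    # "http://dbpedia.org/ontology/", "dbp-dbo"
```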


@@ -1,162 +0,0 @@
import argparse
import csv
import sys
class ProgramArgs:
def __init__(self, file: str, csv_uri_header: str, output: str, treshold: int):
"""
Args:
file (str):
csv_header (str): The name of the column of the csv file from which the program will get the URIs
output (str):
treshold (int):
"""
self.file = file
self.csv_uri_header = csv_uri_header
self.output = output
self.treshold = treshold
class Node:
def __init__(
self,
name: str,
quantity: int = 0,
):
self.name = name
self.quantity = quantity
self.children: dict[str, Node] = {}
@property
def is_leaf(self):
return len(self.children) == 0
def append_child(self, child: list[str]):
# print(child)
KEY = child[0]
if not self.children.get(KEY):
# if the key has no value, it means we are traversing this branch for the first time
# create another node for the key
self.children[KEY] = Node(KEY, 0)
# take the node for the key
CHILD = self.children[KEY]
self.quantity += 1
        # if the child list has only one element (the KEY itself), no more nodes will be created
if len(child) == 1:
return
new_children = child[1:]
CHILD.append_child(new_children)
def __str__(self):
return f"{self.name}/ - {self.quantity}"
def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "-i", required=True, type=str)
PARSER.add_argument("--header-name", "-c", required=True, type=str) # c stands for column
PARSER.add_argument("--output-file", "-o", required=True, type=str)
PARSER.add_argument("--treshold", "-t", type=int, default=1)
parsed_args, _ = PARSER.parse_known_args(args)
# print(parsed_args.input_file)
    return ProgramArgs(parsed_args.input_file, parsed_args.header_name, parsed_args.output_file, parsed_args.treshold)  # type: ignore
def get_debug_args() -> ProgramArgs:
# -i ./Assets/Dataset/1-hop/movies.csv -c subject -o Assets/Dataset/Tmp/prova.csv -t 1
FILE = "./Assets/Dataset/1-hop/movies.csv"
CSV_HEADER = "subject"
OUTPUT = "./Assets/Dataset/Tmp/prova.csv"
TRESHOLD = 1
return ProgramArgs(
FILE,
CSV_HEADER,
OUTPUT,
TRESHOLD
)
def tree_like(file: str, csv_uri_header:str, out: str):
INDENTATION = " "
properties: dict[str, Node] = {}
properties["pure"] = Node("pure", 0)
properties["URI"] = Node("uri", 0)
FILE = open(file, "r", encoding="utf-8")
    # TODO: change this so that it takes a single URI from a CSV file;
    # the header name is needed for that
for row in csv.DictReader(FILE):
uri_element = row[csv_uri_header]
sections = uri_element.split("/")
sections = list(filter(lambda item: item != "", sections))
# print(sections)
if sections[0] != "http:" and sections[0] != "https:":
properties["pure"].append_child(sections)
continue
properties["URI"].append_child(sections)
FILE.close()
stack: list[tuple[Node, int]] = []
for _, item in properties.items():
stack.append((item, 0))
OUT = open(out, mode="w", encoding="utf-8")
while len(stack) > 0:
LAST_ITEM = stack.pop()
NODE: Node = LAST_ITEM[0]
DEPTH: int = LAST_ITEM[1]
INDENT: str = INDENTATION * DEPTH
        # Leaf nodes have quantity 0, so for them to appear the threshold has to be 0
# if NODE.quantity < ARGS.treshold:
if ARGS.treshold > NODE.quantity:
continue
OUT.write(f"{INDENT}- {NODE}\n")
if NODE.is_leaf:
continue
CHILDREN = []
for _, child in NODE.children.items():
CHILDREN.append((child, DEPTH + 1))
stack.extend(CHILDREN)
OUT.close()
if __name__ == "__main__":
ARGS = get_args(sys.argv)
# ARGS = get_debug_args()
tree_like(ARGS.file,ARGS.csv_uri_header, ARGS.output)
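
For illustration, two subject URIs under http://dbpedia.org/resource/ would yield output like this (hypothetical run with -t 1; leaf nodes carry quantity 0 and are filtered out at that threshold):

```python
# Hypothetical tree_like output: one "<segment>/ - <count>" line per node,
# indented one step per depth level.
#
# - uri/ - 2
#   - http:/ - 2
#     - dbpedia.org/ - 2
#       - resource/ - 2
```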


@@ -1,53 +0,0 @@
import argparse
import sys
import pandas as pd
class ProgramArgs:
def __init__(
self, input_file: str, column: str, output_file: str, count: bool
) -> None:
self.input_file = input_file
self.column = column
self.output_file = output_file
self.count = count
def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "--input", "-i", required=True, type=str)
PARSER.add_argument("--output-file", "--output", "-o", required=True, type=str)
PARSER.add_argument("--column", "--col", required=True, type=str)
PARSER.add_argument(
"--count", "-c", action="store_const", const=True, default=False
)
parsed_args, _ = PARSER.parse_known_args(args)
return ProgramArgs(
parsed_args.input_file,
parsed_args.column,
parsed_args.output_file,
parsed_args.count,
    )  # type: ignore
if __name__ == "__main__":
ARGS = get_args(sys.argv)
OUTPUT_FILE = open(ARGS.output_file, "w+", encoding="utf-8")
# Load the CSV
df = pd.read_csv(ARGS.input_file)
# Count occurrences of each unique last part
item_counts = df[ARGS.column].value_counts()
# Print the counts
    for item, count in item_counts.items():
        if ARGS.count:
            OUTPUT_FILE.write(f"{item}: {count}\n")
        else:
            OUTPUT_FILE.write(f"{item}\n")
    OUTPUT_FILE.close()


@@ -1,146 +0,0 @@
import argparse
from math import floor
import sys
from time import sleep
import SPARQLWrapper
class ProgramData:
def __init__(
self,
local_url,
query_url,
sparql_url,
output_type,
initial_offset,
timeout,
limit,
max_pages,
verbosity_level,
) -> None:
self.local_url = local_url
self.query_url = query_url
self.sparql_url = sparql_url
self.output_type = output_type
self.initial_offset = initial_offset
self.timeout = timeout
self.limit = limit
self.max_pages = max_pages
self.verbosity_level = verbosity_level
@property
def offset(self):
return self.limit
@property
def query(self):
with open(self.query_url, "r") as file:
return file.read()
DBPEDIA_URL = "https://dbpedia.org/sparql"
TYPE = SPARQLWrapper.CSV
TIMEOUT_SECONDS = 1.5
LIMIT = int(1E4)
INITIAL_OFFSET = 0
MAX_PAGES = int(1E9)
def gather_cli_args(args: list[str]) -> ProgramData:
# TODO: Add argument for type
PARSER = argparse.ArgumentParser("sparql data fetcher")
PARSER.add_argument("--file-path", "--file", "--output", "-o", required=True, type=str)
PARSER.add_argument("--query-file", "--query", "-q", required=True, type=str)
PARSER.add_argument("--url", type=str, default=DBPEDIA_URL)
PARSER.add_argument("--limit", type=int, default=LIMIT)
PARSER.add_argument("--timeout", type=float, default=TIMEOUT_SECONDS)
PARSER.add_argument("--offset", type=int, default=INITIAL_OFFSET)
PARSER.add_argument("--max-pages", type=int, default=MAX_PAGES)
PARSER.add_argument("--verbose", "-v", action="count", default=0)
parsed_args, _ = PARSER.parse_known_args(args)
return ProgramData(
parsed_args.file_path,
parsed_args.query_file,
parsed_args.url,
SPARQLWrapper.CSV,
parsed_args.offset,
parsed_args.timeout,
parsed_args.limit,
parsed_args.max_pages,
parsed_args.verbose
)
# type: ignore
def fetch_data(DATA: ProgramData):
# Take correction of page into account
page = int(floor(DATA.initial_offset / DATA.limit)) - 1
exit = False
while not exit:
print(f"Starting to get page {page}")
CURRENT_OFFSET = int(DATA.offset + (page * DATA.limit))
sparql = SPARQLWrapper.SPARQLWrapper(DATA.sparql_url)
sparql.setReturnFormat(TYPE)
        CURRENT_PAGE_QUERY = "\n".join([
            DATA.query,
            f"LIMIT {DATA.limit}",
            f"OFFSET {CURRENT_OFFSET}"
        ])
print(f"\nCurrent Query:\n{CURRENT_PAGE_QUERY}\n")
sparql.setQuery(CURRENT_PAGE_QUERY)
try:
res = sparql.queryAndConvert()
text = ""
            if type(res) == bytes:
                # drop the first line (the repeated CSV header) on every page except page 0
                skip_lines = 0
                if page != 0:
                    skip_lines = 1
                lines = res.decode("utf-8", "ignore").split("\n")
                text = "\n".join(lines[skip_lines:])
if text == "":
exit = True
continue
with open(DATA.local_url, "a+", encoding="utf-8") as dataset:
print(f"Writing page {page} on {DATA.local_url}")
dataset.write(
text
)
except Exception as ex:
print(f"Something went wrong during page {page}:\n\t{ex}")
print(f"Sleeping for {TIMEOUT_SECONDS}")
page += 1
if page == MAX_PAGES - 1:
exit = True
sleep(TIMEOUT_SECONDS)
if __name__ == "__main__":
DATA = gather_cli_args(sys.argv)
fetch_data(DATA)
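
Each page re-issues the base query with pagination clauses appended; here is a sketch of the string sent for page 2 at the default limit (the base query is a hypothetical stand-in for the --query-file contents):

```python
# Hypothetical base query; the real one is read from --query-file.
base_query = "SELECT ?movie WHERE { ?movie a <http://dbpedia.org/ontology/Film> }"
page, limit = 2, int(1E4)
current_offset = limit + page * limit  # mirrors DATA.offset + page * DATA.limit above
paged_query = "\n".join([base_query, f"LIMIT {limit}", f"OFFSET {current_offset}"])
# SELECT ?movie WHERE { ?movie a <http://dbpedia.org/ontology/Film> }
# LIMIT 10000
# OFFSET 30000
```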


@@ -1,154 +0,0 @@
from pathlib import Path
import pandas as pd
import csv
import time
import requests
input_csv = "./Assets/Dataset/1-hop/movie-pageid.csv"
output_csv = "./Assets/Dataset/Tmp/wikipedia-summary.csv"
sess = requests.Session()
CHUNK = 20
# Function to get clean full text from Wikipedia PageID
def get_clean_text(pageIDS: list[str]):
parsing_time = 0
start_full = time.time()
API_URL = "https://en.wikipedia.org/w/api.php"
headers = {
"User-Agent": "CoolBot/0.0"
""
" (https://example.org/coolbot/; coolbot@example.org)"
}
ids = "|".join(pageIDS)
start_fetch = time.time()
res = sess.get(headers=headers, url=f"{API_URL}?action=query&pageids={ids}&prop=extracts&exintro=1&explaintext=1&format=json")
end_fetch = time.time()
fetch_time = end_fetch - start_fetch
print(f"Time elapsed FETCH: {fetch_time} seconds")
data = res.json()
abstracts = {}
# Make sure 'query' and the page exist
SKIPPED = 0
if "query" in data and "pages" in data["query"]:
for pageID in pageIDS:
if pageID in data["query"]["pages"]:
page = data["query"]["pages"][pageID]
extract: str = page.get("extract")
if extract:
print(f"Entry FOUND for pageID {pageID}")
start_parse = time.time()
extract = extract.strip()
extract = extract.replace("\n", "")
end_parse = time.time()
parsing_time = end_parse - start_parse
print(f"Time elapsed PARSE: {parsing_time} seconds")
abstracts[pageID] = extract
else:
SKIPPED += 1
print(f"Entry MISSING for pageID {pageID}")
else:
SKIPPED += 1
print(f"Page MISSING for pageID {pageID}")
print(f"Chunk done - Skipped {SKIPPED}")
end_full = time.time()
print(f"Time elapsed FULL: {end_full - start_full} seconds\n\tNO PARSE: {(end_full - start_full) - parsing_time} seconds")
return abstracts
def flush(movie_ids):
abstracts = get_clean_text(movie_ids)
start = time.time()
with open(output_csv, "a", newline="", encoding="utf-8") as f_out:
writer = csv.DictWriter(f_out, fieldnames=["subject", "text"])
for id, text in abstracts.items():
writer.writerow({"subject": id, "text": text})
end = time.time()
print(f"Time elapsed WRITE: {end - start} seconds")
def reconcile() -> int:
start = time.time()
input_file = open(input_csv, "r", newline="", encoding="utf-8")
output_file = open(output_csv, "r", newline="", encoding="utf-8")
next(input_file)
LAST_CHECKED = output_file.readlines()[-1].split(",")[0]
current_check = input_file.readline().split(",")[1]
index = 1
while current_check != LAST_CHECKED:
current_check = input_file.readline().split(",")[1].replace("\n", "")
index += 1
input_file.close()
output_file.close()
end = time.time()
print(f"Time elapsed RECONCILE: {end - start} seconds")
print(f"FOUND, we need to skip {index} lines")
return index
if not Path(output_csv).is_file():
    # Initialize the output CSV and start from the beginning
    with open(output_csv, "w", newline="", encoding="utf-8") as f_out:
        writer = csv.DictWriter(f_out, fieldnames=["subject", "text"])
        writer.writeheader()
    SKIP = 0
else:
    # Resume: find how many input rows are already present in the output
    SKIP = reconcile()
# Read CSV in RAM
with open(input_csv, "r", newline="", encoding="utf-8") as input:
# Skip already done
for i in range(0, SKIP):
next(input)
reader = csv.reader(input)
index = -1
movie_ids = []
for line in reader:
index += 1
if index == 0:
continue
# Save movies in map
movie_ids.append(line[1])
        if index % CHUNK == 0:
            # Flush movies
            flush(movie_ids)
            movie_ids = []
    # Flush the last, possibly incomplete chunk
    if movie_ids:
        flush(movie_ids)
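
For reference, a sketch of the MediaWiki call issued per chunk, with the query-string parameters from above made explicit (the page IDs are illustrative):

```python
# Equivalent request with explicit params (page IDs are made up).
import requests

API_URL = "https://en.wikipedia.org/w/api.php"
params = {
    "action": "query",
    "pageids": "|".join(["10068850", "4049786"]),  # up to CHUNK ids per call
    "prop": "extracts",
    "exintro": 1,      # lead section only
    "explaintext": 1,  # plain text instead of HTML
    "format": "json",
}
res = requests.get(API_URL, params=params, headers={"User-Agent": "CoolBot/0.0"})
pages = res.json()["query"]["pages"]  # keyed by page ID, each entry holds an "extract"
```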


@@ -1,26 +0,0 @@
# HOW THE DATASET IS BUILT AND POPULATED
Note: the data are taken from CSV files in 1-hop
## CSV files composition
| CSV files | Original structure | Saved AS |
|--------------------|---------------------------------------|-------------------------------------|
| Wikipedia-summary | PageId / abstract | subject, text |
| Movies | Movie URI | "subject" |
| Dataset | Movie URI / Relationship / Object [RDF] | subject, relationship, object |
| Movies-PageId | Movie URI / PageId (wiki) | "subject", "object" |
| Reverse | Subject / Relationship / Movie URI | "subject", "relationship", "object" |
## Wanted tables schema
| Table | Columns |
|---------------|-------------------------------------------------------------------------|
| Movies | MovieID [PK], Movie URI |
| WikiPageIDs | MovieID [PK, FK], PageId [IDX] (wiki) *(Not important for now)* |
| Abstracts | MovieID [PK, FK], abstract |
| Subjects | SubjectID [PK], RDF Subject (from Dataset.csv or Reverse.csv), OriginID [FK] |
| Relationships | RelationshipID [PK], RDF Relationship (value only, not the actual relation) |
| Objects | ObjectID [PK], RDF Object, OriginID [FK] |
| Origins | OriginID [PK], Origin Name |
| RDFs | RDF_ID [PK], MovieID [FK], SubjectID [FK], RelationshipID [FK], ObjectID [FK] |
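
To make the mapping concrete, here is how a single dataset.csv row would be decomposed into those tables (all IDs below are made up):

```python
# Illustrative decomposition of one dataset.csv row.
row = {
    "subject": "http://dbpedia.org/resource/Pulp_Fiction",
    "relationship": "http://dbpedia.org/ontology/director",
    "object": "http://dbpedia.org/resource/Quentin_Tarantino",
}
# Movies:        (MovieID=1, MovieURI=row["subject"])
# Subjects:      (SubjectID=1, SubjectURI=row["subject"], OriginID=1)   # dataset.csv
# Relationships: (RelationshipID=1, RelationshipURI=row["relationship"])
# Objects:       (ObjectID=1, ObjectURI=row["object"], OriginID=1)
# RDFs:          (RDF_ID=1, MovieID=1, SubjectID=1, RelationshipID=1, ObjectID=1)
```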


@@ -1,633 +0,0 @@
import sqlite3
import csv
#####################################################################
# This file builds DatawareHouse/dataset.db from the 1-hop csv files #
# Its schema is in ./SQL_Queries/db_creation.sql                      #
# The sql queries used to populate it are in ./SQL_Queries/query.sql  #
#####################################################################
# sometimes you may need to build a new db file, here is a little snippet for you:
# sqlite3 ./Assets/Dataset/Tmp/dataset.db < ./Scripts/DataCleaning/SQL_Queries/db_creation.sql
# --- Global configuration ---
DB_NAME = "./Assets/Dataset/DatawareHouse/dataset.db"
MOVIES_CSV = "./Assets/Dataset/1-hop/movies.csv"
PAGEID_CSV = "./Assets/Dataset/1-hop/movie-pageid.csv"
SUMMARY_CSV = "./Assets/Dataset/1-hop/wikipedia-summary.csv"
DATASET_CSV = "./Assets/Dataset/1-hop/dataset.csv"
REVERSE_CSV = "./Assets/Dataset/1-hop/reverse.csv"
URI_CSV = "./Assets/Dataset/1-hop/uri-abbreviations.csv"
MOVIES_CSV_HANDLER = open(MOVIES_CSV, "r", newline="", encoding="utf-8")
PAGEID_CSV_HANDLER = open(PAGEID_CSV, "r", newline="", encoding="utf-8")
SUMMARY_CSV_HANDLER = open(SUMMARY_CSV, "r", newline="", encoding="utf-8")
DATASET_CSV_HANDLER = open(DATASET_CSV, "r", newline="", encoding="utf-8")
REVERSE_CSV_HANDLER = open(REVERSE_CSV, "r", newline="", encoding="utf-8")
URI_ABBR_CSV_HANDLER = open(URI_CSV, "r", newline="", encoding="utf-8")
CONN = sqlite3.connect(DB_NAME)
CURS = CONN.cursor()
# MARK: SQL Definitions
# Insert MovieURI
def insertOrigin(curs: sqlite3.Cursor) -> bool:
QUERY = "INSERT INTO Origins (OriginName) VALUES ('dataset.csv'),('reverse.csv');"
try:
curs.execute(QUERY)
return True
except sqlite3.IntegrityError:
return False
def selectOrigin(curs: sqlite3.Cursor, originName: str) -> int | None:
QUERY = "SELECT OriginID FROM Origins WHERE OriginName = ?;"
curs.execute(QUERY, [originName])
originId = curs.fetchone()
if not originId:
return None
# in this case the real id is the first element of the tuple
return originId[0]
def insertMovie(curs: sqlite3.Cursor, movieUri: str) -> bool:
QUERY = "INSERT INTO Movies (MovieURI) VALUES (?);"
try:
curs.execute(QUERY, [movieUri])
return True
except sqlite3.IntegrityError:
return False
def selectMovieId(curs: sqlite3.Cursor, movieUri: str) -> int | None:
QUERY = "SELECT MovieID FROM Movies WHERE MovieURI = ?;"
curs.execute(QUERY, [movieUri])
movieId = curs.fetchone()
if not movieId:
return None
# in this case the real id is the first element of the tuple
return movieId[0]
def insertWikiPageId(curs: sqlite3.Cursor, movieId: int, pageId: int) -> bool:
QUERY = "INSERT INTO WikiPageIDs (MovieID, PageID) VALUES (?,?);"
try:
curs.execute(QUERY, [movieId, pageId])
return True
except sqlite3.IntegrityError:
return False
def selectMovieIdFromWikiPageId(curs: sqlite3.Cursor, pageId: int) -> int | None:
QUERY = "SELECT MovieID FROM WikiPageIDs WHERE PageID = ?;"
curs.execute(QUERY, [pageId])
movieId = curs.fetchone()
if not movieId:
return None
# in this case the real id is the first element of the tuple
return movieId[0]
def insertWikiAbstract(curs: sqlite3.Cursor, movieId: int, abstract: str) -> bool:
QUERY = "INSERT INTO WikipediaAbstracts (MovieID, Abstract) VALUES (?,?);"
try:
curs.execute(QUERY, [movieId, abstract])
return True
except sqlite3.IntegrityError:
return False
def insertSubject(curs: sqlite3.Cursor, subjectURI: str, originID: int) -> bool:
QUERY = "INSERT INTO Subjects (SubjectURI, OriginID) VALUES (?,?);"
try:
curs.execute(QUERY, [subjectURI, originID])
return True
except sqlite3.IntegrityError:
return False
def insertRelationship(curs: sqlite3.Cursor, relationshipURI: str) -> bool:
QUERY = "INSERT INTO Relationships (RelationshipURI) VALUES (?);"
try:
curs.execute(QUERY, [relationshipURI])
return True
except sqlite3.IntegrityError:
return False
def insertObject(curs: sqlite3.Cursor, objectURI: str, originID: int) -> bool:
QUERY = "INSERT INTO objects (ObjectURI, OriginID) VALUES (?,?);"
try:
curs.execute(QUERY, [objectURI, originID])
return True
except sqlite3.IntegrityError:
return False
def selectSubjectId(curs: sqlite3.Cursor, subjectURI: str) -> int | None:
QUERY = "SELECT SubjectID FROM Subjects WHERE SubjectURI = ?;"
curs.execute(QUERY, [subjectURI])
subjectId = curs.fetchone()
if not subjectId:
return None
# in this case the real id is the first element of the tuple
return subjectId[0]
def selectRelationshipId(curs: sqlite3.Cursor, relationshipURI: str) -> int | None:
QUERY = "SELECT RelationshipID FROM Relationships WHERE RelationshipURI = ?;"
curs.execute(QUERY, [relationshipURI])
relationshipId = curs.fetchone()
if not relationshipId:
return None
# in this case the real id is the first element of the tuple
return relationshipId[0]
def selectObjectId(curs: sqlite3.Cursor, objectURI: str) -> int | None:
QUERY = "SELECT ObjectID FROM Objects WHERE ObjectURI = ?;"
curs.execute(QUERY, [objectURI])
objectId = curs.fetchone()
if not objectId:
return None
# in this case the real id is the first element of the tuple
return objectId[0]
def insertRDF(
curs: sqlite3.Cursor,
movieId: int,
subjectId: int,
relationshipId: int,
objectId: int,
) -> bool:
QUERY = "INSERT INTO RDFs (MovieID, SubjectID, RelationshipID, ObjectID) VALUES (?,?,?,?);"
try:
curs.execute(QUERY, [movieId, subjectId, relationshipId, objectId])
return True
except sqlite3.IntegrityError:
return False
# UGLY: refactor to take the cursor as a parameter instead of using the global CURS
def insert_abbreviation(uri, abbreviation) -> bool:
QUERY = "INSERT INTO Abbreviations(URI, Abbreviation) VALUES (?,?);"
try:
CURS.execute(QUERY, [uri, abbreviation])
return True
except sqlite3.IntegrityError:
return False
# UGLY: refactor to take the cursor as a parameter instead of using the global CURS
def insert_object_abbreviation(object_id, abbreviation_id) -> bool:
QUERY = "INSERT INTO Objects_Abbreviations(ObjectID, AbbreviationID) VALUES (?,?);"
try:
CURS.execute(QUERY, [object_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: refactor to take the cursor as a parameter instead of using the global CURS
def insert_relationship_abbreviation(relationship_id, abbreviation_id) -> bool:
QUERY = "INSERT INTO Relationships_Abbreviations(RelationshipID, AbbreviationID) VALUES (?,?);"
try:
CURS.execute(QUERY, [relationship_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: refactor to take the cursor as a parameter instead of using the global CURS
def insert_subject_abbreviation(subject_id, abbreviation_id) -> bool:
QUERY = (
"INSERT INTO Subjects_Abbreviations(SubjectID, AbbreviationID) VALUES (?,?);"
)
try:
CURS.execute(QUERY, [subject_id, abbreviation_id])
return True
except sqlite3.IntegrityError:
return False
# UGLY: refactor to take the cursor as a parameter instead of using the global CURS
def select_abbreviation_id(uri) -> int | None:
QUERY = "SELECT AbbreviationID FROM Abbreviations WHERE URI LIKE ?;"
CURS.execute(QUERY, [uri])
abbreviation_id = CURS.fetchone()
if not abbreviation_id:
return None
# in this case the real id is the first element of the tuple
return abbreviation_id[0]
# MARK: Parsing
def parseMovies():
CSV_READER = csv.reader(MOVIES_CSV_HANDLER)
next(CSV_READER)
for row in CSV_READER:
MOVIE = row[0]
insertMovie(CURS, MOVIE)
def parseWikiPageId():
CSV_READER = csv.DictReader(PAGEID_CSV_HANDLER)
for row in CSV_READER:
MOVIE_URI = row["subject"]
WIKI_PAGE_ID = int(row["object"])
MOVIE_ID = selectMovieId(CURS, MOVIE_URI)
if MOVIE_ID is None:
print(f"The MovieUri: {MOVIE_URI} has not a MovieId ")
continue
insertWikiPageId(CURS, MOVIE_ID, WIKI_PAGE_ID)
def parseAbstract():
CSV_READER = csv.DictReader(SUMMARY_CSV_HANDLER)
for row in CSV_READER:
WIKI_PAGE_ID = int(row["subject"])
ABSTRACT = row["text"]
MOVIE_ID = selectMovieIdFromWikiPageId(CURS, WIKI_PAGE_ID)
if MOVIE_ID is None:
print(f"The WikiPageId: {WIKI_PAGE_ID} has not a MovieId ")
continue
insertWikiAbstract(CURS, MOVIE_ID, ABSTRACT)
def parseAbbreviations():
URI_CSV = csv.DictReader(URI_ABBR_CSV_HANDLER)
for row in URI_CSV:
URI = row["uri"]
ABBREVIATION = row["abbreviation"]
insert_abbreviation(URI, ABBREVIATION)
def parseRDF_Reverse():
REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER)
REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv")
if REVERSE_ORIGIN_ID is None:
return
total = 0
for row in REVERSE_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
print(f"RDF triplets:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
insertSubject(CURS, SUBJECT, REVERSE_ORIGIN_ID)
insertRelationship(CURS, RELATIONSHIP)
insertObject(CURS, OBJECT, REVERSE_ORIGIN_ID)
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
MOVIE_ID = selectMovieId(CURS, OBJECT)
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if MOVIE_ID is None:
print(f"No MovieId for {OBJECT}")
skip = True
if skip:
continue
if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore
total += 1
print(total)
def parseRDF_Dataset():
DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER)
DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv")
if DATASET_ORIGIN_ID is None:
return
total = 0
rdf_idx = 0
for row in DATASET_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
rdf_idx += 1
if rdf_idx % 100000 == 0:
print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
insertSubject(CURS, SUBJECT, DATASET_ORIGIN_ID)
insertRelationship(CURS, RELATIONSHIP)
insertObject(CURS, OBJECT, DATASET_ORIGIN_ID)
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
MOVIE_ID = selectMovieId(CURS, SUBJECT)
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if MOVIE_ID is None:
print(f"No MovieId for {SUBJECT}")
skip = True
if skip:
continue
if insertRDF(CURS, MOVIE_ID, SUBJECT_ID, RELATIONSHIP_ID, OBJECT_ID): # type: ignore
total += 1
print(total)
def parseAbbr_Reverse():
REVERSE_CSV_READER = csv.DictReader(REVERSE_CSV_HANDLER)
REVERSE_ORIGIN_ID = selectOrigin(CURS, "reverse.csv")
if REVERSE_ORIGIN_ID is None:
return
total = 0
for row in REVERSE_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
SUB_SECTIONS = SUBJECT.split("/")
REL_SECTIONS = RELATIONSHIP.split("/")
OBJ_SECTIONS = OBJECT.split("/")
SUB_ABBR_ID = None
REL_ABBR_ID = None
OBJ_ABBR_ID = None
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if skip:
continue
if len(SUB_SECTIONS) > 4:
index = min(len(SUB_SECTIONS), 7)
while index > 3:
PATH = "/".join(SUB_SECTIONS[0:index]) + "%"
SUB_ABBR_ID = select_abbreviation_id(PATH)
if SUB_ABBR_ID is not None:
if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID):
total += 1
index = 0
index -= 1
if len(REL_SECTIONS) > 4:
index = min(len(REL_SECTIONS), 7)
while index > 2:
PATH = "/".join(REL_SECTIONS[0:index]) + "%"
REL_ABBR_ID = select_abbreviation_id(PATH)
if REL_ABBR_ID is not None:
if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID):
total += 1
index = 0
index -= 1
if len(OBJ_SECTIONS) > 4:
index = min(len(OBJ_SECTIONS), 7)
while index > 3:
PATH = "/".join(OBJ_SECTIONS[0:index]) + "%"
OBJ_ABBR_ID = select_abbreviation_id(PATH)
if OBJ_ABBR_ID is not None:
if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID):
total += 1
index = 0
index -= 1
print(total)
def parseAbbr_Dataset():
DATASET_CSV_READER = csv.DictReader(DATASET_CSV_HANDLER)
DATASET_ORIGIN_ID = selectOrigin(CURS, "dataset.csv")
if DATASET_ORIGIN_ID is None:
return
total = 0
rdf_idx = 0
for row in DATASET_CSV_READER:
SUBJECT = row["subject"]
RELATIONSHIP = row["relationship"]
OBJECT = row["object"]
rdf_idx += 1
if rdf_idx % 100000 == 0:
print(f"RDF number {rdf_idx}:\n\t{SUBJECT} - {RELATIONSHIP} - {OBJECT}")
SUBJECT_ID = selectSubjectId(CURS, SUBJECT)
OBJECT_ID = selectObjectId(CURS, OBJECT)
RELATIONSHIP_ID = selectRelationshipId(CURS, RELATIONSHIP)
SUB_SECTIONS = SUBJECT.split("/")
REL_SECTIONS = RELATIONSHIP.split("/")
OBJ_SECTIONS = OBJECT.split("/")
SUB_ABBR_ID = None
REL_ABBR_ID = None
OBJ_ABBR_ID = None
skip = False
# guard
if SUBJECT_ID is None:
print(f"No SubjectId for {SUBJECT}")
skip = True
if OBJECT_ID is None:
print(f"No ObjectId for {OBJECT}")
skip = True
if RELATIONSHIP_ID is None:
print(f"No RelationshipId for {RELATIONSHIP}")
skip = True
if skip:
continue
if len(SUB_SECTIONS) > 4:
index = min(len(SUB_SECTIONS), 7)
while index > 3:
PATH = "/".join(SUB_SECTIONS[0:index]) + "%"
SUB_ABBR_ID = select_abbreviation_id(PATH)
if SUB_ABBR_ID is not None:
if insert_subject_abbreviation(SUBJECT_ID, SUB_ABBR_ID):
total += 1
index = 0
index -= 1
if len(REL_SECTIONS) > 4:
index = min(len(REL_SECTIONS), 7)
while index > 2:
PATH = "/".join(REL_SECTIONS[0:index]) + "%"
REL_ABBR_ID = select_abbreviation_id(PATH)
if REL_ABBR_ID is not None:
if insert_relationship_abbreviation(RELATIONSHIP_ID, REL_ABBR_ID):
total += 1
index = 0
index -= 1
if len(OBJ_SECTIONS) > 4:
index = min(len(OBJ_SECTIONS), 7)
while index > 3:
PATH = "/".join(OBJ_SECTIONS[0:index]) + "%"
OBJ_ABBR_ID = select_abbreviation_id(PATH)
if OBJ_ABBR_ID is not None:
if insert_object_abbreviation(OBJECT_ID, OBJ_ABBR_ID):
total += 1
index = 0
index -= 1
print(total)
# MARK: Actual Code
# parseMovies()
# parseWikiPageId()
# parseAbstract()
# insertOrigin(CURS)
# parseAbbreviations()
# parseRDF_Reverse()
# parseRDF_Dataset()
# parseAbbr_Reverse()
parseAbbr_Dataset()
CONN.commit()
CONN.close()
MOVIES_CSV_HANDLER.close()
PAGEID_CSV_HANDLER.close()
SUMMARY_CSV_HANDLER.close()
DATASET_CSV_HANDLER.close()
REVERSE_CSV_HANDLER.close()
URI_ABBR_CSV_HANDLER.close()
"""
The MovieUri: http://dbpedia.org/resource/1%25_(film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/10%25:_What_Makes_a_Hero%3F has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Arabica has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Kadhal has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Love_(2011_film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Love_(2012_film) has not a MovieId
The MovieUri: http://dbpedia.org/resource/100%25_Wolf has not a MovieId
The MovieUri: http://dbpedia.org/resource/Who_the_$&%25_Is_Jackson_Pollock%3F has not a MovieId
The MovieUri: http://dbpedia.org/resource/99%25:_The_Occupy_Wall_Street_Collaborative_Film has not a MovieId
The MovieUri: http://dbpedia.org/resource/99_and_44/100%25_Dead has not a MovieId
The MovieUri: http://dbpedia.org/resource/Postcards_from_the_48%25 has not a MovieId
The MovieUri: http://dbpedia.org/resource/Wool_100%25 has not a MovieId
"""
"""
The WikiPageId: 10068850 has not a MovieId
The WikiPageId: 55069615 has not a MovieId
The WikiPageId: 49510056 has not a MovieId
The WikiPageId: 4049786 has not a MovieId
The WikiPageId: 55510238 has not a MovieId
The WikiPageId: 31239628 has not a MovieId
The WikiPageId: 34757217 has not a MovieId
The WikiPageId: 64311757 has not a MovieId
The WikiPageId: 8326198 has not a MovieId
The WikiPageId: 42162164 has not a MovieId
The WikiPageId: 18502369 has not a MovieId
The WikiPageId: 58092358 has not a MovieId
The WikiPageId: 40710250 has not a MovieId
"""


@@ -1,826 +0,0 @@
{
"type": "excalidraw",
"version": 2,
"source": "https://marketplace.visualstudio.com/items?itemName=pomdtr.excalidraw-editor",
"elements": [
{
"type": "line",
"version": 4622,
"versionNonce": 1623045672,
"isDeleted": false,
"id": "twu_PiAvEuQ4l1YYtZLET",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 289.8504963515835,
"y": 91.87474806402287,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.09201683999922,
"height": 99.49948667804088,
"seed": 1975340120,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
0.2542098813493443,
75.20117273657175
],
[
0.011896425679918422,
83.76249969444815
],
[
3.970409367559332,
87.46174320643391
],
[
17.75573317066317,
90.59250103325854
],
[
41.05683533152865,
91.56737225214069
],
[
63.319497586673116,
90.01084754868091
],
[
75.14781395923075,
86.28844687220405
],
[
76.81603792670788,
83.15042405259751
],
[
77.05033394391478,
76.25776215104557
],
[
76.86643881413028,
6.3089586511537865
],
[
76.45188016352971,
-0.2999144698665015
],
[
71.50179495549581,
-3.9936571317850627
],
[
61.077971898861186,
-6.132877429442784
],
[
37.32348754161154,
-7.932114425900202
],
[
18.278415656797975,
-6.859225353587373
],
[
3.2995959613238286,
-3.2201165291205287
],
[
-0.04168289608444441,
-0.045185660461322996
],
[
0,
0
]
],
"index": "a1",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2327,
"versionNonce": 1593094440,
"isDeleted": false,
"id": "hmJk4dH9VpOsfkrCTkhvh",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 290.3744257898585,
"y": 149.00103172175278,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 637665624,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a2",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "line",
"version": 2413,
"versionNonce": 311708712,
"isDeleted": false,
"id": "X1ldVIXm4DfBal5N2Pwn9",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 289.3425684673547,
"y": 120.03697638652972,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 77.17198221193564,
"height": 8.562348957853036,
"seed": 904402520,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "round",
"boundElementIds": [],
"startBinding": null,
"endBinding": null,
"lastCommittedPoint": null,
"startArrowhead": null,
"endArrowhead": null,
"points": [
[
0,
0
],
[
2.033150371639873,
3.413095389435587
],
[
10.801287372573954,
6.276651055277943
],
[
22.468666942209353,
8.010803051612635
],
[
40.747074201802775,
8.168828515515864
],
[
62.077348233027564,
7.0647721921469495
],
[
74.53446931782398,
3.04824021069218
],
[
77.17198221193564,
-0.3935204423371723
]
],
"index": "a3",
"frameId": null,
"roundness": {
"type": 2
},
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 5410,
"versionNonce": 92833576,
"isDeleted": false,
"id": "CFhp5ZxSVwHYzGUj4hEn1",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 288.28461948527263,
"y": 84.74247943834126,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 76.59753601865496,
"height": 15.49127539284798,
"seed": 1782811480,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [
"bxuMGTzXLn7H-uBCptINx"
],
"index": "a4",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 820,
"versionNonce": 608002600,
"isDeleted": false,
"id": "B43R7rWwK2_vdiRHBSSPk",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 324.77660659049513,
"y": 109.21914711824485,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 1298686040,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a5",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 1108,
"versionNonce": 1839127848,
"isDeleted": false,
"id": "CkKMb9wkJfVk04T217zSs",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 325.12774837442873,
"y": 135.43576140530996,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 2133497176,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a6",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "ellipse",
"version": 991,
"versionNonce": 588838952,
"isDeleted": false,
"id": "SHJdKeQPkfpvzSoNH--3o",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 6.239590202363168,
"x": 325.77660659049513,
"y": 164.20448797661635,
"strokeColor": "#000000",
"backgroundColor": "#228be6",
"width": 11.226103154161754,
"height": 12.183758484455605,
"seed": 81668696,
"groupIds": [
"9PT4BXPfQ6UoCaB-T-h9A",
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"index": "a7",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false
},
{
"type": "text",
"version": 489,
"versionNonce": 2023207720,
"isDeleted": false,
"id": "vUSyMBPup0jZ71CYXKyGb",
"fillStyle": "solid",
"strokeWidth": 1,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"angle": 0,
"x": 280.1846389770508,
"y": 185.79462957545917,
"strokeColor": "#000000",
"backgroundColor": "#a5d8ff",
"width": 95.63072204589844,
"height": 23.595161071904883,
"seed": 425140056,
"groupIds": [
"dp_TZJyYdyPIH1hOkAPlb"
],
"strokeSharpness": "sharp",
"boundElementIds": [],
"fontSize": 17.4778970902999,
"fontFamily": 1,
"text": "dataset.db",
"baseline": 16.595161071904883,
"textAlign": "center",
"verticalAlign": "top",
"index": "a8",
"frameId": null,
"roundness": null,
"boundElements": [],
"updated": 1758646548051,
"link": null,
"locked": false,
"containerId": null,
"originalText": "dataset.db",
"autoResize": true,
"lineHeight": 1.350000000000001
},
{
"id": "R7pU0VP6CFKCAwuvt0xsr",
"type": "text",
"x": 295.5,
"y": 342,
"width": 374,
"height": 225,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "a9",
"roundness": null,
"seed": 705463336,
"version": 1130,
"versionNonce": 72522328,
"isDeleted": false,
"boundElements": null,
"updated": 1758648226024,
"link": null,
"locked": false,
"text": "class Extract(Action):\n # Static\n + type : ActionTypes = Extract\n \n # Properties\n - db_connection: Path\n - query: str\n - query_parameters: [str]\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Extract(Action):\n # Static\n + type : ActionTypes = Extract\n \n # Properties\n - db_connection: Path\n - query: str\n - query_parameters: [str]\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "G1xIRcJgm34_NMEWQFFlW",
"type": "text",
"x": 1419.5,
"y": 110,
"width": 253,
"height": 75,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aA",
"roundness": null,
"seed": 651981400,
"version": 256,
"versionNonce": 138082856,
"isDeleted": false,
"boundElements": null,
"updated": 1758646570344,
"link": null,
"locked": false,
"text": "class Pipeline\n - actions: [Action]\n ",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Pipeline\n - actions: [Action]\n ",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "TBVy3JbJCkbA9kjVEJ8lv",
"type": "text",
"x": 694,
"y": 100,
"width": 495,
"height": 150,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aB",
"roundness": null,
"seed": 680960040,
"version": 560,
"versionNonce": 85012520,
"isDeleted": false,
"boundElements": null,
"updated": 1758649442239,
"link": null,
"locked": false,
"text": "class Action\n + type: ActionTypes\n + name: str\n + depends_on: [str]\n\n + execute(mem) -> [Dict<str, any>] | Void",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Action\n + type: ActionTypes\n + name: str\n + depends_on: [str]\n\n + execute(mem) -> [Dict<str, any>] | Void",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "an7KRTzWpCytKNKgHftKC",
"type": "text",
"x": 1528.5,
"y": 365.5,
"width": 187,
"height": 150,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aC",
"roundness": null,
"seed": 1974317656,
"version": 306,
"versionNonce": 1574962264,
"isDeleted": false,
"boundElements": null,
"updated": 1758648154009,
"link": null,
"locked": false,
"text": "enum ActionTypes:\n + Extract\n + Aggregate\n + Filter\n + Map\n + Dump",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "enum ActionTypes:\n + Extract\n + Aggregate\n + Filter\n + Map\n + Dump",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "2pQ5EULirrWs_QZPbClhh",
"type": "text",
"x": 785,
"y": 332.5,
"width": 418,
"height": 375,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aH",
"roundness": null,
"seed": 1402251560,
"version": 742,
"versionNonce": 680432168,
"isDeleted": false,
"boundElements": null,
"updated": 1758649532881,
"link": null,
"locked": false,
"text": "class Aggregate(Action):\n # Static\n + type: ActionTypes = Aggregate\n\n # Properties\n - actionIDs: [str]\n - associations: [Association]\n - output_mapper: [str]\n\n + execute(mem):\n tables = mem.gather(actionIDs)\n\n for join in association:\n \n ",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Aggregate(Action):\n # Static\n + type: ActionTypes = Aggregate\n\n # Properties\n - actionIDs: [str]\n - associations: [Association]\n - output_mapper: [str]\n\n + execute(mem):\n tables = mem.gather(actionIDs)\n\n for join in association:\n \n ",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "O0fso8DJqFfwJEzmpUikM",
"type": "text",
"x": 1289,
"y": 195,
"width": 594,
"height": 100,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aI",
"roundness": null,
"seed": 1582329944,
"version": 459,
"versionNonce": 1080077144,
"isDeleted": false,
"boundElements": null,
"updated": 1758647067031,
"link": null,
"locked": false,
"text": "input_mapper: \n - key: ActionID (name) that produced such output\n - value: list of strings that represent the values\n to take",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "input_mapper: \n - key: ActionID (name) that produced such output\n - value: list of strings that represent the values\n to take",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "v0kzO6vlBWOdJCV3yoG69",
"type": "text",
"x": 1379.5,
"y": 718.5,
"width": 286,
"height": 175,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aL",
"roundness": null,
"seed": 1462407976,
"version": 635,
"versionNonce": 1012998696,
"isDeleted": false,
"boundElements": null,
"updated": 1758649495598,
"link": null,
"locked": false,
"text": "class Association:\n - from_actionID: str\n - from_key_name: str\n - from_value_name: str\n - to_actionID: str\n - to_value_name: str\n - type: Type",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Association:\n - from_actionID: str\n - from_key_name: str\n - from_value_name: str\n - to_actionID: str\n - to_value_name: str\n - type: Type",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "WK34n9xeVxntypCtrlK6p",
"type": "text",
"x": 256.5,
"y": 787.5,
"width": 517,
"height": 175,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aM",
"roundness": null,
"seed": 1166526296,
"version": 318,
"versionNonce": 1042162520,
"isDeleted": false,
"boundElements": null,
"updated": 1758649002604,
"link": null,
"locked": false,
"text": "class Filter(Action):\n # Static\n + type: ActionTypes = Filter\n\n # Properties\n - compare: function(Dict<str, any>) -> bool\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Filter(Action):\n # Static\n + type: ActionTypes = Filter\n\n # Properties\n - compare: function(Dict<str, any>) -> bool\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "NY9jyUFLFFCNPE2sh00SX",
"type": "text",
"x": 1639,
"y": 606.5,
"width": 407,
"height": 200,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aP",
"roundness": null,
"seed": 20345896,
"version": 168,
"versionNonce": 627282472,
"isDeleted": false,
"boundElements": null,
"updated": 1758649426380,
"link": null,
"locked": false,
"text": "class Map(Action):\n # Static\n + type: ActionTypes = Map\n\n # Properties\n - compare_mapper: [str]\n - mapper: function(any...) -> any\n - output_mapper: [str]",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Map(Action):\n # Static\n + type: ActionTypes = Map\n\n # Properties\n - compare_mapper: [str]\n - mapper: function(any...) -> any\n - output_mapper: [str]",
"autoResize": true,
"lineHeight": 1.25
},
{
"id": "SkhaoW-3TTKDZzEii3Lf6",
"type": "text",
"x": 1457.5,
"y": 955.5,
"width": 121,
"height": 50,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "#228be6",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"index": "aQ",
"roundness": null,
"seed": 2071523672,
"version": 37,
"versionNonce": 105260376,
"isDeleted": false,
"boundElements": null,
"updated": 1758648834435,
"link": null,
"locked": false,
"text": "class Dump:\n -",
"fontSize": 20,
"fontFamily": 8,
"textAlign": "left",
"verticalAlign": "top",
"containerId": null,
"originalText": "class Dump:\n -",
"autoResize": true,
"lineHeight": 1.25
}
],
"appState": {
"gridSize": 20,
"gridStep": 5,
"gridModeEnabled": false,
"viewBackgroundColor": "#ffffff"
},
"files": {}
}

View File

@ -1,215 +0,0 @@
# DBpedia
## GraphIRI
This is the graph identifier (IRI):
`http://dbpedia.org`
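Every query in the history below was run against this endpoint. Here is a minimal sketch of how to do that from Python with SPARQLWrapper (pinned in `requirements.txt`); the endpoint URL, the toy query, and the result handling are illustrative assumptions, not the project's actual harvesting code.
```python
# Minimal sketch (not from the repo): run a query against the public
# DBpedia endpoint with SPARQLWrapper. Endpoint URL and query are
# illustrative assumptions.
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://dbpedia.org/sparql")
sparql.setQuery("""
    PREFIX dbo: <http://dbpedia.org/ontology/>
    SELECT ?subject
    WHERE {
        ?subject rdf:type dbo:Film .
    }
    LIMIT 10
""")
sparql.setReturnFormat(JSON)

results = sparql.query().convert()
for binding in results["results"]["bindings"]:
    print(binding["subject"]["value"])  # e.g. http://dbpedia.org/resource/...
```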
## History of queries
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
{
SELECT ?object
WHERE {
?m rdf:type dbo:Film .
?object ?r ?m
}
}
}
```
### 2 Hops
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
FILTER (?relationship != <http://dbpedia.org/ontology/wikiPageWikiLink>)
{
SELECT ?object
WHERE {
?m rdf:type dbo:Film .
?object ?r ?m
FILTER (?r != <http://dbpedia.org/ontology/wikiPageWikiLink>)
}
}
}
LIMIT 1000000
```
### 1 Hop
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
?object rdf:type dbo:Film .
FILTER (?relationship != <http://dbpedia.org/ontology/wikiPageWikiLink>)
}
LIMIT 1000000
```
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
?subject rdf:type dbo:Film .
}
```
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
?subject rdf:type dbo:Film .
?a foaf:primaryTopic ?subject
}
```
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
SELECT ?subject
WHERE {
?subject rdf:type dbo:Film .
}
```
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?subject
WHERE {
?subject rdf:type dbo:Film .
?a foaf:primaryTopic ?subject
}
```
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
?subject rdf:type dbo:Film .
?a foaf:primaryTopic ?subject
FILTER (?relationship NOT IN (
dbo:wikiPageRedirects,
dbo:wikiPageExternalLink,
dbo:wikiPageWikiLink,
foaf:primaryTopic
))
}
```
#### Wikipedia-movie
a.k.a. the file with the Wikipedia abstract
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?subject , ?object
WHERE {
?subject foaf:primaryTopic ?object .
?object rdf:type dbo:Film
}
```
#### Reverse
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?subject, ?relationship, ?object
WHERE {
?subject ?relationship ?object .
?object rdf:type dbo:Film .
?a foaf:primaryTopic ?object
FILTER (?relationship NOT IN (
dbo:wikiPageRedirects,
dbo:wikiPageExternalLink,
dbo:wikiPageWikiLink,
foaf:primaryTopic
))
}
```
#### Film / Wikipedia page ID
```SQL
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX dbp: <http://dbpedia.org/property/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?subject ?pageID
WHERE {
?subject rdf:type dbo:Film .
?subject dbo:wikiPageID ?pageID .
?subject rdfs:label ?label .
FILTER (lang(?label) = "en")
}
```

View File

@ -1,3 +0,0 @@
# Development
## Data Gathering

View File

@ -1,108 +0,0 @@
# Resources
## Byte-Pair Encoding (BPE)
### Overview
Byte-Pair Encoding (BPE) is a simple but powerful text compression and tokenization algorithm.
Originally introduced as a data compression method, it has been widely adopted in **Natural Language Processing (NLP)** to build subword vocabularies for models such as GPT and BERT.
---
### Key Idea
BPE works by iteratively replacing the most frequent pair of symbols (initially characters) with a new symbol.
Over time, frequent character sequences (e.g., common morphemes, prefixes, suffixes) are merged into single tokens.
---
### Algorithm Steps
1. **Initialization**
- Treat each character of the input text as a token.
2. **Find Frequent Pairs**
- Count all adjacent token pairs in the sequence.
3. **Merge Most Frequent Pair**
- Replace the most frequent pair with a new symbol not used in the text.
4. **Repeat**
- Continue until no frequent pairs remain or a desired vocabulary size is reached.
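Below is a minimal Python sketch of these four steps (illustrative, not taken from the repository). When two pairs are tied for most frequent it merges whichever appears first, so its intermediate merges differ from the worked example that follows, even though the fully merged string comes out the same.
```python
from collections import Counter

def bpe_compress(text: str) -> tuple[str, dict[str, str]]:
    """Minimal BPE sketch: merge the most frequent adjacent pair until
    no pair occurs more than once. Not the repository's code."""
    merges: dict[str, str] = {}            # new symbol -> pair it replaced
    fresh_symbols = iter("ZYXWVUTSRQPON")  # placeholder symbols, as in the example
    while len(text) > 1:
        pairs = Counter(text[i:i + 2] for i in range(len(text) - 1))
        pair, count = pairs.most_common(1)[0]
        if count < 2:                      # step 4: no pair occurs more than once
            break
        symbol = next(fresh_symbols)       # step 3: merge into a new symbol
        merges[symbol] = pair
        text = text.replace(pair, symbol)
    return text, merges

print(bpe_compress("aaabdaaabac"))
# ('XdXac', {'Z': 'aa', 'Y': 'Za', 'X': 'Yb'})
```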
---
### Example
Suppose the data to be encoded is:
```text
aaabdaaabac
```
#### Step 1: Merge `"aa"`
Most frequent pair: `"aa"` → replace with `"Z"`
```text
ZabdZabac
Z = aa
```
---
#### Step 2: Merge `"ab"`
Most frequent pair: `"ab"` → replace with `"Y"`
```text
ZYdZYac
Y = ab
Z = aa
```
---
#### Step 3: Merge `"ZY"`
Most frequent pair: `"ZY"` → replace with `"X"`
```text
XdXac
X = ZY
Y = ab
Z = aa
```
---
At this point, no pairs occur more than once, so the process stops.
---
### Decompression
To recover the original data, replacements are applied in **reverse order**:
```text
XdXac
→ ZYdZYac
→ ZabdZabac
→ aaabdaaabac
```
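A matching decoder sketch (assuming the `merges` table produced by the compression sketch above), relying on dicts preserving insertion order:
```python
def bpe_decompress(text: str, merges: dict[str, str]) -> str:
    """Undo the merges in reverse order of their creation."""
    for symbol, pair in reversed(list(merges.items())):
        text = text.replace(symbol, pair)
    return text

# Merge table from the compression sketch above:
assert bpe_decompress("XdXac", {"Z": "aa", "Y": "Za", "X": "Yb"}) == "aaabdaaabac"
```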
---
### Advantages
- **Efficient vocabulary building**: reduces the need for massive word lists.
- **Handles rare words**: breaks them into meaningful subword units.
- **Balances character- and word-level tokenization**.
---
### Limitations
- Does not consider linguistic meaning: merges are purely frequency-based.
- May create tokens that are not linguistically natural.
- Vocabulary is fixed after training.

View File

@ -1,67 +0,0 @@
# SPARQL
> [!NOTE]
> Resources taken from [this website](https://sparql.dev/)
## SPARQL Queries
### SELECT
```SQL
SELECT ?var1, ?var2, ...
```
### WHERE
```SQL
WHERE {
pattern1 .
pattern2 .
...
}
```
### FILTER
It's used to restrict the results matched by a [`WHERE`](#where) clause
```SQL
WHERE {
?person <http://example.com/hasCar> ?car .
FILTER (?car = <http://example.com/Car1>)
}
```
### OPTIONAL
It's used to fetch optional content when it exists, without discarding results where it is missing
```SQL
SELECT ?person ?car
WHERE {
?person <http://example.com/hasCar> ?car .
OPTIONAL {
?car <http://example.com/hasColor> ?color .
}
}
```
### LIMIT
Limits the number of results returned
```SQL
LIMIT 10 # Take only the first 10 results
```
## SPARQL Functions
### COUNT
```SQL
SELECT (COUNT(?person) AS ?count)
WHERE {
?person <http://example.com/hasCar> ?car .
}
```

Binary file not shown.

View File

@ -1,17 +0,0 @@
certifi==2025.8.3
charset-normalizer==3.4.3
idna==3.10
numpy==2.3.3
pandas==2.3.2
pyparsing==3.2.4
python-dateutil==2.9.0.post0
pytz==2025.2
rdflib==7.1.4
requests==2.32.5
setuptools==78.1.1
six==1.17.0
SPARQLWrapper==2.0.0
tzdata==2025.2
urllib3==2.5.0
wheel==0.45.1
Wikipedia-API==0.8.1