diff --git a/Scripts/DataCleaning/dbpedia-uri.py b/Scripts/DataCleaning/dbpedia-uri.py
new file mode 100644
index 0000000..ceafe87
--- /dev/null
+++ b/Scripts/DataCleaning/dbpedia-uri.py
@@ -0,0 +1,77 @@
+import argparse
+import csv
+import sys
+from typing import Self
+
+
+class ProgramArgs:
+    """Plain holder for the three command-line values of this script."""
+
+    def __init__(self, file: str, output: str, treshold: int):
+        # "treshold" (sic) matches the --treshold flag and is used by name
+        # elsewhere, so the spelling is kept.
+        self.file, self.output, self.treshold = file, output, treshold
+def get_args(args: list[str]) -> ProgramArgs:
+    """Parse ``args`` into a ProgramArgs; unrecognized tokens are ignored."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input-file", "-i", required=True, type=str)
+    parser.add_argument("--output-file", "-o", required=True, type=str)
+    parser.add_argument("--treshold", "-t", type=int, default=1)
+    # parse_known_args returns (namespace, leftovers); leftovers are dropped.
+    known, _leftover = parser.parse_known_args(args)
+    return ProgramArgs(
+        file=known.input_file, output=known.output_file, treshold=known.treshold
+    )
+
+
+def print_dbpedia(file: str, out: str):
+    """Write one prefix row per distinct DBpedia URI base found in ``file``.
+
+    Each input line is split on "/" with empty pieces dropped; the first
+    three pieces, re-joined with single slashes, form the URI base (so
+    "http://dbpedia.org/ontology/X" yields "http:/dbpedia.org/ontology").
+    Lines with fewer than three pieces, or whose host has no "dbpedia"
+    label, are skipped. Every distinct base is written to ``out`` once as
+    `"BASE", "SUB-dbT"`, where SUB is the first host label cut to three
+    characters and T is the first character of the third piece.
+
+    Args:
+        file: Path of the UTF-8 text file to scan, read line by line.
+        out: Path of the UTF-8 file to create or overwrite.
+
+    Raises:
+        OSError: If either file cannot be opened.
+    """
+    domain_part = "dbpedia"
+    already_parsed: set[str] = set()
+
+    # The with-block closes both handles even if a line raises midway.
+    with (
+        open(file, "r", encoding="utf-8") as source,
+        open(out, mode="w", encoding="utf-8") as sink,
+    ):
+        for row in source:
+            # Drop the line terminator so it cannot leak into the quoted
+            # base when the third piece is the last thing on the line.
+            sections = [part for part in row.rstrip("\n").split("/") if part]
+            if len(sections) < 3:
+                continue
+
+            uri = "/".join(sections[:3])
+            if uri in already_parsed:
+                continue
+
+            subdomains = sections[1].split(".")
+            if domain_part not in subdomains:
+                continue
+            already_parsed.add(uri)
+            # Slicing already copes with labels shorter than three chars.
+            sub_id = subdomains[0][:3]
+            type_initial = sections[2][0]
+            sink.write(f"\"{uri}\", \"{sub_id}-db{type_initial}\"\n")
+
+
+if __name__ == "__main__":
+    # sys.argv[0] rides along; get_args ignores tokens it does not recognize.
+    options = get_args(sys.argv)
+    print_dbpedia(options.file, options.output)