Added file to execute the complete cleaning pipeline

2025-09-29 15:21:26 +02:00
parent 6ddb7de9da
commit bd72ad3571
9 changed files with 596 additions and 0 deletions
--- a/Scripts/DataCleaning/data_output_models/rdf_text_tasks.py
+++ b/Scripts/DataCleaning/data_output_models/rdf_text_tasks.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+class RDF_text_task_dataset():
+    """
+        Write the CSV for the firsts two tasks, which are "Generating structured RDF triples from natural language text" and reverse.
+        In the CVS the RDFs will be saved toghether as a string.
+        CSV Composition: ["MovieID","RDFs","Abstract"]
+    """
+    def __init__(self, output_path:str):
+     
+
+        self.output =  open(output_path, "w")
+        # then the first row as header
+        header = ["MovieID","RDFs","Abstract"]
+        self.output.write(",".join(header) + "\n")
+
+    def close(self):
+        self.output.close()
+
+    def write(self, RDF: pd.DataFrame):
+        """
+        Args:
+            RDF (pd.DataFrame): ["MovieID","Triple","Abstract"]
+        """        
+
+        RDF.to_csv(self.output, index=False, header=False)