Added file to execute the complete cleaning pipeline

This commit is contained in:
GassiGiuseppe
2025-09-29 15:21:26 +02:00
parent 6ddb7de9da
commit bd72ad3571
9 changed files with 596 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
import os

import pandas as pd
class RDF_completation_task_dataset():
    """
    Write the CSV for the fourth task, which is "Predicting subsequent triples
    based on a given context".

    Each RDF is saved as str.
    CSV Composition: ["MovieID","RDF"]

    The header row is written as soon as the object is created. Rows are
    appended with write(); the file must be released with close(), or by
    using the object in a ``with`` statement.
    """

    def __init__(self, output_path: str) -> None:
        """
        Args:
            output_path (str): path of the CSV to create; an existing file
                at that path is truncated.
        """
        # Explicit UTF-8 so the output does not depend on the platform locale.
        # newline="" disables newline translation on the handle: pandas emits
        # its own line terminator, and translating it again would produce
        # "\r\r\n" (blank lines between rows) on Windows.
        self.output = open(output_path, "w", encoding="utf-8", newline="")
        # then the first row as header; os.linesep matches the terminator
        # pandas uses by default for the data rows.
        header = ["MovieID", "RDF"]
        self.output.write(",".join(header) + os.linesep)

    def __enter__(self) -> "RDF_completation_task_dataset":
        """Allow ``with RDF_completation_task_dataset(path) as ds:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the file on leaving the ``with`` block, even on error."""
        self.close()

    def close(self) -> None:
        """Close the underlying file; calling it more than once is harmless."""
        self.output.close()

    def write(self, RDF: pd.DataFrame) -> None:
        """
        Append the rows of the given DataFrame to the CSV (no index, no header).

        Args:
            RDF (pd.DataFrame): ["MovieID","RDF"]
        """
        RDF.to_csv(self.output, index=False, header=False)