Added file to execute the complete cleaning pipeline
This commit is contained in:
26
Scripts/DataCleaning/data_output_models/rdf_text_tasks.py
Normal file
26
Scripts/DataCleaning/data_output_models/rdf_text_tasks.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import pandas as pd
|
||||
|
||||
class RDF_text_task_dataset():
|
||||
"""
|
||||
Write the CSV for the firsts two tasks, which are "Generating structured RDF triples from natural language text" and reverse.
|
||||
In the CVS the RDFs will be saved toghether as a string.
|
||||
CSV Composition: ["MovieID","RDFs","Abstract"]
|
||||
"""
|
||||
def __init__(self, output_path:str):
|
||||
|
||||
|
||||
self.output = open(output_path, "w")
|
||||
# then the first row as header
|
||||
header = ["MovieID","RDFs","Abstract"]
|
||||
self.output.write(",".join(header) + "\n")
|
||||
|
||||
def close(self):
|
||||
self.output.close()
|
||||
|
||||
def write(self, RDF: pd.DataFrame):
|
||||
"""
|
||||
Args:
|
||||
RDF (pd.DataFrame): ["MovieID","Triple","Abstract"]
|
||||
"""
|
||||
|
||||
RDF.to_csv(self.output, index=False, header=False)
|
||||
Reference in New Issue
Block a user