New utility to generate a CSV debug file of the pipeline's output
This commit is contained in:
parent
64e355e80c
commit
69fba7c3e9
21
Scripts/DataCleaning/data_output_models/debug_csv.py
Normal file
21
Scripts/DataCleaning/data_output_models/debug_csv.py
Normal file
@ -0,0 +1,21 @@
|
||||
import pandas as pd
|
||||
|
||||
class Debug_csv():
    """Append pipeline RDF frames to a single CSV file for debugging.

    The file is created (or truncated) on construction and a header row is
    written immediately. Each call to ``write`` appends the rows of one
    DataFrame without repeating the header. The instance can also be used
    as a context manager so the file is closed even if an error occurs.
    """

    def __init__(self, output_path: str):
        """Open ``output_path`` for writing and emit the header row.

        Args:
            output_path (str): path of the CSV file to create; an existing
                file is overwritten.
        """
        # newline="" is what pandas expects for file objects handed to
        # ``to_csv`` (otherwise line endings get translated twice on
        # Windows); an explicit UTF-8 encoding keeps the output independent
        # of the machine locale.
        self.output = open(output_path, "w", newline="", encoding="utf-8")

        # then the first row as header
        header = ["MovieURI", "SubjectURI", "RelationshipURI", "ObjectURI", "Abstract"]
        # Write the header through pandas as well, so that it uses the same
        # quoting rules and line terminator as the data rows.
        pd.DataFrame(columns=header).to_csv(self.output, index=False)

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; exceptions propagate."""
        self.close()
        return False

    def close(self):
        """Close the underlying file; calling it more than once is harmless."""
        self.output.close()

    def write(self, RDF: pd.DataFrame):
        """Append the rows of ``RDF`` to the CSV file (no index, no header).

        Args:
            RDF (pd.DataFrame): ["MovieURI","SubjectURI","RelationshipURI","ObjectURI","Abstract"]
        """
        RDF.to_csv(self.output, index=False, header=False)
|
||||
@ -6,6 +6,7 @@ from Scripts.DataCleaning.data_output_models.rdf_mask_task import RDF_mask_task_
|
||||
from Scripts.DataCleaning.data_output_models.bpe_corpus import BPE_corpus
|
||||
from Scripts.DataCleaning.data_output_models.rdf_text_tasks import RDF_text_task_dataset
|
||||
from Scripts.DataCleaning.data_output_models.rdf_completation_task import RDF_completation_task_dataset
|
||||
from Scripts.DataCleaning.data_output_models.debug_csv import Debug_csv
|
||||
|
||||
import pandas as pd
|
||||
|
||||
@ -115,6 +116,13 @@ class Pipeline():
|
||||
movie_list = [117248, 147074, 113621, 1123, 117586, 90177, 71587, 138952, 144137, 148025]
|
||||
self.sql_endpoint.movie_ids = movie_list
|
||||
|
||||
def generate_csv_debug_file(self, debug_path:str):
    """Dump everything yielded by ``_get_cleaned_movie_rows`` into one CSV file.

    Args:
        debug_path (str): path of the debug CSV file to create; it is
            opened in write mode by ``Debug_csv``, so an existing file is
            overwritten.
    """
    debug_csv = Debug_csv(debug_path)

    try:
        for RDF in self._get_cleaned_movie_rows():
            debug_csv.write(RDF)
    finally:
        # Release the file handle even when the pipeline fails part-way
        # through, so the partial debug output is flushed to disk.
        debug_csv.close()
|
||||
|
||||
|
||||
# there are a lot of settings to manage
|
||||
@ -125,9 +133,10 @@ class Pipeline():
|
||||
|
||||
pipeline = Pipeline()
|
||||
|
||||
# pipeline.use_toy_dataset()
|
||||
pipeline.use_toy_dataset()
|
||||
# pipeline.execute_task_bpe_corpus()
|
||||
# pipeline.execute_task_rdf_mask()
|
||||
# pipeline.execute_tasks_rdf_text()
|
||||
# pipeline.execute_task_rdf_completation()
|
||||
pipeline.execute_all_task()
|
||||
# pipeline.execute_all_task()
|
||||
pipeline.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
||||
Loading…
x
Reference in New Issue
Block a user