Added the possibility to whitelist relationships

This commit is contained in:
GassiGiuseppe
2025-10-12 12:26:26 +02:00
parent e9d30b3cea
commit 856c693650
2 changed files with 19 additions and 2 deletions

View File

@@ -35,6 +35,9 @@ RELATIONSHIP_FILTER_LIST = [
"dbp-dbp:website"
]
# Abbreviated relationship names to keep when filtering by whitelist.
# NOTE(review): presumably meant to be passed to the relationship filter's
# white-list lookup instead of the exclusion list -- confirm against the caller.
RELATIONSHIP_WHITE_LIST = [
    "dbp-dbp:director",
    "dbp-dbo:starring",
    "dbp-dbo:writer",
    "dbp-dbp:name",
    "dbp-dbp:genre",
    "purl:dc/terms/subject",
]
"""
SELECT DISTINCT field3
FROM debug
@@ -66,6 +69,7 @@ class Pipeline():
def _get_cleaned_movie_rows(self):
movie_ids = self._movie_filter.get_movie_id()
rel_ids = self._relationship_filter.get_relationship_id()
# rel_ids = self._relationship_filter.get_relationship_id_from_white_list(RELATIONSHIP_WHITE_LIST)
for RDF in self._rdf_filter.yield_movie_abbreviated_rdfs(movie_ids,rel_ids):
RDF = self._pipeline.drop_na_from_dataset(RDF)
@@ -147,5 +151,5 @@ class Pipeline():
pipe = Pipeline()
#pipe.use_toy_dataset()
pipe.other_filter()
pipe.execute_all_task()
# pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
# pipe.execute_all_task()
pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")