Added possibility to whitelist relationships
This commit is contained in:
parent
e9d30b3cea
commit
856c693650
@ -35,6 +35,9 @@ RELATIONSHIP_FILTER_LIST = [
|
||||
"dbp-dbp:website"
|
||||
]
|
||||
|
||||
# Relationship URIs to keep; meant to be handed to
# RelationshipFilter.get_relationship_id_from_white_list().
RELATIONSHIP_WHITE_LIST = [
    "dbp-dbp:director",
    "dbp-dbo:starring",
    "dbp-dbo:writer",
    "dbp-dbp:name",
    "dbp-dbp:genre",
    "purl:dc/terms/subject",
]
|
||||
"""
|
||||
SELECT DISTINCT field3
|
||||
FROM debug
|
||||
@ -66,6 +69,7 @@ class Pipeline():
|
||||
def _get_cleaned_movie_rows(self):
|
||||
movie_ids = self._movie_filter.get_movie_id()
|
||||
rel_ids = self._relationship_filter.get_relationship_id()
|
||||
# rel_ids = self._relationship_filter.get_relationship_id_from_white_list(RELATIONSHIP_WHITE_LIST)
|
||||
|
||||
for RDF in self._rdf_filter.yield_movie_abbreviated_rdfs(movie_ids,rel_ids):
|
||||
RDF = self._pipeline.drop_na_from_dataset(RDF)
|
||||
@ -147,5 +151,5 @@ class Pipeline():
|
||||
pipe = Pipeline()
|
||||
#pipe.use_toy_dataset()
|
||||
pipe.other_filter()
|
||||
pipe.execute_all_task()
|
||||
# pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
||||
# pipe.execute_all_task()
|
||||
pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
||||
@ -26,6 +26,19 @@ class RelationshipFilter:
|
||||
def get_relationship_id(self):
    """Return the object stored in ``self.RELATIONSHIP_FILTER``, unmodified."""
    current_filter = self.RELATIONSHIP_FILTER
    return current_filter
|
||||
|
||||
def get_relationship_id_from_white_list(self, relationship_list: list[str]):
    """Select the already-filtered relationship IDs whose URI is whitelisted.

    Runs a parameterized query against ``ParsedRelationships`` that keeps
    rows whose ``RelationshipID`` is one of the IDs held in
    ``self.RELATIONSHIP_FILTER`` and whose ``RelationshipURI`` is one of
    ``relationship_list``.

    Args:
        relationship_list: Relationship URIs to keep.

    Returns:
        Whatever ``self.sql_endpoint.get_dataframe_from_query`` returns for
        the query (its ``RelationshipID`` column).
    """
    # One "?" per bound value; joining the characters of "???" yields "?,?,?".
    id_marks = ",".join("?" * len(self.RELATIONSHIP_FILTER))
    uri_marks = ",".join("?" * len(relationship_list))

    sql = f"""
        SELECT RelationshipID
        FROM ParsedRelationships
        WHERE RelationshipID IN ({id_marks})
        AND RelationshipURI IN ({uri_marks});
    """

    # Bound values follow placeholder order: current IDs first, then URIs.
    known_ids = self.RELATIONSHIP_FILTER["RelationshipID"].to_list()
    bound_values = (*known_ids, *relationship_list)
    return self.sql_endpoint.get_dataframe_from_query(sql, bound_values)
|
||||
|
||||
|
||||
|
||||
def delete_relationship_uri_by_list(self, filter_list: list[str]):
|
||||
ids_placeholder = ",".join(["?"] * len(self.RELATIONSHIP_FILTER))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user