Added possibility to whitelist relationships
This commit is contained in:
parent
e9d30b3cea
commit
856c693650
@ -35,6 +35,9 @@ RELATIONSHIP_FILTER_LIST = [
|
|||||||
"dbp-dbp:website"
|
"dbp-dbp:website"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Relationship URIs that may be kept when the whitelist-based filter is used
# (passed to RelationshipFilter.get_relationship_id_from_white_list).
RELATIONSHIP_WHITE_LIST = [
    "dbp-dbp:director",
    "dbp-dbo:starring",
    "dbp-dbo:writer",
    "dbp-dbp:name",
    "dbp-dbp:genre",
    "purl:dc/terms/subject",
]
|
||||||
"""
|
"""
|
||||||
SELECT DISTINCT field3
|
SELECT DISTINCT field3
|
||||||
FROM debug
|
FROM debug
|
||||||
@ -66,6 +69,7 @@ class Pipeline():
|
|||||||
def _get_cleaned_movie_rows(self):
|
def _get_cleaned_movie_rows(self):
|
||||||
movie_ids = self._movie_filter.get_movie_id()
|
movie_ids = self._movie_filter.get_movie_id()
|
||||||
rel_ids = self._relationship_filter.get_relationship_id()
|
rel_ids = self._relationship_filter.get_relationship_id()
|
||||||
|
# rel_ids = self._relationship_filter.get_relationship_id_from_white_list(RELATIONSHIP_WHITE_LIST)
|
||||||
|
|
||||||
for RDF in self._rdf_filter.yield_movie_abbreviated_rdfs(movie_ids,rel_ids):
|
for RDF in self._rdf_filter.yield_movie_abbreviated_rdfs(movie_ids,rel_ids):
|
||||||
RDF = self._pipeline.drop_na_from_dataset(RDF)
|
RDF = self._pipeline.drop_na_from_dataset(RDF)
|
||||||
@ -147,5 +151,5 @@ class Pipeline():
|
|||||||
pipe = Pipeline()
|
pipe = Pipeline()
|
||||||
#pipe.use_toy_dataset()
|
#pipe.use_toy_dataset()
|
||||||
pipe.other_filter()
|
pipe.other_filter()
|
||||||
pipe.execute_all_task()
|
# pipe.execute_all_task()
|
||||||
# pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
||||||
@ -26,6 +26,19 @@ class RelationshipFilter:
|
|||||||
def get_relationship_id(self):
|
def get_relationship_id(self):
|
||||||
return self.RELATIONSHIP_FILTER
|
return self.RELATIONSHIP_FILTER
|
||||||
|
|
||||||
|
def get_relationship_id_from_white_list(self, relationship_list: list[str]):
    """Select the currently filtered relationship IDs whose URI is whitelisted.

    Queries ``ParsedRelationships`` for rows whose ``RelationshipID`` is among
    the IDs held in ``self.RELATIONSHIP_FILTER["RelationshipID"]`` and whose
    ``RelationshipURI`` is one of ``relationship_list``.

    Args:
        relationship_list: Relationship URIs to keep. Any iterable of strings
            is accepted; it is materialised once before use.

    Returns:
        Whatever ``self.sql_endpoint.get_dataframe_from_query`` returns for
        the query (presumably a DataFrame with a ``RelationshipID`` column --
        confirm against the endpoint implementation).
    """
    # Materialise both value sequences once so the number of "?" placeholders
    # is always derived from exactly the values that get bound.
    relationship_ids = self.RELATIONSHIP_FILTER["RelationshipID"].to_list()
    relationship_uris = list(relationship_list)

    ids_placeholder = ",".join(["?"] * len(relationship_ids))
    uri_placeholder = ",".join(["?"] * len(relationship_uris))

    # NOTE(review): an empty ID set or empty whitelist renders as "IN ()";
    # whether that is accepted depends on the SQL backend -- confirm.
    filter_query = f"""
        SELECT RelationshipID
        FROM ParsedRelationships
        WHERE RelationshipID IN ({ids_placeholder})
        AND RelationshipURI IN ({uri_placeholder});
    """

    # Bind order must match placeholder order: IDs first, then URIs.
    params = tuple(relationship_ids) + tuple(relationship_uris)
    return self.sql_endpoint.get_dataframe_from_query(filter_query, params)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def delete_relationship_uri_by_list(self, filter_list: list[str]):
|
def delete_relationship_uri_by_list(self, filter_list: list[str]):
|
||||||
ids_placeholder = ",".join(["?"] * len(self.RELATIONSHIP_FILTER))
|
ids_placeholder = ",".join(["?"] * len(self.RELATIONSHIP_FILTER))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user