Movie filters updated
This commit is contained in:
parent
7307916891
commit
0373460105
@ -45,5 +45,44 @@ class MovieFilter:
|
||||
self.MOVIE_FILTER = self.sql_endpoint.get_dataframe_from_query(filter_query, params)
|
||||
|
||||
|
||||
def filter_by_director(self):
    """Restrict ``self.MOVIE_FILTER`` to movies that have a director relationship.

    Reads the ``MovieID`` column of the current ``self.MOVIE_FILTER``, asks
    ``self.sql_endpoint`` for the distinct ``RDFs.MovieID`` values among them
    that are joined to a ``ParsedRelationships`` row whose ``RelationshipURI``
    is one of the director URIs, and stores the returned dataframe back into
    ``self.MOVIE_FILTER``.

    Every value (movie IDs and relationship URIs) is passed as a bound
    parameter, so the SQL text never depends on how Python renders a tuple.
    """
    # NOTE(review): filter_by_english_movies matches URIs that carry a
    # "<PRED>" prefix while these do not -- confirm which form
    # ParsedRelationships.RelationshipURI actually stores.
    director_uris = ("dbp-dbo:director", "dbp-dbp:director")
    movie_ids = self.MOVIE_FILTER["MovieID"].to_list()

    # One "?" per bound value; the values travel separately in ``params``.
    movie_placeholders = ",".join(["?"] * len(movie_ids))
    director_placeholders = ",".join(["?"] * len(director_uris))

    filter_query = f"""
        SELECT DISTINCT RDFs.MovieID
        FROM RDFs
        JOIN ParsedRelationships USING (RelationshipID)
        WHERE RDFs.MovieID IN ({movie_placeholders})
        AND ParsedRelationships.RelationshipURI IN ({director_placeholders});
    """

    # Order must follow the order of the placeholders in the query text.
    params = tuple(movie_ids) + director_uris
    self.MOVIE_FILTER = self.sql_endpoint.get_dataframe_from_query(filter_query, params)
|
||||
|
||||
|
||||
def filter_by_english_movies(self):
    """Restrict ``self.MOVIE_FILTER`` to movies whose language is English.

    Reads the ``MovieID`` column of the current ``self.MOVIE_FILTER``, asks
    ``self.sql_endpoint`` for the distinct ``RDFs.MovieID`` values among them
    that have a language relationship pointing at one of the English object
    URIs, and stores the returned dataframe back into ``self.MOVIE_FILTER``.

    All values are passed as bound parameters. Interpolating
    ``tuple(relationship)`` into the SQL text rendered the one-element tuple
    as ``('<PRED>dbp-dbp:language',)``; the trailing comma is not valid
    inside an SQL ``IN (...)`` list.
    """
    movie_ids = self.MOVIE_FILTER["MovieID"].to_list()
    relationship_uris = ("<PRED>dbp-dbp:language",)
    object_uris = ("<OBJ>English", "<OBJ>dbp-dbr:English_language")

    # One "?" per bound value; the values travel separately in ``params``.
    movie_placeholders = ",".join(["?"] * len(movie_ids))
    relationship_placeholders = ",".join(["?"] * len(relationship_uris))
    object_placeholders = ",".join(["?"] * len(object_uris))

    filter_query = f"""
        SELECT DISTINCT RDFs.MovieID
        FROM RDFs
        INNER JOIN ParsedRelationships USING (RelationshipID)
        INNER JOIN ParsedObjects USING (ObjectID)
        WHERE RDFs.MovieID IN ({movie_placeholders})
        AND ParsedRelationships.RelationshipURI IN ({relationship_placeholders})
        AND ParsedObjects.ObjectURI IN ({object_placeholders});
    """

    # Order must follow the order of the placeholders in the query text.
    params = tuple(movie_ids) + relationship_uris + object_uris
    self.MOVIE_FILTER = self.sql_endpoint.get_dataframe_from_query(filter_query, params)
|
||||
|
||||
|
||||
|
||||
# movie_filter = MovieFilter()
|
||||
# movie_filter.frequency_filter(5,10)
|
||||
@ -40,7 +40,8 @@ class Pipeline():
|
||||
|
||||
def other_filter(self):
|
||||
# Applies further filters through self._movie_filter, in the order listed.
# NOTE(review): this diff view shows both the live
# relation_filter("dbp-dbo:director", ...) call and filter_by_director();
# confirm which of the two the committed file actually keeps.
self._movie_filter.relation_filter("purl:dc/terms/subject",5,100)
|
||||
self._movie_filter.relation_filter("dbp-dbo:director",1,100)
|
||||
self._movie_filter.filter_by_director()
|
||||
# self._movie_filter.relation_filter("dbp-dbo:director",1,100)
|
||||
|
||||
def _get_cleaned_movie_rows(self):
|
||||
movie_ids = self._movie_filter.get_movie_id()
|
||||
@ -110,7 +111,7 @@ class Pipeline():
|
||||
# Django Unchained : 138952
|
||||
# Spirited Away : 144137
|
||||
# Knives Out : 148025
|
||||
movie_list = [117248, 147074, 113621, 1123, 117586, 90177, 71587, 138952, 144137, 148025]
|
||||
movie_list = [106465,106466,106467,106468,106469,106470,106471,106472,106473]#[117248, 147074, 113621, 1123, 117586, 90177, 71587, 138952, 144137, 148025]
|
||||
self._movie_filter.MOVIE_FILTER = pd.DataFrame({"MovieID": movie_list})
|
||||
|
||||
def generate_csv_debug_file(self, debug_path:str):
|
||||
@ -123,7 +124,7 @@ class Pipeline():
|
||||
|
||||
|
||||
# Module-level driver: builds a Pipeline, runs other_filter() and writes a
# debug CSV to Assets/Dataset/Tmp/debug.csv.
# NOTE(review): use_toy_dataset() appears both live and commented out in this
# diff view; confirm which form the committed file keeps.
pipe = Pipeline()
|
||||
pipe.use_toy_dataset()
|
||||
# pipe.use_toy_dataset()
|
||||
pipe.other_filter()
|
||||
# pipe.execute_all_task()
|
||||
pipe.generate_csv_debug_file("Assets/Dataset/Tmp/debug.csv")
|
||||
Loading…
x
Reference in New Issue
Block a user