changes to shorten the dataset

This commit is contained in:
GassiGiuseppe 2025-10-07 15:49:25 +02:00
parent a93e61b8c1
commit a04f4c7cb7

View File

@ -60,7 +60,8 @@ class Pipeline():
self._movie_filter.relation_filter("purl:dc/terms/subject",5,100)
self._movie_filter.filter_by_director()
self._movie_filter.filter_by_english_movies()
self._movie_filter.relation_filter("dbp-dbp:budget",1,100) # the most important film have relationship budget
self._movie_filter.relation_filter("dbp-dbp:budget",1,100) # the most important film have relationship budget
self._movie_filter.relation_filter("dbp-dbp:released",1,100) # to cut to 2000 :(
def _get_cleaned_movie_rows(self):
movie_ids = self._movie_filter.get_movie_id()