diff --git a/Scripts/DataCleaning/pipeline/pipeline.py b/Scripts/DataCleaning/pipeline/pipeline.py index d523897..d350497 100644 --- a/Scripts/DataCleaning/pipeline/pipeline.py +++ b/Scripts/DataCleaning/pipeline/pipeline.py @@ -60,7 +60,8 @@ class Pipeline(): self._movie_filter.relation_filter("purl:dc/terms/subject",5,100) self._movie_filter.filter_by_director() self._movie_filter.filter_by_english_movies() - self._movie_filter.relation_filter("dbp-dbp:budget",1,100) # the most important film have relationship budget + self._movie_filter.relation_filter("dbp-dbp:budget",1,100) # the most important film have relationship budget + self._movie_filter.relation_filter("dbp-dbp:released",1,100) # to cut to 2000 :( def _get_cleaned_movie_rows(self): movie_ids = self._movie_filter.get_movie_id()