Added a Toy Dataset entry point to the Pipeline class
Previously it was forced into sql_endpoint; now the whole pipeline can be managed from the Pipeline class
This commit is contained in:
@@ -18,8 +18,8 @@ class SqlEndpoint():
|
||||
# self.conn = self.sql_engine.connect().execution_options(stream_results=True)
|
||||
# it seems that sqlite doesn't support streaming cursors
|
||||
# PRAGMA executes better when writing, not reading
|
||||
self.chunk_size_row = chunk_size_row
|
||||
pass
|
||||
self.chunk_size_row = chunk_size_row # not used now, since each chunk is a movie
|
||||
self.movie_ids = movie_ids = pd.read_sql_query("SELECT MovieID FROM Movies;", self.sql_engine)["MovieID"]
|
||||
|
||||
def get_RDF(self) -> pd.DataFrame :
|
||||
|
||||
@@ -79,7 +79,7 @@ class SqlEndpoint():
|
||||
Pandas.DataFrame: [MovieID, SubjectURI, RelationshipURI, ObjectURI, Abstract]
|
||||
"""
|
||||
# chunk by movieId: the abstract is the same and some interesting logic is applicable
|
||||
movie_ids = pd.read_sql_query("SELECT MovieID FROM Movies;", self.sql_engine)["MovieID"]
|
||||
# movie_ids = pd.read_sql_query("SELECT MovieID FROM Movies;", self.sql_engine)["MovieID"]
|
||||
# CHOSEN MOVIES:
|
||||
# The Dark Knight : 117248
|
||||
# Inception : 147074
|
||||
@@ -91,8 +91,8 @@ class SqlEndpoint():
|
||||
# Django Unchained : 138952
|
||||
# Spirited Away : 144137
|
||||
# Knives Out : 148025
|
||||
movie_list = [117248, 147074, 113621, 1123, 117586, 90177, 71587, 138952, 144137, 148025]
|
||||
movie_ids = movie_list
|
||||
# movie_list = [117248, 147074, 113621, 1123, 117586, 90177, 71587, 138952, 144137, 148025]
|
||||
# movie_ids = movie_list
|
||||
|
||||
QUERY = """
|
||||
SELECT MovieID, SubjectURI, RelationshipURI, ObjectURI, Abstract
|
||||
@@ -104,7 +104,7 @@ class SqlEndpoint():
|
||||
WHERE MovieID = (?);
|
||||
"""
|
||||
|
||||
for movie_id in movie_ids:
|
||||
for movie_id in self.movie_ids:
|
||||
yield pd.read_sql_query(QUERY, self.sql_engine, params=(movie_id,))
|
||||
|
||||
def get_movies_id_count(self) -> pd.DataFrame:
|
||||
|
||||
Reference in New Issue
Block a user