diff --git a/Scripts/DataBaseQueries/dataset.sql b/Scripts/DataBaseQueries/dataset.sql
index e4f141b..7a9e4cb 100644
--- a/Scripts/DataBaseQueries/dataset.sql
+++ b/Scripts/DataBaseQueries/dataset.sql
@@ -8,4 +8,21 @@ INNER JOIN Objects USING (ObjectID);
 -- To pass to Pandas for abstracts
 SELECT *
 FROM RDFs
-INNER JOIN WikipediaAbstracts USING (MovieID);
\ No newline at end of file
+INNER JOIN WikipediaAbstracts USING (MovieID);
+
+-- To pass to Pandas for abbreviations
+SELECT *
+FROM Abbreviations;
+
+-- More complex to have clean dataset
+SELECT MovieID, GROUP_CONCAT('' || '' || SubjectURI || '' || RelationshipURI || '' || ObjectURI || '', '') as RDF_String, Abstract
+FROM RDFs
+INNER JOIN RelationshipsCountInRDFs USING(RelationshipID)
+INNER JOIN ParsedSubjects USING (SubjectID)
+INNER JOIN ParsedRelationships USING (RelationshipID)
+INNER JOIN ParsedObjects USING (ObjectID)
+INNER JOIN WikipediaAbstracts USING (MovieID)
+ -- insert WHERE here
+-- WHERE Rel_Count > 10
+-- AND MovieID IN (10, 100, 1000)
+GROUP BY MovieID;