From 3a6dca0681f4b07ff71a1e1a4bda3f8d446262b1 Mon Sep 17 00:00:00 2001 From: GassiGiuseppe Date: Mon, 22 Sep 2025 17:39:44 +0200 Subject: [PATCH] Infos about Dataset contruction from csv moved from python file to markdown --- Scripts/DatasetMerging/DBMerger.py | 45 --------------------------- Scripts/DatasetMerging/datasetInfo.md | 26 ++++++++++++++++ 2 files changed, 26 insertions(+), 45 deletions(-) delete mode 100644 Scripts/DatasetMerging/DBMerger.py create mode 100644 Scripts/DatasetMerging/datasetInfo.md diff --git a/Scripts/DatasetMerging/DBMerger.py b/Scripts/DatasetMerging/DBMerger.py deleted file mode 100644 index 4ad3989..0000000 --- a/Scripts/DatasetMerging/DBMerger.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -What we have now: Saved AS: - -Wikipeda-summary : PageId / abstract subject,text -Movies : Movie URI "subject" -Dataset : Movie URI / Relationship / Object [RDF] subject,relationship,object -Movies-PageId : Movie URI / PageId (wiki) "subject", "object" -Reverse : Subject / Relationship / Movie URI "subject","relationship","object" - -What we want: -( we will generate MovieID) -Movies : MovieID [PK] / Movie URI -WikiPageIDs : MovieID [PK, FK]/ PageId [IDX] (wiki) (Not important for now) -Abstracts : MovieID [PK, FK]/ abstract -Subjects : SubjectID [PK] / RDF Subject ( both from either Dataset.csv or Reverse.csv) / OriginID [FK] -Relationships : RelationshipID [PK]/ RDF Relationship (not the actual relationshi but the value) -Objects : ObjectID [PK]/ RDF Object / OriginID [FK] -Origins : OriginID [PK]/ Origin Name -RDFs : RDF_ID[PK] / MovieID [FK] / SubjectID [FK]/ RelationshipID [FK]/ ObjectID [FK] - -What we will build for the model - -we need RDF list for each movie together with abstract - -: MovieID / RDF_set / abstrct - -""" - -import sqlite3 - -# Create a SQL connection to our SQLite database -con = sqlite3.connect("data/portal_mammals.sqlite") - -cur = con.cursor() - -# Return all results of query -cur.execute('SELECT plot_id FROM plots WHERE 
plot_type="Control"') -cur.fetchall() - -# Return first result of query -cur.execute('SELECT species FROM species WHERE taxa="Bird"') -cur.fetchone() - -# Be sure to close the connection -con.close() \ No newline at end of file diff --git a/Scripts/DatasetMerging/datasetInfo.md b/Scripts/DatasetMerging/datasetInfo.md new file mode 100644 index 0000000..01cd846 --- /dev/null +++ b/Scripts/DatasetMerging/datasetInfo.md @@ -0,0 +1,26 @@ +# HOW THE DATASET IS BUILT AND POPULATED + +Note: the data are taken from CSV files in 1-hop + +## CSV files composition + +| CSV files | Original structure | Saved AS | +|--------------------|---------------------------------------|-------------------------------------| +| Wikipedia-summary | PageId / abstract | subject, text | +| Movies | Movie URI | "subject" | +| Dataset | Movie URI / Relationship / Object [RDF] | subject, relationship, object | +| Movies-PageId | Movie URI / PageId (wiki) | "subject", "object" | +| Reverse | Subject / Relationship / Movie URI | "subject", "relationship", "object" | + +## Wanted tables schema + +| Table | Columns | +|---------------|-------------------------------------------------------------------------| +| Movies | MovieID [PK], Movie URI | +| WikiPageIDs | MovieID [PK, FK], PageId [IDX] (wiki) *(Not important for now)* | +| Abstracts | MovieID [PK, FK], abstract | +| Subjects | SubjectID [PK], RDF Subject (from Dataset.csv or Reverse.csv), OriginID [FK] | +| Relationships | RelationshipID [PK], RDF Relationship (value only, not the actual relation) | +| Objects | ObjectID [PK], RDF Object, OriginID [FK] | +| Origins | OriginID [PK], Origin Name | +| RDFs | RDF_ID [PK], MovieID [FK], SubjectID [FK], RelationshipID [FK], ObjectID [FK] |