From 4bb03f86b346ca2428f00cbd3b9bd0f62dd5abad Mon Sep 17 00:00:00 2001
From: GassiGiuseppe <g.gassi@studenti.poliba.it>
Date: Thu, 18 Sep 2025 20:25:25 +0200
Subject: [PATCH] Added file to study the most frequent relationships in a CSV
 of triplets

---
 Script/DataGathering/analysis.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 Script/DataGathering/analysis.py

diff --git a/Script/DataGathering/analysis.py b/Script/DataGathering/analysis.py
new file mode 100644
index 0000000..75fc704
--- /dev/null
+++ b/Script/DataGathering/analysis.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+# Load the CSV; the relative path is resolved against the current working directory.
+df = pd.read_csv("./Assets/Dataset/1-hop/reverse.csv")
+
+# Keep the text after the last "/" of each 'relationship' value. The .str accessor
+# passes missing cells through as NaN, where a per-row x.split() would raise.
+df["relationship_short"] = df["relationship"].str.rsplit("/", n=1).str[-1]
+# Count each distinct suffix; value_counts() drops NaN and sorts by descending count.
+relationship_counts = df["relationship_short"].value_counts()
+
+# Print one "suffix: count" line per relationship, most frequent first.
+for rel, count in relationship_counts.items():
+    print(f"{rel}: {count}")