Added a file to study the most frequent relationships in the CSV triples
This commit is contained in:
parent e5f201f3db
commit 4bb03f86b3
14 Script/DataGathering/analysis.py Normal file
@@ -0,0 +1,14 @@
import pandas as pd

# Load the CSV
df = pd.read_csv("./Assets/Dataset/1-hop/reverse.csv")

# Extract the last part of the URL in 'relationship'
df["relationship_short"] = df["relationship"].apply(lambda x: x.split("/")[-1])

# Count occurrences of each unique last part
relationship_counts = df["relationship_short"].value_counts()

# Print the counts
for rel, count in relationship_counts.items():
    print(f"{rel}: {count}")
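For context, a minimal sketch of what the script does, run against a small in-memory DataFrame instead of the real reverse.csv; the rows below are hypothetical and only assume that the 'relationship' column holds full relationship URLs:

import pandas as pd

# Hypothetical rows; the real ./Assets/Dataset/1-hop/reverse.csv is assumed to
# have a 'relationship' column containing full relationship URLs.
df = pd.DataFrame({
    "relationship": [
        "http://example.org/ontology/hasPart",
        "http://example.org/ontology/hasPart",
        "http://example.org/ontology/locatedIn",
    ]
})

# Same steps as analysis.py: keep only the last URL segment, then count each value
df["relationship_short"] = df["relationship"].apply(lambda x: x.split("/")[-1])
relationship_counts = df["relationship_short"].value_counts()

for rel, count in relationship_counts.items():
    print(f"{rel}: {count}")
# For this hypothetical input, this prints:
# hasPart: 2
# locatedIn: 1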