# Provenance: payload of git patch 4bb03f86b346ca2428f00cbd3b9bd0f62dd5abad
# (GassiGiuseppe, Thu, 18 Sep 2025 20:25:25 +0200), which creates
# Script/DataGathering/analysis.py with the subject
# "Added file to study the most frequent relationship into a csv triplet".
"""Report how often each relationship occurs in a CSV of triples.

The ``relationship`` column is expected to hold URLs; only the text after
the last ``/`` is counted. Run as a script to print one ``name: count``
line per relationship, most frequent first::

    python analysis.py [path/to/triples.csv]
"""

import sys

import pandas as pd

# Dataset analysed when no path is given on the command line.
DEFAULT_CSV_PATH = "./Assets/Dataset/1-hop/reverse.csv"


def count_relationships(
    df: pd.DataFrame, column: str = "relationship"
) -> pd.Series:
    """Count occurrences of the last path segment of each relationship URL.

    Args:
        df: Frame containing the relationship column. It is not modified.
        column: Name of the column holding the relationship URLs.

    Returns:
        A Series indexed by the short relationship name (the text after the
        last ``/``; the whole value when it contains no ``/``) whose values
        are occurrence counts, sorted from most to least frequent.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    # Missing cells are loaded by pandas as float NaN, on which a plain
    # ``x.split("/")`` raises AttributeError; drop them up front so they are
    # simply left out of the counts.
    urls = df[column].dropna().astype(str)
    # Only the final segment is needed, so split once from the right.
    short_names = urls.str.rsplit("/", n=1).str[-1]
    return short_names.rename("relationship_short").value_counts()


def main(csv_path: str = DEFAULT_CSV_PATH) -> None:
    """Load ``csv_path`` and print ``name: count`` for every relationship."""
    df = pd.read_csv(csv_path)
    for rel, count in count_relationships(df).items():
        print(f"{rel}: {count}")


if __name__ == "__main__":
    # An optional first argument overrides the default dataset path.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_CSV_PATH)