"""Count how often each relationship type occurs in the 1-hop reverse dataset.

Originally added as Script/DataGathering/analysis.py. The script reads a CSV
with a ``relationship`` column, keeps only the text after the last ``/`` of
each value, and prints one ``<name>: <count>`` line per distinct value, most
frequent first.
"""

import pandas as pd

# Default input, relative to the directory the script is launched from.
DATASET_PATH = "./Assets/Dataset/1-hop/reverse.csv"


def last_url_segment(url):
    """Return the part of *url* after its final ``/``.

    Values that are not strings (for example the NaN pandas produces for an
    empty CSV cell) yield ``None`` instead of raising ``AttributeError``.
    A string without any ``/`` is returned unchanged.
    """
    if not isinstance(url, str):
        return None
    return url.rsplit("/", 1)[-1]


def count_relationships(df):
    """Add a ``relationship_short`` column to *df* and count its values.

    Args:
        df: DataFrame with a ``relationship`` column.

    Returns:
        A Series mapping each short relationship name to its number of
        occurrences, sorted by descending count. Rows whose relationship is
        missing are not counted (``value_counts`` drops them by default).
    """
    df["relationship_short"] = df["relationship"].map(last_url_segment)
    return df["relationship_short"].value_counts()


def main(csv_path=DATASET_PATH):
    """Load *csv_path* and print ``<relationship>: <count>`` for each value."""
    df = pd.read_csv(csv_path)
    for rel, count in count_relationships(df).items():
        print(f"{rel}: {count}")


if __name__ == "__main__":
    main()