From 14c5ade23034b42f823973a3ef9b849b76696e73 Mon Sep 17 00:00:00 2001
From: Christian Risi <75698846+CnF-Gris@users.noreply.github.com>
Date: Tue, 23 Sep 2025 17:57:38 +0200
Subject: [PATCH] Added CLI functionalities

---
 Scripts/DataGathering/analysis.py | 57 ++++++++++++++++++++++++++-----
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/Scripts/DataGathering/analysis.py b/Scripts/DataGathering/analysis.py
index 75fc704..7890e83 100644
--- a/Scripts/DataGathering/analysis.py
+++ b/Scripts/DataGathering/analysis.py
@@ -1,14 +1,53 @@
+import argparse
+import sys
 import pandas as pd
 
-# Load the CSV
-df = pd.read_csv("./Assets/Dataset/1-hop/reverse.csv")
 
-# Extract the last part of the URL in 'relationship'
-df["relationship_short"] = df["relationship"].apply(lambda x: x.split("/")[-1])
+class ProgramArgs:  # plain holder for the parsed command-line options
 
-# Count occurrences of each unique last part
-relationship_counts = df["relationship_short"].value_counts()
+    def __init__(
+        self, input_file: str, column: str, output_file: str, count: bool
+    ) -> None:
+        self.input_file = input_file  # path of the CSV handed to pd.read_csv
+        self.column = column  # name of the column whose values are tallied
+        self.output_file = output_file  # path of the text file that is written
+        self.count = count  # True: each value is written with its count
 
-# Print the counts
-for rel, count in relationship_counts.items():
-    print(f"{rel}: {count}")
+
+def get_args(args: list[str]) -> ProgramArgs:
+    """Parse the command-line tokens in ``args`` into a ProgramArgs."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input-file", "--input", "-i", required=True, type=str)
+    parser.add_argument("--output-file", "--output", "-o", required=True, type=str)
+    parser.add_argument("--column", "--col", required=True, type=str)
+    parser.add_argument("--count", "-c", action="store_true")
+    # parse_known_args ignores unrecognised tokens instead of exiting, so a
+    # leading program name in ``args`` is harmless.
+    parsed_args, _ = parser.parse_known_args(args)
+
+    return ProgramArgs(
+        input_file=parsed_args.input_file,
+        column=parsed_args.column,
+        output_file=parsed_args.output_file,
+        count=parsed_args.count,
+    )
+
+
+if __name__ == "__main__":
+    # Skip argv[0]: the program name is not an option to parse.
+    ARGS = get_args(sys.argv[1:])
+
+    # Load the CSV
+    df = pd.read_csv(ARGS.input_file)
+
+    # Count occurrences of each unique value in the selected column
+    item_counts = df[ARGS.column].value_counts()
+
+    # Open the output only after the input was read successfully, and let the
+    # context manager close (and flush) it even if a write fails.
+    with open(ARGS.output_file, "w", encoding="utf-8") as OUTPUT_FILE:
+        for item, count in item_counts.items():
+            if ARGS.count:
+                OUTPUT_FILE.write(f"{item}: {count}\n")
+            else:
+                OUTPUT_FILE.write(f"{item}\n")