# Reconstructed post-image of Scripts/DataGathering/analysis.py from the patch
# "Added CLI functionalities" (commit 14c5ade). The patch text had lost its
# line breaks, so the layout below was rebuilt from its added/context lines.
"""Count the distinct values of one CSV column and write them to a text file.

Usage::

    analysis.py --input-file IN.csv --output-file OUT.txt --column NAME [--count]

One line is written per distinct value of the chosen column, in the order
returned by ``pandas.Series.value_counts`` (most frequent first, missing
values excluded by default). With ``--count`` each line is ``value: count``;
without it each line is just ``value``.
"""

import argparse
import sys
from dataclasses import dataclass

import pandas as pd


@dataclass
class ProgramArgs:
    """Command-line options of the script, exposed as plain attributes.

    The field order matches the positional constructor callers already use:
    ``ProgramArgs(input_file, column, output_file, count)``.
    """

    input_file: str  # path of the CSV file to read
    column: str  # name of the CSV column whose values are counted
    output_file: str  # path of the text file that is (over)written
    count: bool  # True -> write "value: count" lines, False -> "value" only


def get_args(args: list[str]) -> ProgramArgs:
    """Parse command-line tokens into a :class:`ProgramArgs`.

    Args:
        args: Option tokens, normally ``sys.argv[1:]``.

    Returns:
        The parsed options.

    Raises:
        SystemExit: Raised by ``argparse`` when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-file", "--input", "-i", required=True, type=str)
    parser.add_argument("--output-file", "--output", "-o", required=True, type=str)
    parser.add_argument("--column", "--col", required=True, type=str)
    parser.add_argument("--count", "-c", action="store_true")

    # parse_known_args (rather than parse_args) is kept for backward
    # compatibility: existing callers hand over the full ``sys.argv``, whose
    # leading program name is an unrecognised positional that must be ignored.
    parsed, _unknown = parser.parse_known_args(args)

    return ProgramArgs(
        input_file=parsed.input_file,
        column=parsed.column,
        output_file=parsed.output_file,
        count=parsed.count,
    )


def main(argv: list[str]) -> None:
    """Run the value-count export for the given command-line tokens.

    Args:
        argv: Option tokens without the program name (``sys.argv[1:]``).

    Raises:
        SystemExit: If a required option is missing or the requested column
            does not exist in the input CSV.
    """
    options = get_args(argv)

    # Read and validate the input BEFORE touching the output path, so a bad
    # input file or column name never leaves a truncated/empty output behind.
    frame = pd.read_csv(options.input_file)
    if options.column not in frame.columns:
        raise SystemExit(
            f"Column {options.column!r} not found in {options.input_file!r}"
        )

    item_counts = frame[options.column].value_counts()

    if options.count:
        lines = (f"{item}: {count}\n" for item, count in item_counts.items())
    else:
        lines = (f"{item}\n" for item in item_counts.index)

    # The context manager guarantees the file is flushed and closed.
    with open(options.output_file, "w", encoding="utf-8") as output:
        output.writelines(lines)


if __name__ == "__main__":
    main(sys.argv[1:])