import argparse
import sys

import pandas as pd


class ProgramArgs:
    """Typed container for the command-line options of this script.

    Attributes are plain values copied from the parsed arguments:
    ``input_file`` (CSV path to read), ``column`` (column whose values are
    tallied), ``output_file`` (text file to write) and ``count`` (whether
    each value is written together with its number of occurrences).
    """

    def __init__(
        self, input_file: str, column: str, output_file: str, count: bool
    ) -> None:
        self.input_file = input_file
        self.column = column
        self.output_file = output_file
        self.count = count


def get_args(args: list[str]) -> ProgramArgs:
    """Parse ``args`` into a :class:`ProgramArgs`.

    Args:
        args: Command-line tokens. Unrecognized tokens are ignored (the
            parser uses ``parse_known_args``), so passing a list that still
            contains the program name, e.g. ``sys.argv``, keeps working.

    Returns:
        The parsed options.

    Raises:
        SystemExit: Raised by argparse when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-file", "--input", "-i", required=True, type=str)
    parser.add_argument("--output-file", "--output", "-o", required=True, type=str)
    parser.add_argument("--column", "--col", required=True, type=str)
    parser.add_argument("--count", "-c", action="store_true")

    parsed_args, _ = parser.parse_known_args(args)

    return ProgramArgs(
        parsed_args.input_file,
        parsed_args.column,
        parsed_args.output_file,
        parsed_args.count,
    )


def main(argv: "list[str] | None" = None) -> None:
    """Write the unique values of one CSV column to the output file.

    Values are written one per line in the order produced by
    ``Series.value_counts()``; with ``--count`` each line is
    ``"<value>: <occurrences>"``.

    Args:
        argv: Command-line tokens without the program name. Defaults to
            ``sys.argv[1:]``.

    Raises:
        SystemExit: If the requested column is not present in the CSV, or
            if argument parsing fails.
    """
    args = get_args(sys.argv[1:] if argv is None else argv)

    # Read and validate the input *before* touching the output path, so a
    # bad input file or column name does not truncate an existing output.
    df = pd.read_csv(args.input_file)
    if args.column not in df.columns:
        sys.exit(f"error: column {args.column!r} not found in {args.input_file}")

    item_counts = df[args.column].value_counts()

    # The --count decision is loop-invariant, so pick the line format once.
    if args.count:
        lines = (f"{item}: {count}\n" for item, count in item_counts.items())
    else:
        lines = (f"{item}\n" for item in item_counts.index)

    # The context manager guarantees the file is flushed and closed.
    with open(args.output_file, "w", encoding="utf-8") as output_file:
        output_file.writelines(lines)


if __name__ == "__main__":
    main()