54 lines
1.4 KiB
Python
Raw Normal View History

2025-09-23 17:57:38 +02:00
import argparse
import sys
import pandas as pd
2025-09-23 17:57:38 +02:00
class ProgramArgs:
    """Plain holder for the command-line options used by this script.

    Attributes are stored exactly as given; no validation or conversion
    is performed.
    """

    def __init__(
        self, input_file: str, column: str, output_file: str, count: bool
    ) -> None:
        """Record the four option values on the instance.

        Args:
            input_file: Path of the file to read.
            column: Name of the column to look at.
            output_file: Path of the file to write.
            count: Whether counts are written alongside each item.
        """
        (
            self.input_file,
            self.column,
            self.output_file,
            self.count,
        ) = (input_file, column, output_file, count)
2025-09-23 17:57:38 +02:00
def get_args(args: list[str]) -> ProgramArgs:
    """Parse command-line tokens into a ``ProgramArgs`` instance.

    Recognised options:
        --input-file / --input / -i     required, string
        --output-file / --output / -o   required, string
        --column / --col                required, string
        --count / -c                    flag, False when absent

    Args:
        args: Argument tokens to parse.

    Returns:
        A ``ProgramArgs`` filled from the parsed option values.

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or an option value is malformed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-file", "--input", "-i", required=True, type=str)
    parser.add_argument("--output-file", "--output", "-o", required=True, type=str)
    parser.add_argument("--column", "--col", required=True, type=str)
    # store_true is the built-in spelling of store_const/const=True/default=False.
    parser.add_argument("--count", "-c", action="store_true")

    # parse_known_args (rather than parse_args) is kept on purpose:
    # unrecognised tokens are ignored instead of being an error, so a caller
    # that passes the full sys.argv, program name included, keeps working.
    parsed_args, _ = parser.parse_known_args(args)

    return ProgramArgs(
        parsed_args.input_file,
        parsed_args.column,
        parsed_args.output_file,
        parsed_args.count,
    )
if __name__ == "__main__":
    # Script entry point: read a CSV, tally the values of one column and write
    # one line per distinct value (optionally with its count) to the output.

    # argv[0] is the program name, not an option, so it is not passed on.
    ARGS = get_args(sys.argv[1:])

    # Read and validate the input BEFORE touching the output file, so a bad
    # input path or column name does not leave a truncated/empty output behind
    # (and so input and output may safely be the same path).
    df = pd.read_csv(ARGS.input_file)
    if ARGS.column not in df.columns:
        sys.exit(f"error: column {ARGS.column!r} not found in {ARGS.input_file}")

    # Count occurrences of each distinct value in the selected column.
    item_counts = df[ARGS.column].value_counts()

    # The context manager guarantees the file is flushed and closed, even if a
    # write fails part-way through. Write-only mode is enough: nothing is read
    # back from the output.
    with open(ARGS.output_file, "w", encoding="utf-8") as output_file:
        for item, count in item_counts.items():
            if ARGS.count:
                output_file.write(f"{item}: {count}\n")
            else:
                output_file.write(f"{item}\n")