import argparse import csv import sys from typing import Self class ProgramArgs: def __init__(self, file: str, output: str, treshold: int): self.file = file self.output = output self.treshold = treshold class Node: def __init__( self, name: str, quantity: int = 0, ): self.name = name self.quantity = quantity self.children: dict[str, Node] = {} @property def is_leaf(self): return len(self.children) == 0 def append_child(self, child: list[str]): # print(child) KEY = child[0] if not self.children.get(KEY): self.children[KEY] = Node(KEY, 0) CHILD = self.children[KEY] self.quantity += 1 if len(child) == 1: return new_children = child[1:] CHILD.append_child(new_children) def __str__(self): return f"{self.name}/ - {self.quantity}" def get_args(args: list[str]) -> ProgramArgs: PARSER = argparse.ArgumentParser() PARSER.add_argument("--input-file", "-i", required=True, type=str) PARSER.add_argument("--output-file", "-o", required=True, type=str) PARSER.add_argument("--treshold", "-t", type=int, default=1) parsed_args, _ = PARSER.parse_known_args(args) # print(parsed_args.input_file) return ProgramArgs(parsed_args.input_file,parsed_args.output_file, parsed_args.treshold) # type ignore def get_debug_args() -> ProgramArgs: FILE = "./Assets/Dataset/Tmp/reverse-rel.txt" TRESHOLD = 1 return ProgramArgs( FILE, TRESHOLD ) def tree_like(file: str, out: str): INDENTATION = " " properties: dict[str, Node] = {} properties["pure"] = Node("pure", 0) properties["URI"] = Node("uri", 0) FILE = open(file, "r", encoding="utf-8") # TODO: Change here so it takes single URI from a CSV file for row in FILE: sections = row.split("/") sections = list(filter(lambda item: item != "", sections)) # print(sections) if sections[0] != "http:" and sections[0] != "https:": properties["pure"].append_child(sections) continue properties["URI"].append_child(sections) FILE.close() stack: list[tuple[Node, int]] = [] for _, item in properties.items(): stack.append((item, 0)) OUT = open(out, mode="w", encoding="utf-8") while len(stack) > 0: LAST_ITEM = stack.pop() NODE: Node = LAST_ITEM[0] DEPTH: int = LAST_ITEM[1] INDENT: str = INDENTATION * DEPTH if NODE.quantity < ARGS.treshold: continue OUT.write(f"{INDENT}- {NODE}\n") if NODE.is_leaf: continue CHILDREN = [] for _, child in NODE.children.items(): CHILDREN.append((child, DEPTH + 1)) stack.extend(CHILDREN) OUT.close() if __name__ == "__main__": ARGS = get_args(sys.argv) # ARGS = get_debug_args() tree_like(ARGS.file, ARGS.output)