Fixed a parsing bug and added CLI functionality

This commit is contained in:
Christian Risi 2025-09-23 17:58:08 +02:00
parent 14c5ade230
commit 25f401b577

View File

@ -3,14 +3,12 @@ import csv
import sys import sys
from typing import Self from typing import Self
class ProgramArgs: class ProgramArgs:
def __init__( def __init__(self, file: str, output: str, treshold: int):
self,
file: str,
treshold: int
):
self.file = file self.file = file
self.output = output
self.treshold = treshold self.treshold = treshold
@ -23,61 +21,63 @@ class Node:
): ):
self.name = name self.name = name
self.quantity = quantity self.quantity = quantity
self.children : dict[str, Node] = {} self.children: dict[str, Node] = {}
@property @property
def is_leaf(self): def is_leaf(self):
return len(self.children) == 0 return len(self.children) == 0
def append_child(self, child: list[str]):
def append_child(self, child : list[str]): # print(child)
print(child)
KEY = child[0] KEY = child[0]
if not self.children.get(KEY): if not self.children.get(KEY):
self.children[KEY] = Node(KEY, 0) self.children[KEY] = Node(KEY, 0)
CHILD = self.children[KEY] CHILD = self.children[KEY]
CHILD.quantity += 1 self.quantity += 1
if len(child) == 1: if len(child) == 1:
return return
new_children = child[1:] new_children = child[1:]
CHILD.append_child(new_children[1:]) CHILD.append_child(new_children)
def __str__(self): def __str__(self):
return f"{self.name}: {self.quantity}" return f"{self.name}/ - {self.quantity}"
def get_args(args: list[str]) -> ProgramArgs: def get_args(args: list[str]) -> ProgramArgs:
PARSER = argparse.ArgumentParser() PARSER = argparse.ArgumentParser()
PARSER.add_argument("--input-file", "-i", required=True, type=str) PARSER.add_argument("--input-file", "-i", required=True, type=str)
PARSER.add_argument("--output-file", "-o", required=True, type=str)
PARSER.add_argument("--treshold", "-t", type=int, default=1) PARSER.add_argument("--treshold", "-t", type=int, default=1)
parsed_args, _ = PARSER.parse_known_args(args) parsed_args, _ = PARSER.parse_known_args(args)
print(parsed_args.input_file) # print(parsed_args.input_file)
return ProgramArgs(parsed_args.input_file,parsed_args.output_file, parsed_args.treshold) # type ignore
def get_debug_args() -> ProgramArgs:
FILE = "./Assets/Dataset/Tmp/reverse-rel.txt"
TRESHOLD = 1
return ProgramArgs( return ProgramArgs(
parsed_args.input_file, FILE,
parsed_args.treshold TRESHOLD
) # type ignore )
def tree_like(file: str): def tree_like(file: str, out: str):
INDENTATION = "\t" INDENTATION = " "
properties : dict[str, Node] = {} properties: dict[str, Node] = {}
properties["pure"] = Node("pure", 0) properties["pure"] = Node("pure", 0)
properties["URI"] = Node("uri", 0) properties["URI"] = Node("uri", 0)
@ -87,39 +87,38 @@ def tree_like(file: str):
for row in FILE: for row in FILE:
sections = row.split("/") sections = row.split("/")
sections = list(filter(lambda item: item != "", sections))
print(sections) # print(sections)
if len(sections) < 2: if sections[0] != "http:" and sections[0] != "https:":
properties["pure"].append_child(sections) properties["pure"].append_child(sections)
continue continue
properties["URI"].append_child( properties["URI"].append_child(sections)
sections
)
FILE.close() FILE.close()
stack : list[(Node, int)] = [] stack: list[tuple[Node, int]] = []
for _, item in properties.items(): for _, item in properties.items():
stack.append((item, 0)) stack.append((item, 0))
OUT = open(out, mode="w", encoding="utf-8")
while len(stack) > 0: while len(stack) > 0:
LAST_ITEM = stack.pop() LAST_ITEM = stack.pop()
NODE : Node = LAST_ITEM[0] NODE: Node = LAST_ITEM[0]
DEPTH : int = LAST_ITEM[1] DEPTH: int = LAST_ITEM[1]
INDENT : str = INDENTATION * DEPTH INDENT: str = INDENTATION * DEPTH
if NODE.quantity < ARGS.treshold: if NODE.quantity < ARGS.treshold:
continue continue
print(f"{INDENT}{NODE}") OUT.write(f"{INDENT}- {NODE}\n")
if NODE.is_leaf: if NODE.is_leaf:
continue continue
@ -127,15 +126,14 @@ def tree_like(file: str):
CHILDREN = [] CHILDREN = []
for _, child in NODE.children.items(): for _, child in NODE.children.items():
CHILDREN.append( CHILDREN.append((child, DEPTH + 1))
(child, DEPTH + 1)
)
stack.extend( stack.extend(CHILDREN)
CHILDREN
) OUT.close()
if __name__ == "__main__": if __name__ == "__main__":
ARGS = get_args(sys.argv) ARGS = get_args(sys.argv)
tree_like(ARGS.file) # ARGS = get_debug_args()
tree_like(ARGS.file, ARGS.output)