Fixed a parsing bug and added CLI functionality
This commit is contained in:
parent
14c5ade230
commit
25f401b577
@ -3,14 +3,12 @@ import csv
|
||||
import sys
|
||||
from typing import Self
|
||||
|
||||
|
||||
class ProgramArgs:
    """Settings for one run of the script.

    Attributes:
        file: path of the input file.
        output: path of the output file.
        treshold: minimum ``quantity`` a node needs in order to be written
            (the spelling is part of the existing interface).
    """

    def __init__(self, file: str, output: str, treshold: int):
        self.file = file
        self.output = output
        self.treshold = treshold
|
||||
|
||||
|
||||
@ -23,61 +21,63 @@ class Node:
|
||||
):
|
||||
self.name = name
|
||||
self.quantity = quantity
|
||||
self.children : dict[str, Node] = {}
|
||||
|
||||
self.children: dict[str, Node] = {}
|
||||
|
||||
@property
|
||||
def is_leaf(self):
|
||||
return len(self.children) == 0
|
||||
|
||||
def append_child(self, child: list[str]):
|
||||
|
||||
def append_child(self, child : list[str]):
|
||||
|
||||
print(child)
|
||||
# print(child)
|
||||
KEY = child[0]
|
||||
|
||||
if not self.children.get(KEY):
|
||||
self.children[KEY] = Node(KEY, 0)
|
||||
|
||||
CHILD = self.children[KEY]
|
||||
CHILD.quantity += 1
|
||||
self.quantity += 1
|
||||
|
||||
if len(child) == 1:
|
||||
return
|
||||
|
||||
new_children = child[1:]
|
||||
|
||||
CHILD.append_child(new_children[1:])
|
||||
|
||||
CHILD.append_child(new_children)
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}: {self.quantity}"
|
||||
|
||||
|
||||
|
||||
|
||||
return f"{self.name}/ - {self.quantity}"
|
||||
|
||||
|
||||
def get_args(args: list[str]) -> ProgramArgs:
    """Parse command-line arguments into a ProgramArgs.

    Args:
        args: argument strings, typically ``sys.argv``. Entries the parser
            does not recognise are ignored because ``parse_known_args`` is
            used, so the full argv can be passed as-is.

    Returns:
        A ProgramArgs built from ``--input-file``/``-i``,
        ``--output-file``/``-o`` and ``--treshold``/``-t`` (default 1).

    Raises:
        SystemExit: raised by argparse when a required option is missing
            or a value cannot be converted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-file", "-i", required=True, type=str)
    parser.add_argument("--output-file", "-o", required=True, type=str)
    parser.add_argument("--treshold", "-t", type=int, default=1)

    parsed_args, _ = parser.parse_known_args(args)

    return ProgramArgs(
        parsed_args.input_file,
        parsed_args.output_file,
        parsed_args.treshold,
    )
|
||||
|
||||
|
||||
def get_debug_args() -> ProgramArgs:
    """Return fixed arguments for running the script without a command line.

    Returns:
        A ProgramArgs pointing at the temporary dataset file, with a
        threshold of 1.
    """
    input_path = "./Assets/Dataset/Tmp/reverse-rel.txt"
    # NOTE(review): ProgramArgs requires an output path; this location is a
    # placeholder next to the input file — adjust if another one is wanted.
    output_path = "./Assets/Dataset/Tmp/reverse-rel-tree.txt"
    treshold = 1

    return ProgramArgs(input_path, output_path, treshold)
|
||||
|
||||
|
||||
def tree_like(file: str):
|
||||
def tree_like(file: str, out: str):
|
||||
|
||||
INDENTATION = "\t"
|
||||
INDENTATION = " "
|
||||
|
||||
properties : dict[str, Node] = {}
|
||||
properties: dict[str, Node] = {}
|
||||
|
||||
properties["pure"] = Node("pure", 0)
|
||||
properties["URI"] = Node("uri", 0)
|
||||
@ -87,39 +87,38 @@ def tree_like(file: str):
|
||||
for row in FILE:
|
||||
|
||||
sections = row.split("/")
|
||||
sections = list(filter(lambda item: item != "", sections))
|
||||
|
||||
print(sections)
|
||||
# print(sections)
|
||||
|
||||
if len(sections) < 2:
|
||||
if sections[0] != "http:" and sections[0] != "https:":
|
||||
properties["pure"].append_child(sections)
|
||||
continue
|
||||
|
||||
properties["URI"].append_child(
|
||||
sections
|
||||
)
|
||||
|
||||
|
||||
properties["URI"].append_child(sections)
|
||||
|
||||
FILE.close()
|
||||
|
||||
stack : list[(Node, int)] = []
|
||||
stack: list[tuple[Node, int]] = []
|
||||
|
||||
for _, item in properties.items():
|
||||
stack.append((item, 0))
|
||||
|
||||
OUT = open(out, mode="w", encoding="utf-8")
|
||||
|
||||
while len(stack) > 0:
|
||||
|
||||
LAST_ITEM = stack.pop()
|
||||
|
||||
NODE : Node = LAST_ITEM[0]
|
||||
DEPTH : int = LAST_ITEM[1]
|
||||
NODE: Node = LAST_ITEM[0]
|
||||
DEPTH: int = LAST_ITEM[1]
|
||||
|
||||
INDENT : str = INDENTATION * DEPTH
|
||||
INDENT: str = INDENTATION * DEPTH
|
||||
|
||||
if NODE.quantity < ARGS.treshold:
|
||||
continue
|
||||
|
||||
print(f"{INDENT}{NODE}")
|
||||
OUT.write(f"{INDENT}- {NODE}\n")
|
||||
|
||||
if NODE.is_leaf:
|
||||
continue
|
||||
@ -127,15 +126,14 @@ def tree_like(file: str):
|
||||
CHILDREN = []
|
||||
|
||||
for _, child in NODE.children.items():
|
||||
CHILDREN.append(
|
||||
(child, DEPTH + 1)
|
||||
)
|
||||
CHILDREN.append((child, DEPTH + 1))
|
||||
|
||||
stack.extend(
|
||||
CHILDREN
|
||||
)
|
||||
stack.extend(CHILDREN)
|
||||
|
||||
OUT.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # ARGS stays a module-level name: tree_like reads ARGS.treshold.
    ARGS = get_args(sys.argv)
    # For a run without command-line options, use instead:
    # ARGS = get_debug_args()
    tree_like(ARGS.file, ARGS.output)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user