CSV support added to path_splitter_tree
Also fixed a minor bug so that leaf nodes are printed as well
This commit is contained in:
parent
3eec49ffa5
commit
57884eaf2e
@ -6,8 +6,16 @@ from typing import Self
|
||||
|
||||
class ProgramArgs:
|
||||
|
||||
def __init__(self, file: str, output: str, treshold: int):
|
||||
def __init__(self, file: str, csv_uri_header: str, output: str, treshold: int):
|
||||
"""
|
||||
Args:
|
||||
file (str):
|
||||
csv_uri_header (str): The name of the CSV column from which the program reads the URIs
|
||||
output (str):
|
||||
treshold (int):
|
||||
"""
|
||||
self.file = file
|
||||
self.csv_uri_header = csv_uri_header
|
||||
self.output = output
|
||||
self.treshold = treshold
|
||||
|
||||
@ -33,11 +41,15 @@ class Node:
|
||||
KEY = child[0]
|
||||
|
||||
if not self.children.get(KEY):
|
||||
# if the key has no value, it means we are traversing this branch for the first time
|
||||
# create another node for the key
|
||||
self.children[KEY] = Node(KEY, 0)
|
||||
|
||||
# take the node for the key
|
||||
CHILD = self.children[KEY]
|
||||
self.quantity += 1
|
||||
|
||||
# if the child list to enter has only one element, which is KEY, no more node will be created
|
||||
if len(child) == 1:
|
||||
return
|
||||
|
||||
@ -53,27 +65,32 @@ def get_args(args: list[str]) -> ProgramArgs:
|
||||
|
||||
PARSER = argparse.ArgumentParser()
|
||||
PARSER.add_argument("--input-file", "-i", required=True, type=str)
|
||||
PARSER.add_argument("--header-name", "-c", required=True, type=str) # c stands for column
|
||||
PARSER.add_argument("--output-file", "-o", required=True, type=str)
|
||||
PARSER.add_argument("--treshold", "-t", type=int, default=1)
|
||||
parsed_args, _ = PARSER.parse_known_args(args)
|
||||
|
||||
# print(parsed_args.input_file)
|
||||
|
||||
return ProgramArgs(parsed_args.input_file,parsed_args.output_file, parsed_args.treshold) # type ignore
|
||||
return ProgramArgs(parsed_args.input_file, parsed_args.header_name ,parsed_args.output_file, parsed_args.treshold) # type ignore
|
||||
|
||||
|
||||
def get_debug_args() -> ProgramArgs:
|
||||
|
||||
FILE = "./Assets/Dataset/Tmp/reverse-rel.txt"
|
||||
# -i ./Assets/Dataset/1-hop/movies.csv -c subject -o Assets/Dataset/Tmp/prova.csv -t 1
|
||||
FILE = "./Assets/Dataset/1-hop/movies.csv"
|
||||
CSV_HEADER = "subject"
|
||||
OUTPUT = "./Assets/Dataset/Tmp/prova.csv"
|
||||
TRESHOLD = 1
|
||||
|
||||
return ProgramArgs(
|
||||
FILE,
|
||||
CSV_HEADER,
|
||||
OUTPUT,
|
||||
TRESHOLD
|
||||
)
|
||||
|
||||
|
||||
def tree_like(file: str, out: str):
|
||||
def tree_like(file: str, csv_uri_header:str, out: str):
|
||||
|
||||
INDENTATION = " "
|
||||
|
||||
@ -85,9 +102,11 @@ def tree_like(file: str, out: str):
|
||||
FILE = open(file, "r", encoding="utf-8")
|
||||
|
||||
# TODO: Change here so it takes single URI from a CSV file
|
||||
for row in FILE:
|
||||
# The header name is needed to select the URI column of each CSV row
|
||||
for row in csv.DictReader(FILE):
|
||||
|
||||
sections = row.split("/")
|
||||
uri_element = row[csv_uri_header]
|
||||
sections = uri_element.split("/")
|
||||
sections = list(filter(lambda item: item != "", sections))
|
||||
|
||||
# print(sections)
|
||||
@ -116,7 +135,9 @@ def tree_like(file: str, out: str):
|
||||
|
||||
INDENT: str = INDENTATION * DEPTH
|
||||
|
||||
if NODE.quantity < ARGS.treshold:
|
||||
# Leaf nodes have quantity 0, so for them to appear the threshold has to be 0
|
||||
# if NODE.quantity < ARGS.treshold:
|
||||
if ARGS.treshold > NODE.quantity:
|
||||
continue
|
||||
|
||||
OUT.write(f"{INDENT}- {NODE}\n")
|
||||
@ -134,7 +155,8 @@ def tree_like(file: str, out: str):
|
||||
OUT.close()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ARGS = get_args(sys.argv)
|
||||
# ARGS = get_debug_args()
|
||||
tree_like(ARGS.file, ARGS.output)
|
||||
tree_like(ARGS.file,ARGS.csv_uri_header, ARGS.output)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user