def command_parser(command_line: str):
    """Split one Dockerfile instruction line into a flat list of tokens.

    The line is stripped and searched with ``DockerConstants.EXEC_FORM_REGEX``.
    Without a match the whole line is tokenised with ``shlex.split``. With a
    match the line is cut in two: the text before the match (minus one
    character) and the text from the match start up to, but excluding, the
    last character. Both parts are tokenised with ``shlex.split`` and
    concatenated in that order.

    NOTE(review): the regex is defined outside this function; presumably it
    locates the inside of a ``[...]`` exec-form array, which would make the
    dropped characters the opening and closing brackets -- confirm.

    Args:
        command_line: Full text of one instruction.

    Returns:
        list[str]: Tokens of the leading part followed by tokens of the
        array part (empty when there is no match).

    Raises:
        ValueError: Propagated from ``shlex.split`` on unbalanced quoting.
    """
    # Width of the single character dropped just before the match
    bracket_width = 1

    stripped = command_line.strip()
    exec_form = re.compile(DockerConstants.EXEC_FORM_REGEX).search(stripped)

    if exec_form is None:
        head, array_body = stripped, ""
    else:
        cut = exec_form.start()
        array_body = stripped[cut:-1]
        head = stripped[: cut - bracket_width]

    return [*shlex.split(head), *shlex.split(array_body)]
# MARK: InstructionChunk
class InstructionChunk:
    """Physical lines that together form one Dockerfile instruction.

    Every line has the text matched by
    ``DockerConstants.LINE_CONTINUATION_REGEX`` removed on construction;
    the result is kept in ``lines``.
    """

    def __init__(self, chunk_lines: list[str]):
        """Store a cleaned copy of ``chunk_lines``.

        Args:
            chunk_lines: Raw lines of the instruction. The list itself is
                not modified.
        """
        strip_continuation = re.compile(
            DockerConstants.LINE_CONTINUATION_REGEX
        )
        self.lines: list[str] = [
            strip_continuation.sub("", line) for line in chunk_lines
        ]

    def is_empty(self):
        """Return True when the chunk has no lines or its first line is "".

        Only the first line is inspected. A chunk without any line is
        reported as empty instead of raising ``IndexError``.
        """
        return not self.lines or len(self.lines[0]) == 0

    def __len__(self):
        """Return the number of lines held by the chunk."""
        return len(self.lines)

    def __str__(self):
        """Return the cleaned lines joined by newlines.

        ``__str__`` must return a string; returning ``None`` makes
        ``str(chunk)`` raise ``TypeError``.
        """
        return "\n".join(self.lines)
# TODO: Work on run
# MARK: RUN
class DockerRUN(DockerInstruction):
    """``RUN`` instruction.

    Construction is delegated to ``DockerInstruction.__init__``; this
    subclass only tags the instance with ``DockerInstructionType.RUN``.
    """

    # Flags accepted by RUN, all of the form --key=value.
    # Fixed spelling: "--network" (was "--netowrk").
    # Nothing in this class reads the set yet.
    __OPTIONS: set[str] = {"--mount", "--network", "--security"}

    def __init__(self, chunk: list[str]):
        """Build the instruction from ``chunk`` and mark it as RUN.

        NOTE(review): ``chunk`` is annotated ``list[str]``, but the base
        class calls ``chunk.is_empty()``, so an ``InstructionChunk`` is
        presumably expected -- confirm and fix the annotation file-wide.
        """
        super().__init__(chunk)

        self.type = DockerInstructionType.RUN
class DockerFileParser:
    """Static helpers that turn a Dockerfile on disk into a ``Dockerfile``.

    Pipeline used by ``dockerfile_factory``:
    raw lines -> ``InstructionChunk`` (one per instruction, continuation
    lines grouped) -> ``DockerInstruction`` -> ``DockerStage`` (a new stage
    starts at every ``FROM``).

    The class cannot be instantiated.
    """

    DEFAULT_ESCAPE_STRING = "\\"
    DEFAULT_LINE_CONTINUATION = "\\"

    # Parser directive line: "# <directive_name>=<value>".
    # The named groups are required by __parse_directives().
    DIRECTIVE_REGEX = re.compile(
        r"# +(?P<directive_name>[^\s]*)=(?P<value>[^\s]*)"
    )

    # Instruction types that have a dedicated class. Any other known type
    # is represented by the generic DockerInstruction.
    _INSTRUCTION_CLASSES = {
        DockerInstructionType.ADD: DockerADD,
        DockerInstructionType.CMD: DockerCMD,
        DockerInstructionType.COPY: DockerCOPY,
        DockerInstructionType.ENTRYPOINT: DockerENTRYPOINT,
        DockerInstructionType.FROM: DockerFROM,
        DockerInstructionType.RUN: DockerRUN,
        DockerInstructionType.WORKDIR: DockerWORKDIR,
    }

    def __new__(cls):
        raise TypeError("Static classes cannot be instantiated")

    # MARK: dockerfile_factory()
    @staticmethod
    def dockerfile_factory(dockerfile_path: Path):
        """Parse the file at ``dockerfile_path`` into a ``Dockerfile``.

        Args:
            dockerfile_path: Location of the Dockerfile.

        Returns:
            A ``Dockerfile`` built from the parsed stages.

        Raises:
            FileNotFoundError: ``dockerfile_path`` is not an existing file.
        """
        if not dockerfile_path.is_file():
            raise FileNotFoundError(f"{dockerfile_path} is not a valid path")

        # The context manager closes the handle even if reading fails
        with dockerfile_path.open(encoding="utf-8") as dockerfile:
            docker_lines = dockerfile.readlines()

        chunks = DockerFileParser.__parse_chunks(docker_lines)
        instructions = DockerFileParser.__parse_instructions(chunks)
        stages = DockerFileParser.__parse_stages(instructions)

        return Dockerfile(stages.copy())

    # MARK: __parse_directives()
    @staticmethod
    def __parse_directives(docker_lines: list[str]):
        """Scan ``docker_lines`` for parser directives.

        Only the first occurrence of each directive name is honoured. The
        value of ``escape`` is stored on the class as ``ESCAPE_STRING``.
        This helper is not called by ``dockerfile_factory``.
        """
        found_directives: set[str] = set()

        for line in docker_lines:

            # Line is too short to make a directive
            if len(line) < 4:
                continue

            match = DockerFileParser.DIRECTIVE_REGEX.match(line)

            # No match found
            if match is None:
                continue

            directive_name = match.group("directive_name")
            value = match.group("value")

            # Duplicate directive, ignore
            if directive_name in found_directives:
                continue

            found_directives.add(directive_name)

            if directive_name == "escape":
                DockerFileParser.ESCAPE_STRING = value

    # MARK: __parse_chunks()
    @staticmethod
    def __parse_chunks(
        instruction_lines: list[str],
        line_continuation_regex: str = DockerConstants.LINE_CONTINUATION_REGEX,
    ) -> list[InstructionChunk]:
        """Group raw lines into one ``InstructionChunk`` per instruction.

        Args:
            instruction_lines: Lines of the Dockerfile, in file order.
            line_continuation_regex: Pattern that marks a line as continued
                on the next one.

        Returns:
            The chunks in file order. Comment lines always form a chunk of
            their own.
        """
        continuation_check = re.compile(line_continuation_regex)
        comment_check = re.compile(DockerConstants.COMMENT_REGEX)
        chunks: list[InstructionChunk] = []
        accumulator: list[str] = []

        for line in instruction_lines:
            line = line.rstrip()

            # match() anchors at the start of the line, so only a line
            # whose first non-blank character is "#" is a comment. A "#"
            # further right is part of the instruction arguments.
            if comment_check.match(line) is not None:

                # A comment can't continue: flush what was pending first
                # NOTE(review): Docker drops comment lines inside a
                # continued instruction instead of ending it -- decide
                # whether that should be mirrored here.
                if accumulator:
                    chunks.append(InstructionChunk(accumulator))
                    accumulator = []

                chunks.append(InstructionChunk([line]))
                # Without this the line would be checked again below and
                # an empty chunk would be appended after every comment.
                continue

            accumulator.append(line)

            # If line doesn't continue, join everything found
            if continuation_check.search(line) is None:
                chunks.append(InstructionChunk(accumulator))
                accumulator = []

        # Input ended on a continuation line: keep the partial instruction
        if accumulator:
            chunks.append(InstructionChunk(accumulator))

        return chunks

    # MARK: __parse_instruction()
    @staticmethod
    def __parse_instructions(
        instruction_chunks: list[InstructionChunk],
    ) -> list[DockerInstruction]:
        """Map every chunk to its instruction object, preserving order."""
        return [
            DockerFileParser.__instruction_mapper(chunk)
            for chunk in instruction_chunks
        ]

    # MARK: __instruction_mapper()
    @staticmethod
    def __instruction_mapper(
        chunk: InstructionChunk,
    ) -> DockerInstruction:
        """Build the instruction object that matches ``chunk``.

        Empty chunks become a plain ``DockerInstruction``. Lines starting
        with ``#`` become ``DockerCOMMENT``. The first word, compared
        case-insensitively, selects the instruction class. Words that are
        not a ``DockerInstructionType`` member yield a ``DockerInstruction``
        typed ``UNKOWN`` instead of raising ``KeyError``.
        """
        if chunk.is_empty():
            return DockerInstruction(chunk)

        first_line = chunk.lines[0].lstrip()

        # Checked before tokenising so "#comment" (no space) is recognised
        if first_line.startswith("#"):
            return DockerCOMMENT(chunk)

        # Plain split: the keyword never needs shell quoting rules, and a
        # quote closed on a later line must not fail here.
        words = first_line.split(maxsplit=1)
        if not words:
            return DockerInstruction(chunk)

        instruction_type = DockerInstructionType.__members__.get(
            words[0].upper()
        )

        if instruction_type is None:
            unknown = DockerInstruction(chunk)
            unknown.type = DockerInstructionType.UNKOWN
            return unknown

        instruction_class = DockerFileParser._INSTRUCTION_CLASSES.get(
            instruction_type, DockerInstruction
        )
        return instruction_class(chunk)

    # MARK: __parse_stages()
    @staticmethod
    def __parse_stages(
        instructions: list[DockerInstruction],
    ) -> list[DockerStage]:
        """Split ``instructions`` into stages at every ``FROM``.

        Instructions that precede the first ``FROM`` form the first stage,
        so that stage is empty when the file starts with ``FROM``.
        """
        stages: list[DockerStage] = []
        accumulator: list[DockerInstruction] = []

        for instruction in instructions:

            if instruction.type is DockerInstructionType.FROM:
                # Close the stage collected so far; FROM opens the next one
                stages.append(DockerStage(accumulator.copy()))
                accumulator = []

            accumulator.append(instruction)

        stages.append(DockerStage(accumulator.copy()))

        return stages
HEALTHCHECK = auto() + LABEL = auto() + MAINTAINER = auto() + ONBUILD = auto() + RUN = auto() + SHELL = auto() + STOPSIGNAL = auto() + USER = auto() + VOLUME = auto() + WORKDIR = auto()