From 2c6d36a037043e85e57e2cd9326a879b4b30319a Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 14 Jan 2025 13:50:20 +0100 Subject: [PATCH] batch of fixes to issues reported by codacy --- hcl2/__init__.py | 3 +- hcl2/api.py | 1 - hcl2/builder.py | 24 +++-- hcl2/reconstructor.py | 157 +++++++++++++++-------------- hcl2/transformer.py | 2 +- test/unit/test_reconstruct_dict.py | 28 ++--- tree-to-hcl2-reconstruction.md | 4 +- 7 files changed, 120 insertions(+), 99 deletions(-) diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 69404cf5..6819a475 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -14,5 +14,6 @@ reverse_transform, writes, AST, - Builder, ) + +from .builder import Builder diff --git a/hcl2/api.py b/hcl2/api.py index ddb40e98..fa7d5ed2 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -4,7 +4,6 @@ from lark.tree import Tree as AST from hcl2.parser import hcl2 from hcl2.transformer import DictTransformer -from hcl2.builder import Builder def load(file: TextIO, with_meta=False) -> dict: diff --git a/hcl2/builder.py b/hcl2/builder.py index 5ef0c48b..7381a669 100644 --- a/hcl2/builder.py +++ b/hcl2/builder.py @@ -1,18 +1,28 @@ """A utility class for constructing HCL documents from Python code.""" -from typing import List -from typing_extensions import Self +from typing import List, Optional class Builder: - def __init__(self, attributes: dict = {}): - self.blocks = {} - self.attributes = attributes + """ + The `hcl2.Builder` class produces a dictionary that should be identical to the + output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword + argument is important here. HCL "blocks" in the Python dictionary are + identified by the presence of `__start_line__` and `__end_line__` metadata + within them. The `Builder` class handles adding that metadata. If that metadata + is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to + identify what is a block and what is just an attribute with an object value. + """ + + def __init__(self, attributes: Optional[dict] = None): + self.blocks: dict = {} + self.attributes = attributes or {} def block( - self, block_type: str, labels: List[str] = [], **attributes: dict - ) -> Self: + self, block_type: str, labels: Optional[List[str]] = None, **attributes: dict + ) -> "Builder": """Create a block within this HCL document.""" + labels = labels or [] block = Builder(attributes) # initialize a holder for blocks of that type diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index b2b57b89..04880453 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -2,7 +2,7 @@ import re import json -from typing import List, Dict, Callable, Optional +from typing import List, Dict, Callable, Optional, Union, Any, Tuple from lark import Lark, Tree from lark.grammar import Terminal, NonTerminal, Symbol @@ -145,6 +145,14 @@ def __init__( Terminal("BINARY_OP"), ] + def _is_equals_sign(self, terminal) -> bool: + return ( + isinstance(self.last_rule, Token) + and self.last_rule.value in ("attribute", "object_elem") + and self.last_terminal == Terminal("EQ") + and terminal != Terminal("NL_OR_COMMENT") + ) + # pylint: disable=too-many-branches, too-many-return-statements def _should_add_space(self, rule, current_terminal): """ @@ -172,22 +180,7 @@ def _should_add_space(self, rule, current_terminal): if not self.last_terminal or not self.last_rule: return False - # always add a space after the equals sign in an attribute - if ( - isinstance(self.last_rule, Token) - and self.last_rule.value == "attribute" - and self.last_terminal == Terminal("EQ") - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # always add a space after the equals sign in an object - if ( - isinstance(self.last_rule, Token) - and self.last_rule.value == "object_elem" - and self.last_terminal == Terminal("EQ") - and current_terminal != Terminal("NL_OR_COMMENT") - ): + if self._is_equals_sign(current_terminal): return True # if we're in a ternary or binary operator, add space around the operator @@ -305,9 +298,11 @@ def _reconstruct(self, tree): # first, handle any deferred items if self.deferred_item is not None: - deferred_rule, deferred_terminal, deferred_value = ( - self.deferred_item - ) + ( + deferred_rule, + deferred_terminal, + deferred_value, + ) = self.deferred_item # if we deferred a comma and the next character ends a # parenthesis or block, we can throw it out @@ -366,6 +361,25 @@ class HCLReverseTransformer: The reverse of hcl2.transformer.DictTransformer. This method attempts to convert a dict back into a working AST, which can be written back out. """ + + @staticmethod + def _name_to_identifier(name: str) -> Tree: + """Converts a string to a NAME token within an identifier rule.""" + return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) + + @staticmethod + def _escape_interpolated_str(interp_s: str) -> str: + # begin by doing basic JSON string escaping, to add backslashes + interp_s = json.dumps(interp_s) + + # find each interpolation within the string and remove the backslashes + interp_s = reverse_quotes_within_interpolation(interp_s) + return interp_s + + @staticmethod + def _block_has_label(block: dict) -> bool: + return len(block.keys()) == 1 + def __init__(self): pass @@ -376,6 +390,30 @@ def transform(self, hcl_dict: dict) -> Tree: start = Tree(Token("RULE", "start"), [body]) return start + @staticmethod + def _is_string_wrapped_tf(interp_s: str) -> bool: + """ + Determines whether a string is a complex HCL datastructure + wrapped in ${ interpolation } characters. + """ + if not interp_s.startswith("${") or not interp_s.endswith("}"): + return False + + nested_tokens = [] + for match in re.finditer(r"\$?\{|}", interp_s): + if match.group(0) in ["${", "{"]: + nested_tokens.append(match.group(0)) + elif match.group(0) == "}": + nested_tokens.pop() + + # if we exit ${ interpolation } before the end of the string, + # this interpolated string has string parts and can't represent + # a valid HCL expression on its own (without quotes) + if len(nested_tokens) == 0 and match.end() != len(interp_s): + return False + + return True + def _newline(self, level: int, comma: bool = False, count: int = 1) -> Tree: # some rules expect the `new_line_and_or_comma` token if comma: @@ -399,7 +437,7 @@ def _list_is_a_block(self, value: list) -> bool: return True - def _dict_is_a_block(self, sub_obj: any) -> bool: + def _dict_is_a_block(self, sub_obj: Any) -> bool: # if the list doesn't contain dictionaries, it's not a block if not isinstance(sub_obj, dict): return False @@ -426,13 +464,10 @@ def _dict_is_a_block(self, sub_obj: any) -> bool: # block, the array is just an array of objects, not a block return False - def _block_has_label(self, block: dict) -> bool: - return len(block.keys()) == 1 - - def _calculate_block_labels(self, block: dict) -> List[str]: - # if b doesn't have a label + def _calculate_block_labels(self, block: dict) -> Tuple[List[str], dict]: + # if block doesn't have a label if len(block.keys()) != 1: - return ([], block) + return [], block # otherwise, find the label curr_label = list(block)[0] @@ -443,48 +478,13 @@ def _calculate_block_labels(self, block: dict) -> List[str]: "__start_line__" in potential_body.keys() or "__end_line__" in potential_body.keys() ): - return ([curr_label], potential_body) + return [curr_label], potential_body # recurse and append the label next_label, block_body = self._calculate_block_labels(potential_body) - return ([curr_label] + next_label, block_body) - - def _is_string_wrapped_tf(self, interp_s: str) -> bool: - """ - Determines whether a string is a complex HCL datastructure - wrapped in ${ interpolation } characters. - """ - if not interp_s.startswith("${") or not interp_s.endswith("}"): - return False - - nested_tokens = [] - for match in re.finditer(r"\$?\{|\}", interp_s): - if match.group(0) in ["${", "{"]: - nested_tokens.append(match.group(0)) - elif match.group(0) == "}": - nested_tokens.pop() + return [curr_label] + next_label, block_body - # if we exit ${ interpolation } before the end of the string, - # this interpolated string has string parts and can't represent - # a valid HCL expression on its own (without quotes) - if len(nested_tokens) == 0 and match.end() != len(interp_s): - return False - - return True - - def _name_to_identifier(self, name: str) -> Tree: - """Converts a string to a NAME token within an identifier rule.""" - return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) - - def _escape_interpolated_str(self, interp_s: str) -> str: - # begin by doing basic JSON string escaping, to add backslashes - interp_s = json.dumps(interp_s) - - # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(interp_s) - return interp_s - - def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]: + def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: # we add a newline at the top of a body within a block, not the root body # >2 here is to ignore the __start_line__ and __end_line__ metadata if level > 0 and len(hcl_dict) > 2: @@ -492,7 +492,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]: else: children = [] - # iterate thru each attribute or sub-block of this block + # iterate through each attribute or sub-block of this block for key, value in hcl_dict.items(): if key in ["__start_line__", "__end_line__"]: continue @@ -545,13 +545,13 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]: return Tree(Token("RULE", "body"), children) # pylint: disable=too-many-branches, too-many-return-statements - def _transform_value_to_expr_term(self, value, level) -> Token: + def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) Anything passed to this function is treated "naively". Any lists passed are assumed to be tuples, and any dicts passed are assumed to be objects. No more checks will be performed for either to see if they are "blocks" - as ehis check happens in `_transform_dict_to_body`. + as this check happens in `_transform_dict_to_body`. """ # for lists, recursively turn the child elements into expr_terms and @@ -575,7 +575,7 @@ def _transform_value_to_expr_term(self, value, level) -> Token: if len(value) > 0: elems.append(self._newline(level + 1)) - # iterate thru the items and add them to the object + # iterate through the items and add them to the object for i, (k, dict_v) in enumerate(value.items()): if k in ["__start_line__", "__end_line__"]: continue @@ -626,17 +626,24 @@ def _transform_value_to_expr_term(self, value, level) -> Token: # potentially unpack a complex syntax structure if self._is_string_wrapped_tf(value): # we have to unpack it by parsing it - wrapped_value = re.match(r"\$\{(.*)\}", value).group(1) + wrapped_value = re.match(r"\$\{(.*)}", value).group(1) # type:ignore ast = hcl2.parse(f"value = {wrapped_value}") - assert ast.data == Token("RULE", "start") + if ast.data != Token("RULE", "start"): + raise RuntimeError("Token must be `start` RULE") + body = ast.children[0] - assert body.data == Token("RULE", "body") + if body.data != Token("RULE", "body"): + raise RuntimeError("Token must be `body` RULE") + attribute = body.children[0] - assert attribute.data == Token("RULE", "attribute") - assert attribute.children[1] == Token("EQ", " =") + if attribute.data != Token("RULE", "attribute"): + raise RuntimeError("Token must be `attribute` RULE") + + if attribute.children[1] != Token("EQ", " ="): + raise RuntimeError("Token must be `EQ (=)` rule") + parsed_value = attribute.children[2] - assert isinstance(parsed_value, Tree) if parsed_value.data == Token("RULE", "expr_term"): return parsed_value diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 120564d5..821fce4c 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -326,7 +326,7 @@ def process_escape_sequences(self, value: str) -> str: # for now, but this method can be extended in the future return value - def to_tf_inline(self, value: any) -> str: + def to_tf_inline(self, value: Any) -> str: """ Converts complex objects (e.g.) dicts to an "inline" HCL syntax for use in function calls and ${interpolation} strings diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py index 4f3591c0..a65e8429 100644 --- a/test/unit/test_reconstruct_dict.py +++ b/test/unit/test_reconstruct_dict.py @@ -50,32 +50,36 @@ def check_terraform(self, hcl_path_str: str): try: hcl2_dict_correct = hcl2.load(hcl_file) except Exception as exc: - assert ( - False - ), f"failed to tokenize 'correct' terraform in `{hcl_path_str}`: {traceback.format_exc()}" + raise RuntimeError( + f"failed to tokenize 'correct' terraform in " + f"`{hcl_path_str}`: {traceback.format_exc()}" + ) from exc json_dict = json.load(json_file) try: hcl_ast = hcl2.reverse_transform(json_dict) except Exception as exc: - assert ( - False - ), f"failed to reverse transform HCL from `{json_path.name}`: {traceback.format_exc()}" + raise RuntimeError( + f"failed to reverse transform HCL from " + f"`{json_path.name}`: {traceback.format_exc()}" + ) from exc try: hcl_reconstructed = hcl2.writes(hcl_ast) except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform from AST from `{json_path.name}`: {traceback.format_exc()}" + raise RuntimeError( + f"failed to reconstruct terraform from AST from " + f"`{json_path.name}`: {traceback.format_exc()}" + ) from exc try: hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) except Exception as exc: - assert ( - False - ), f"failed to tokenize 'reconstructed' terraform from AST from `{json_path.name}`: {exc},\n{hcl_reconstructed}" + raise RuntimeError( + f"failed to tokenize 'reconstructed' terraform from AST from " + f"`{json_path.name}`: {exc}, \n{hcl_reconstructed}" + ) from exc self.assertDictEqual( hcl2_dict_reconstructed, diff --git a/tree-to-hcl2-reconstruction.md b/tree-to-hcl2-reconstruction.md index 5089164a..88f88f5c 100644 --- a/tree-to-hcl2-reconstruction.md +++ b/tree-to-hcl2-reconstruction.md @@ -239,8 +239,8 @@ Once the AST has been generated, you can convert it back to valid HCL using require manual intervention of the AST produced after the `reverse_transform` step. - - Most notably, this means it's not possible to generate files containing - comments (both inline and block comments) +- Most notably, this means it's not possible to generate files containing + comments (both inline and block comments) - Even when parsing a file directly and writing it back out, some formatting information may be lost due to Terminals discarded during the parsing process.