Skip to content

Commit

Permalink
Fix a few formatting issues in reconstruction
Browse files Browse the repository at this point in the history
  • Loading branch information
weaversam8 committed Oct 24, 2024
1 parent 281b706 commit 1ad6758
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 64 deletions.
73 changes: 66 additions & 7 deletions hcl2/reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
def reverse_quotes_within_interpolation(interp_s: str) -> str:
"""
A common operation is to `json.dumps(s)` where s is a string to output in
Terraform. This is useful for automatically escaping any quotes within the
HCL. This is useful for automatically escaping any quotes within the
string, but this escapes quotes within interpolation incorrectly. This
method removes any erroneous escapes within interpolated segments of a
string.
Expand Down Expand Up @@ -149,9 +149,9 @@ def __init__(
def _should_add_space(self, rule, current_terminal):
"""
This method documents the situations in which we add space around
certain tokens while reconstructing the generated Terraform.
certain tokens while reconstructing the generated HCL.
Additional rules can be added here if the generated Terraform has
Additional rules can be added here if the generated HCL has
improper whitespace (affecting parse OR affecting ability to perfectly
reconstruct a file down to the whitespace level.)
Expand Down Expand Up @@ -449,7 +449,31 @@ def _calculate_block_labels(self, block: dict) -> List[str]:
next_label, block_body = self._calculate_block_labels(potential_body)
return ([curr_label] + next_label, block_body)

def _is_string_wrapped_tf(self, interp_s: str) -> bool:
"""
Determines whether a string is a complex HCL datastructure
wrapped in ${ interpolation } characters.
"""
if not interp_s.startswith("${") or not interp_s.endswith("}"):
return False

nested_tokens = []
for match in re.finditer(r"\$?\{|\}", interp_s):
if match.group(0) in ["${", "{"]:
nested_tokens.append(match.group(0))
elif match.group(0) == "}":
nested_tokens.pop()

# if we exit ${ interpolation } before the end of the string,
# this interpolated string has string parts and can't represent
# a valid HCL expression on its own (without quotes)
if len(nested_tokens) == 0 and match.end() != len(interp_s):
return False

return True

def _name_to_identifier(self, name: str) -> Tree:
    """Build an ``identifier`` rule tree whose only child is a NAME token holding ``name``."""
    rule = Token("RULE", "identifier")
    name_token = Token("NAME", name)
    return Tree(rule, [name_token])

def _escape_interpolated_str(self, interp_s: str) -> str:
Expand All @@ -462,7 +486,8 @@ def _escape_interpolated_str(self, interp_s: str) -> str:

def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]:
# we add a newline at the top of a body within a block, not the root body
if level > 0:
# >2 here is to ignore the __start_line__ and __end_line__ metadata
if level > 0 and len(hcl_dict) > 2:
children = [self._newline(level)]
else:
children = []
Expand Down Expand Up @@ -519,6 +544,7 @@ def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> List[Tree]:

return Tree(Token("RULE", "body"), children)

# pylint: disable=too-many-branches, too-many-return-statements
def _transform_value_to_expr_term(self, value, level) -> Token:
"""Transforms a value from a dictionary into an "expr_term" (a value in HCL2)
Expand All @@ -544,18 +570,29 @@ def _transform_value_to_expr_term(self, value, level) -> Token:
# and store within an object
if isinstance(value, dict):
elems = []
for k, dict_v in value.items():

# if the object has elements, put it on a newline
if len(value) > 0:
elems.append(self._newline(level + 1))

# iterate thru the items and add them to the object
for i, (k, dict_v) in enumerate(value.items()):
if k in ["__start_line__", "__end_line__"]:
continue
identifier = self._name_to_identifier(k)
value_expr_term = self._transform_value_to_expr_term(dict_v, level)
value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1)
elems.append(
Tree(
Token("RULE", "object_elem"),
[identifier, Token("EQ", " ="), value_expr_term],
)
)
elems.append(self._newline(level, comma=True))

# add indentation appropriately
if i < len(value) - 1:
elems.append(self._newline(level + 1, comma=True))
else:
elems.append(self._newline(level, comma=True))
return Tree(
Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elems)]
)
Expand Down Expand Up @@ -586,6 +623,28 @@ def _transform_value_to_expr_term(self, value, level) -> Token:

# store strings as single literals
if isinstance(value, str):
# potentially unpack a complex syntax structure
if self._is_string_wrapped_tf(value):
# we have to unpack it by parsing it
wrapped_value = re.match(r"\$\{(.*)\}", value).group(1)
ast = hcl2.parse(f"value = {wrapped_value}")

assert ast.data == Token("RULE", "start")
body = ast.children[0]
assert body.data == Token("RULE", "body")
attribute = body.children[0]
assert attribute.data == Token("RULE", "attribute")
assert attribute.children[1] == Token("EQ", " =")
parsed_value = attribute.children[2]
assert isinstance(parsed_value, Tree)

if parsed_value.data == Token("RULE", "expr_term"):
return parsed_value

# wrap other types of syntax as an expression (in parentheses)
return Tree(Token("RULE", "expr_term"), [parsed_value])

# otherwise it's just a string.
return Tree(
Token("RULE", "expr_term"),
[Token("STRING_LIT", self._escape_interpolated_str(value))],
Expand Down
6 changes: 2 additions & 4 deletions hcl2/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def process_escape_sequences(self, value: str) -> str:

def to_tf_inline(self, value: any) -> str:
"""
Converts complex objects (e.g.) dicts to an "inline" Terraform syntax
Converts complex objects (e.g.) dicts to an "inline" HCL syntax
for use in function calls and ${interpolation} strings
"""
if isinstance(value, dict):
Expand All @@ -344,9 +344,7 @@ def to_tf_inline(self, value: any) -> str:
if isinstance(value, int):
return str(value)

raise RuntimeError(
f"Invalid type to convert to inline Terraform: {type(value)}"
)
raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}")

def identifier(self, value: Any) -> Any:
# Making identifier a token by capitalizing it to IDENTIFIER
Expand Down
68 changes: 15 additions & 53 deletions test/unit/test_reconstruct_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,36 +22,22 @@ class TestReconstruct(TestCase):

def test_write_terraform(self):
"""Test reconstructing a set of hcl2 files, to make sure they parse to the same structure"""

# the reconstruction process is not precise, so some files do not
# reconstruct any embedded HCL expressions exactly the same. this
# list captures those, and should be manually inspected regularly to
# ensure that files remain syntactically equivalent
inexact_files = [
# one level of interpolation is stripped from this file during
# reconstruction, since we don't have a way to distinguish it from
# a complex HCL expression. the output parses to the same value
# though
"multi_level_interpolation.tf",
]

for hcl_path in HCL2_FILES:
yield self.check_terraform, hcl_path

# def test_write_terraform_exact(self):
# """
# Test reconstructing a set of hcl2 files, to make sure they
# reconstruct exactly the same, including whitespace.
# """

# # the reconstruction process is not precise, so some files do not
# # reconstruct their whitespace exactly the same, but they are
# # syntactically equivalent. This list is a target for further
# # improvements to the whitespace handling of the reconstruction
# # algorithm.
# inexact_files = [
# # the reconstructor loses commas on the last element in an array,
# # even if they're in the input file
# "iam.tf",
# "variables.tf",
# # the reconstructor doesn't preserve indentation within comments
# # perfectly
# "multiline_expressions.tf",
# # the reconstructor doesn't preserve the line that a ternary is
# # broken on.
# "route_table.tf",
# ]

# for hcl_path in HCL2_FILES:
# if hcl_path not in inexact_files:
# yield self.check_whitespace, hcl_path
if hcl_path not in inexact_files:
yield self.check_terraform, hcl_path

def check_terraform(self, hcl_path_str: str):
"""
Expand Down Expand Up @@ -96,27 +82,3 @@ def check_terraform(self, hcl_path_str: str):
hcl2_dict_correct,
f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}",
)

# def check_whitespace(self, hcl_path_str: str):
# """Tests that the reconstructed file matches the original file exactly."""
# hcl_path = (HCL2_DIR / hcl_path_str).absolute()
# with hcl_path.open("r") as hcl_file:
# hcl_file_content = hcl_file.read()
# try:
# hcl_ast = hcl2.parses(hcl_file_content)
# except Exception as exc:
# assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}"

# try:
# hcl_reconstructed = hcl2.writes(hcl_ast)
# except Exception as exc:
# assert (
# False
# ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}"

# self.assertMultiLineEqual(
# hcl_reconstructed,
# hcl_file_content,
# f"file {hcl_path_str} does not match its reconstructed version \
# exactly. this is usually whitespace related.",
# )

0 comments on commit 1ad6758

Please sign in to comment.