From 8bc44554d724866d10ba24af0bf41364fcfbc190 Mon Sep 17 00:00:00 2001
From: jackdewinter <jack.de.winter@outlook.com>
Date: Sun, 5 Mar 2023 21:29:42 -0800
Subject: [PATCH] =?UTF-8?q?https://github.com/jackdewinter/pymarkdown/issu?=
 =?UTF-8?q?es/600=20-=20refactored=20ht=E2=80=A6=20(#601)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* https://github.com/jackdewinter/pymarkdown/issues/600 - refactored html helper.

* https://github.com/jackdewinter/pymarkdown/issues/600 - fixed small flake8 error
---
 publish/coverage.json                         |   4 +-
 publish/pylint_suppression.json               |   3 +-
 pymarkdown/{ => html}/html_helper.py          | 294 +--------------
 pymarkdown/html/html_raw_helper.py            | 341 ++++++++++++++++++
 pymarkdown/inline/inline_autolink_helper.py   |   4 +-
 .../leaf_blocks/leaf_block_processor.py       |   2 +-
 pymarkdown/list_block_processor.py            |   2 +-
 pymarkdown/tokenized_markdown.py              |   2 +-
 test/basic/test_complete_html_tags.py         |   2 +-
 test/basic/test_html_tags.py                  |   2 +-
 10 files changed, 353 insertions(+), 303 deletions(-)
 rename pymarkdown/{ => html}/html_helper.py (68%)
 create mode 100644 pymarkdown/html/html_raw_helper.py

diff --git a/publish/coverage.json b/publish/coverage.json
index 2898cd702..229739c93 100644
--- a/publish/coverage.json
+++ b/publish/coverage.json
@@ -6,8 +6,8 @@
         "totalCovered": 3581
     },
     "lineLevel": {
-        "totalMeasured": 14479,
-        "totalCovered": 14479
+        "totalMeasured": 14501,
+        "totalCovered": 14501
     }
 }
 
diff --git a/publish/pylint_suppression.json b/publish/pylint_suppression.json
index 0b4616dc1..883b273f6 100644
--- a/publish/pylint_suppression.json
+++ b/publish/pylint_suppression.json
@@ -66,9 +66,10 @@
             "too-many-arguments": 2
         },
         "pymarkdown/extensions/task_list_items.py": {},
-        "pymarkdown/html_helper.py": {
+        "pymarkdown/html/html_helper.py": {
             "too-many-arguments": 1
         },
+        "pymarkdown/html/html_raw_helper.py": {},
         "pymarkdown/inline/inline_autolink_helper.py": {},
         "pymarkdown/inline/inline_backslash_helper.py": {},
         "pymarkdown/inline/inline_backtick_helper.py": {},
diff --git a/pymarkdown/html_helper.py b/pymarkdown/html/html_helper.py
similarity index 68%
rename from pymarkdown/html_helper.py
rename to pymarkdown/html/html_helper.py
index 09b175111..5c07c25dd 100644
--- a/pymarkdown/html_helper.py
+++ b/pymarkdown/html/html_helper.py
@@ -6,10 +6,8 @@
 from typing import List, Optional, Tuple, cast
 
 from pymarkdown.block_quote_data import BlockQuoteData
-from pymarkdown.constants import Constants
 from pymarkdown.container_helper import ContainerHelper
-from pymarkdown.inline.inline_request import InlineRequest
-from pymarkdown.inline_markdown_token import RawHtmlMarkdownToken, TextMarkdownToken
+from pymarkdown.inline_markdown_token import TextMarkdownToken
 from pymarkdown.leaf_markdown_token import HtmlBlockMarkdownToken
 from pymarkdown.markdown_token import MarkdownToken
 from pymarkdown.parser_helper import ParserHelper
@@ -21,8 +19,6 @@
 
 POGGER = ParserLogger(logging.getLogger(__name__))
 
-# pylint: disable=too-many-lines
-
 
 class HtmlHelper:
     """
@@ -45,20 +41,14 @@ class HtmlHelper:
     __html_attribute_value_double = '"'
     __html_attribute_name_value_separator = "="
     __html_attribute_separator = ParserHelper.space_character
-    __valid_tag_name_start = string.ascii_letters
     __valid_tag_name_characters = f"{string.ascii_letters}{string.digits}-"
-    __tag_attribute_name_characters = f"{string.ascii_letters}{string.digits}_.:-"
-    __unquoted_attribute_value_stop = f"\"'=<>`{Constants.ascii_whitespace}"
-    __tag_attribute_name_start = f"{string.ascii_letters}_:"
     __html_block_1_start_tag_prefix = ["script", "pre", "style"]
     __html_tag_attribute_value_terminators = " \"'=<>`"
     __html_block_2_to_5_start = "!"
     __html_block_2_continued_start = "--"
-    __html_block_2_xx = f"{__html_block_2_to_5_start}{__html_block_2_continued_start}"
     __html_block_3_continued_start = "?"
     __html_block_4_continued_start = string.ascii_uppercase
     __html_block_5_continued_start = "[CDATA["
-    __html_block_5_xx = f"{__html_block_2_to_5_start}{__html_block_5_continued_start}"
     __html_block_1_end_tags = ["</script>", "</pre>", "</style>"]
     __html_block_2_end = "-->"
     __html_block_3_end = "?>"
@@ -68,13 +58,6 @@ class HtmlHelper:
     __attribute_start_characters = "abcdefghijklmnopqrstuvwxyz1234567890:_"
     __attribute_other_characters = f"{__attribute_start_characters}.-"
 
-    __raw_declaration_start_character = "!"
-    __raw_declaration_whitespace = ParserHelper.space_character
-    __raw_html_exclusion_1 = ">"
-    __raw_html_exclusion_2 = "->"
-    __raw_html_exclusion_3 = "-"
-    __raw_html_exclusion_4 = "--"
-
     __html_block_6_start = [
         "address",
         "article",
@@ -351,281 +334,6 @@ def is_complete_html_start_tag(
             non_whitespace_index,
         )
 
-    @staticmethod
-    def __parse_raw_tag_name(text_to_parse: str, start_index: int) -> str:
-        """
-        Parse a HTML tag name from the string.
-        """
-        if ParserHelper.is_character_at_index_one_of(
-            text_to_parse, start_index, HtmlHelper.__valid_tag_name_start
-        ):
-            index, __ = ParserHelper.collect_while_one_of_characters(
-                text_to_parse, start_index + 1, HtmlHelper.__valid_tag_name_characters
-            )
-            return text_to_parse[:index]
-        return ""
-
-    @staticmethod
-    def __parse_tag_attributes(
-        text_to_parse: str, start_index: int
-    ) -> Tuple[Optional[int], Optional[str]]:
-        """
-        Handle the parsing of the attributes for an open tag.
-        """
-        parse_index, _ = ParserHelper.collect_while_one_of_characters(
-            text_to_parse, start_index, HtmlHelper.__tag_attribute_name_characters
-        )
-        assert parse_index is not None
-        end_name_index, extracted_whitespace = ParserHelper.extract_ascii_whitespace(
-            text_to_parse, parse_index
-        )
-        assert end_name_index is not None
-        if ParserHelper.is_character_at_index(
-            text_to_parse,
-            end_name_index,
-            HtmlHelper.__html_attribute_name_value_separator,
-        ):
-            (
-                value_start_index,
-                _,
-            ) = ParserHelper.extract_ascii_whitespace(text_to_parse, end_name_index + 1)
-            assert value_start_index is not None
-            value_end_index: Optional[int] = None
-            if ParserHelper.is_character_at_index_one_of(
-                text_to_parse,
-                value_start_index,
-                HtmlHelper.__html_attribute_value_single,
-            ):
-                value_end_index, _ = ParserHelper.collect_until_character(
-                    text_to_parse,
-                    value_start_index + 1,
-                    HtmlHelper.__html_attribute_value_single,
-                )
-                assert value_end_index is not None
-                if not ParserHelper.is_character_at_index(
-                    text_to_parse,
-                    value_end_index,
-                    HtmlHelper.__html_attribute_value_single,
-                ):
-                    return None, None
-                value_end_index += 1
-            elif ParserHelper.is_character_at_index_one_of(
-                text_to_parse,
-                value_start_index,
-                HtmlHelper.__html_attribute_value_double,
-            ):
-                value_end_index, _ = ParserHelper.collect_until_character(
-                    text_to_parse,
-                    value_start_index + 1,
-                    HtmlHelper.__html_attribute_value_double,
-                )
-                assert value_end_index is not None
-                if not ParserHelper.is_character_at_index(
-                    text_to_parse,
-                    value_end_index,
-                    HtmlHelper.__html_attribute_value_double,
-                ):
-                    return None, None
-                value_end_index += 1
-            else:
-                value_end_index, _ = ParserHelper.collect_until_one_of_characters(
-                    text_to_parse,
-                    value_start_index,
-                    HtmlHelper.__unquoted_attribute_value_stop,
-                )
-            assert value_end_index is not None
-            (
-                end_name_index,
-                extracted_whitespace,
-            ) = ParserHelper.extract_ascii_whitespace(text_to_parse, value_end_index)
-
-        return end_name_index, extracted_whitespace
-
-    @staticmethod
-    def __parse_raw_open_tag(text_to_parse: str) -> Tuple[Optional[str], int]:
-        """
-        Parse the current line as if it is an open tag, and determine if it is valid.
-        """
-
-        end_parse_index, valid_raw_html, tag_name = (
-            -1,
-            None,
-            HtmlHelper.__parse_raw_tag_name(text_to_parse, 0),
-        )
-        if tag_name:
-            parse_index, extracted_whitespace = ParserHelper.extract_ascii_whitespace(
-                text_to_parse, len(tag_name)
-            )
-            assert parse_index is not None
-            while extracted_whitespace and ParserHelper.is_character_at_index_one_of(
-                text_to_parse,
-                parse_index,
-                HtmlHelper.__tag_attribute_name_start,
-            ):
-                (
-                    parse_index,
-                    extracted_whitespace,
-                ) = HtmlHelper.__parse_tag_attributes(text_to_parse, parse_index)
-                if parse_index is None:
-                    return None, -1
-
-            if ParserHelper.is_character_at_index(
-                text_to_parse, parse_index, HtmlHelper.__html_tag_start
-            ):
-                parse_index += 1
-
-            if ParserHelper.is_character_at_index(
-                text_to_parse, parse_index, HtmlHelper.__html_tag_end
-            ):
-                valid_raw_html = text_to_parse[:parse_index]
-                end_parse_index = parse_index + 1
-
-        return valid_raw_html, end_parse_index
-
-    @staticmethod
-    def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
-        """
-        Parse the current line as if it is a close tag, and determine if it is valid.
-        """
-        valid_raw_html = None
-        if ParserHelper.is_character_at_index(
-            text_to_parse, 0, HtmlHelper.__html_tag_start
-        ):
-            if tag_name := HtmlHelper.__parse_raw_tag_name(text_to_parse, 1):
-                parse_index: Optional[int] = len(tag_name)
-                assert parse_index is not None
-                text_to_parse_size = len(text_to_parse)
-                if parse_index != text_to_parse_size:
-                    parse_index, _ = ParserHelper.extract_spaces(
-                        text_to_parse, parse_index
-                    )
-                if parse_index == text_to_parse_size:
-                    valid_raw_html = text_to_parse
-        return valid_raw_html
-
-    @staticmethod
-    def __parse_raw_declaration(text_to_parse: str) -> Optional[str]:
-        """
-        Parse a possible raw html declaration sequence, and return if it is valid.
-        """
-
-        valid_raw_html = None
-        if ParserHelper.is_character_at_index_one_of(
-            text_to_parse, 0, HtmlHelper.__raw_declaration_start_character
-        ):
-            (
-                parse_index,
-                declaration_name,
-            ) = ParserHelper.collect_while_one_of_characters(
-                text_to_parse, 1, HtmlHelper.__html_block_4_continued_start
-            )
-            assert parse_index is not None
-            if declaration_name:
-                whitespace_count, _ = ParserHelper.collect_while_character(
-                    text_to_parse, parse_index, HtmlHelper.__raw_declaration_whitespace
-                )
-                if whitespace_count:
-                    valid_raw_html = text_to_parse
-        return valid_raw_html
-
-    @staticmethod
-    def __process_raw_special(
-        remaining_line: str,
-        special_start: str,
-        special_end: str,
-        do_extra_check: bool = False,
-    ) -> Tuple[Optional[str], int]:
-        """
-        Parse a possible raw html special sequence, and return if it is valid.
-        """
-        valid_raw_html: Optional[str] = None
-        parse_index = -1
-        if remaining_line.startswith(special_start):
-            special_start_size = len(special_start)
-            remaining_line = remaining_line[special_start_size:]
-            parse_index = remaining_line.find(special_end)
-            if parse_index != -1:
-                remaining_line = remaining_line[:parse_index]
-                parse_index = parse_index + special_start_size + len(special_end)
-                if (not do_extra_check) or (
-                    not (
-                        remaining_line[0] == HtmlHelper.__raw_html_exclusion_1
-                        or remaining_line.startswith(HtmlHelper.__raw_html_exclusion_2)
-                        or remaining_line[-1] == HtmlHelper.__raw_html_exclusion_3
-                        or HtmlHelper.__raw_html_exclusion_4 in remaining_line
-                    )
-                ):
-                    valid_raw_html = (
-                        f"{special_start}{remaining_line}{special_end[:-1]}"
-                    )
-        return valid_raw_html, parse_index
-
-    @staticmethod
-    def parse_raw_html(
-        only_between_angles: str,
-        remaining_line: str,
-        line_number: int,
-        column_number: int,
-        inline_request: InlineRequest,
-    ) -> Tuple[Optional[RawHtmlMarkdownToken], int]:
-        """
-        Given an open HTML tag character (<), try the various possibilities for
-        types of tag, and determine if any of them parse validly.
-        """
-
-        valid_raw_html, remaining_line_parse_index = HtmlHelper.__parse_raw_open_tag(
-            remaining_line
-        )
-        if not valid_raw_html:
-            valid_raw_html = HtmlHelper.__parse_raw_close_tag(only_between_angles)
-        if not valid_raw_html:
-            (
-                valid_raw_html,
-                remaining_line_parse_index,
-            ) = HtmlHelper.__process_raw_special(
-                remaining_line,
-                HtmlHelper.__html_block_2_xx,
-                HtmlHelper.__html_block_2_end,
-                True,
-            )
-        if not valid_raw_html:
-            (
-                valid_raw_html,
-                remaining_line_parse_index,
-            ) = HtmlHelper.__process_raw_special(
-                remaining_line,
-                HtmlHelper.__html_block_3_continued_start,
-                HtmlHelper.__html_block_3_end,
-            )
-        if not valid_raw_html:
-            (
-                valid_raw_html,
-                remaining_line_parse_index,
-            ) = HtmlHelper.__process_raw_special(
-                remaining_line,
-                HtmlHelper.__html_block_5_xx,
-                HtmlHelper.__html_block_5_end,
-            )
-        if not valid_raw_html:
-            valid_raw_html = HtmlHelper.__parse_raw_declaration(only_between_angles)
-
-        if not valid_raw_html:
-            return None, -1
-        if inline_request.para_owner:
-            (
-                valid_raw_html,
-                inline_request.para_owner.rehydrate_index,
-            ) = ParserHelper.recombine_string_with_whitespace(
-                valid_raw_html,
-                inline_request.para_owner.extracted_whitespace,
-                inline_request.para_owner.rehydrate_index,
-                add_replace_marker_if_empty=True,
-            )
-        return (
-            RawHtmlMarkdownToken(valid_raw_html, line_number, column_number),
-            remaining_line_parse_index,
-        )
-
     @staticmethod
     def __check_for_special_html_blocks(
         line_to_parse: str, character_index: int
diff --git a/pymarkdown/html/html_raw_helper.py b/pymarkdown/html/html_raw_helper.py
new file mode 100644
index 000000000..90e9526cc
--- /dev/null
+++ b/pymarkdown/html/html_raw_helper.py
@@ -0,0 +1,341 @@
+"""
+Module to provide helper functions for parsing the raw html inline blocks.
+"""
+import logging
+import string
+from typing import Optional, Tuple
+
+from pymarkdown.constants import Constants
+from pymarkdown.inline.inline_request import InlineRequest
+from pymarkdown.inline_markdown_token import RawHtmlMarkdownToken
+from pymarkdown.parser_helper import ParserHelper
+from pymarkdown.parser_logger import ParserLogger
+
+POGGER = ParserLogger(logging.getLogger(__name__))
+
+# pylint: disable=too-many-lines
+
+
+class HtmlRawHelper:
+    """
+    Class to provide helper functions for parsing the raw html inline blocks.
+    """
+
+    __raw_declaration_start_character = "!"
+    __raw_declaration_whitespace = ParserHelper.space_character
+
+    __raw_html_exclusion_1 = ">"
+    __raw_html_exclusion_2 = "->"
+    __raw_html_exclusion_3 = "-"
+    __raw_html_exclusion_4 = "--"
+
+    __html_block_2_to_5_start = "!"
+
+    __html_block_2_continued_start = "--"
+    __html_block_2_xx = f"{__html_block_2_to_5_start}{__html_block_2_continued_start}"
+    __html_block_2_end = "-->"
+
+    __html_block_3_continued_start = "?"
+    __html_block_3_end = "?>"
+
+    __html_block_4_continued_start = string.ascii_uppercase
+
+    __html_block_5_continued_start = "[CDATA["
+    __html_block_5_xx = f"{__html_block_2_to_5_start}{__html_block_5_continued_start}"
+    __html_block_5_end = "]]>"
+
+    __html_tag_start = "/"
+    __html_tag_end = ">"
+
+    __tag_attribute_name_start = f"{string.ascii_letters}_:"
+    __tag_attribute_name_characters = f"{string.ascii_letters}{string.digits}_.:-"
+
+    __valid_tag_name_start = string.ascii_letters
+    __valid_tag_name_characters = f"{string.ascii_letters}{string.digits}-"
+
+    __html_attribute_value_single = "'"
+    __html_attribute_value_double = '"'
+    __html_attribute_name_value_separator = "="
+
+    __unquoted_attribute_value_stop = f"\"'=<>`{Constants.ascii_whitespace}"
+
+    @staticmethod
+    def parse_raw_html(
+        only_between_angles: str,
+        remaining_line: str,
+        line_number: int,
+        column_number: int,
+        inline_request: InlineRequest,
+    ) -> Tuple[Optional[RawHtmlMarkdownToken], int]:
+        """
+        Try each raw HTML form in order (open tag, close tag, `<!--`, `<?`, CDATA,
+        declaration); return (token, index), or (None, -1) if none of them parse.
+        """
+
+        valid_raw_html, remaining_line_parse_index = HtmlRawHelper.__parse_raw_open_tag(
+            remaining_line
+        )
+        if not valid_raw_html:
+            valid_raw_html = HtmlRawHelper.__parse_raw_close_tag(only_between_angles)
+        if not valid_raw_html:
+            (
+                valid_raw_html,
+                remaining_line_parse_index,
+            ) = HtmlRawHelper.__process_raw_special(
+                remaining_line,
+                HtmlRawHelper.__html_block_2_xx,
+                HtmlRawHelper.__html_block_2_end,
+                True,
+            )
+        if not valid_raw_html:
+            (
+                valid_raw_html,
+                remaining_line_parse_index,
+            ) = HtmlRawHelper.__process_raw_special(
+                remaining_line,
+                HtmlRawHelper.__html_block_3_continued_start,
+                HtmlRawHelper.__html_block_3_end,
+            )
+        if not valid_raw_html:
+            (
+                valid_raw_html,
+                remaining_line_parse_index,
+            ) = HtmlRawHelper.__process_raw_special(
+                remaining_line,
+                HtmlRawHelper.__html_block_5_xx,
+                HtmlRawHelper.__html_block_5_end,
+            )
+        if not valid_raw_html:
+            valid_raw_html = HtmlRawHelper.__parse_raw_declaration(only_between_angles)
+
+        if not valid_raw_html:
+            return None, -1
+        if inline_request.para_owner:
+            (
+                valid_raw_html,
+                inline_request.para_owner.rehydrate_index,
+            ) = ParserHelper.recombine_string_with_whitespace(
+                valid_raw_html,
+                inline_request.para_owner.extracted_whitespace,
+                inline_request.para_owner.rehydrate_index,
+                add_replace_marker_if_empty=True,
+            )
+        return (
+            RawHtmlMarkdownToken(valid_raw_html, line_number, column_number),
+            remaining_line_parse_index,
+        )
+
+    @staticmethod
+    def __parse_raw_open_tag(text_to_parse: str) -> Tuple[Optional[str], int]:
+        """
+        Parse an open tag; return (text before ">", index after it) or (None, -1).
+        """
+
+        end_parse_index, valid_raw_html, tag_name = (
+            -1,
+            None,
+            HtmlRawHelper.__parse_raw_tag_name(text_to_parse, 0),
+        )
+        if tag_name:
+            parse_index, extracted_whitespace = ParserHelper.extract_ascii_whitespace(
+                text_to_parse, len(tag_name)
+            )
+            assert parse_index is not None
+            while extracted_whitespace and ParserHelper.is_character_at_index_one_of(
+                text_to_parse,
+                parse_index,
+                HtmlRawHelper.__tag_attribute_name_start,
+            ):
+                (
+                    parse_index,
+                    extracted_whitespace,
+                ) = HtmlRawHelper.__parse_tag_attributes(text_to_parse, parse_index)
+                if parse_index is None:
+                    return None, -1
+
+            if ParserHelper.is_character_at_index(
+                text_to_parse, parse_index, HtmlRawHelper.__html_tag_start
+            ):
+                parse_index += 1
+
+            if ParserHelper.is_character_at_index(
+                text_to_parse, parse_index, HtmlRawHelper.__html_tag_end
+            ):
+                valid_raw_html = text_to_parse[:parse_index]
+                end_parse_index = parse_index + 1
+
+        return valid_raw_html, end_parse_index
+
+    @staticmethod
+    def __parse_raw_close_tag(text_to_parse: str) -> Optional[str]:
+        """
+        Return text_to_parse if it parses as a close tag (leading "/"), else None.
+        """
+        valid_raw_html = None
+        if ParserHelper.is_character_at_index(
+            text_to_parse, 0, HtmlRawHelper.__html_tag_start
+        ):
+            if tag_name := HtmlRawHelper.__parse_raw_tag_name(text_to_parse, 1):
+                parse_index: Optional[int] = len(tag_name)
+                assert parse_index is not None
+                text_to_parse_size = len(text_to_parse)
+                if parse_index != text_to_parse_size:
+                    parse_index, _ = ParserHelper.extract_spaces(
+                        text_to_parse, parse_index
+                    )
+                if parse_index == text_to_parse_size:
+                    valid_raw_html = text_to_parse
+        return valid_raw_html
+
+    @staticmethod
+    def __process_raw_special(
+        remaining_line: str,
+        special_start: str,
+        special_end: str,
+        do_extra_check: bool = False,
+    ) -> Tuple[Optional[str], int]:
+        """
+        Parse a possible raw html special sequence, and return if it is valid.
+
+        With `do_extra_check` set, the text between `special_start` and
+        `special_end` is also checked against the comment-text exclusions.
+        """
+        valid_raw_html: Optional[str] = None
+        parse_index = -1
+        if remaining_line.startswith(special_start):
+            special_start_size = len(special_start)
+            remaining_line = remaining_line[special_start_size:]
+            parse_index = remaining_line.find(special_end)
+            if parse_index != -1:
+                remaining_line = remaining_line[:parse_index]
+                parse_index = parse_index + special_start_size + len(special_end)
+                # startswith/endswith (not indexing) so an empty body cannot raise.
+                if not do_extra_check or not (
+                    remaining_line.startswith(HtmlRawHelper.__raw_html_exclusion_1)
+                    or remaining_line.startswith(HtmlRawHelper.__raw_html_exclusion_2)
+                    or remaining_line.endswith(HtmlRawHelper.__raw_html_exclusion_3)
+                    or HtmlRawHelper.__raw_html_exclusion_4 in remaining_line
+                ):
+                    valid_raw_html = (
+                        f"{special_start}{remaining_line}{special_end[:-1]}"
+                    )
+        return valid_raw_html, parse_index
+
+    @staticmethod
+    def __parse_raw_declaration(text_to_parse: str) -> Optional[str]:
+        """
+        Return text_to_parse if it starts "!" + uppercase name + whitespace, else None.
+        """
+
+        valid_raw_html = None
+        if ParserHelper.is_character_at_index_one_of(
+            text_to_parse, 0, HtmlRawHelper.__raw_declaration_start_character
+        ):
+            (
+                parse_index,
+                declaration_name,
+            ) = ParserHelper.collect_while_one_of_characters(
+                text_to_parse, 1, HtmlRawHelper.__html_block_4_continued_start
+            )
+            assert parse_index is not None
+            if declaration_name:
+                whitespace_count, _ = ParserHelper.collect_while_character(
+                    text_to_parse,
+                    parse_index,
+                    HtmlRawHelper.__raw_declaration_whitespace,
+                )
+                if whitespace_count:
+                    valid_raw_html = text_to_parse
+        return valid_raw_html
+
+    @staticmethod
+    def __parse_raw_tag_name(text_to_parse: str, start_index: int) -> str:
+        """
+        Parse an HTML tag name at start_index; return text_to_parse[:end] or "" if none.
+        """
+        if ParserHelper.is_character_at_index_one_of(
+            text_to_parse, start_index, HtmlRawHelper.__valid_tag_name_start
+        ):
+            index, __ = ParserHelper.collect_while_one_of_characters(
+                text_to_parse,
+                start_index + 1,
+                HtmlRawHelper.__valid_tag_name_characters,
+            )
+            return text_to_parse[:index]
+        return ""
+
+    @staticmethod
+    def __parse_tag_attributes(
+        text_to_parse: str, start_index: int
+    ) -> Tuple[Optional[int], Optional[str]]:
+        """
+        Parse one attribute (name, optional value); (None, None) on an unclosed quote.
+        """
+        parse_index, _ = ParserHelper.collect_while_one_of_characters(
+            text_to_parse, start_index, HtmlRawHelper.__tag_attribute_name_characters
+        )
+        assert parse_index is not None
+        end_name_index, extracted_whitespace = ParserHelper.extract_ascii_whitespace(
+            text_to_parse, parse_index
+        )
+        assert end_name_index is not None
+        if ParserHelper.is_character_at_index(
+            text_to_parse,
+            end_name_index,
+            HtmlRawHelper.__html_attribute_name_value_separator,
+        ):
+            (
+                value_start_index,
+                _,
+            ) = ParserHelper.extract_ascii_whitespace(text_to_parse, end_name_index + 1)
+            assert value_start_index is not None
+            value_end_index: Optional[int] = None
+            if ParserHelper.is_character_at_index_one_of(
+                text_to_parse,
+                value_start_index,
+                HtmlRawHelper.__html_attribute_value_single,
+            ):
+                value_end_index, _ = ParserHelper.collect_until_character(
+                    text_to_parse,
+                    value_start_index + 1,
+                    HtmlRawHelper.__html_attribute_value_single,
+                )
+                assert value_end_index is not None
+                if not ParserHelper.is_character_at_index(
+                    text_to_parse,
+                    value_end_index,
+                    HtmlRawHelper.__html_attribute_value_single,
+                ):
+                    return None, None
+                value_end_index += 1
+            elif ParserHelper.is_character_at_index_one_of(
+                text_to_parse,
+                value_start_index,
+                HtmlRawHelper.__html_attribute_value_double,
+            ):
+                value_end_index, _ = ParserHelper.collect_until_character(
+                    text_to_parse,
+                    value_start_index + 1,
+                    HtmlRawHelper.__html_attribute_value_double,
+                )
+                assert value_end_index is not None
+                if not ParserHelper.is_character_at_index(
+                    text_to_parse,
+                    value_end_index,
+                    HtmlRawHelper.__html_attribute_value_double,
+                ):
+                    return None, None
+                value_end_index += 1
+            else:
+                value_end_index, _ = ParserHelper.collect_until_one_of_characters(
+                    text_to_parse,
+                    value_start_index,
+                    HtmlRawHelper.__unquoted_attribute_value_stop,
+                )
+            assert value_end_index is not None
+            (
+                end_name_index,
+                extracted_whitespace,
+            ) = ParserHelper.extract_ascii_whitespace(text_to_parse, value_end_index)
+
+        return end_name_index, extracted_whitespace
diff --git a/pymarkdown/inline/inline_autolink_helper.py b/pymarkdown/inline/inline_autolink_helper.py
index 949bce772..530c9c1ff 100644
--- a/pymarkdown/inline/inline_autolink_helper.py
+++ b/pymarkdown/inline/inline_autolink_helper.py
@@ -6,7 +6,7 @@
 import string
 from typing import Optional, cast
 
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_raw_helper import HtmlRawHelper
 from pymarkdown.inline.inline_request import InlineRequest
 from pymarkdown.inline.inline_response import InlineResponse
 from pymarkdown.inline_markdown_token import (
@@ -70,7 +70,7 @@ def handle_angle_brackets(inline_request: InlineRequest) -> InlineResponse:
                     between_brackets, inline_request.line_number, new_column_number
                 )
             if not new_token:
-                new_token, after_index = HtmlHelper.parse_raw_html(
+                new_token, after_index = HtmlRawHelper.parse_raw_html(
                     between_brackets,
                     remaining_line,
                     inline_request.line_number,
diff --git a/pymarkdown/leaf_blocks/leaf_block_processor.py b/pymarkdown/leaf_blocks/leaf_block_processor.py
index 935e12c0d..853cd4055 100644
--- a/pymarkdown/leaf_blocks/leaf_block_processor.py
+++ b/pymarkdown/leaf_blocks/leaf_block_processor.py
@@ -5,7 +5,7 @@
 from typing import List, Optional
 
 from pymarkdown.container_grab_bag import ContainerGrabBag
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_helper import HtmlHelper
 from pymarkdown.leaf_blocks.atx_leaf_block_processor import AtxLeafBlockProcessor
 from pymarkdown.leaf_blocks.fenced_leaf_block_processor import FencedLeafBlockProcessor
 from pymarkdown.leaf_blocks.leaf_block_helper import LeafBlockHelper
diff --git a/pymarkdown/list_block_processor.py b/pymarkdown/list_block_processor.py
index 7b0bdabd1..81f641c58 100644
--- a/pymarkdown/list_block_processor.py
+++ b/pymarkdown/list_block_processor.py
@@ -15,7 +15,7 @@
     OrderedListStartMarkdownToken,
     UnorderedListStartMarkdownToken,
 )
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_helper import HtmlHelper
 from pymarkdown.leaf_blocks.atx_leaf_block_processor import AtxLeafBlockProcessor
 from pymarkdown.leaf_blocks.fenced_leaf_block_processor import FencedLeafBlockProcessor
 from pymarkdown.leaf_blocks.leaf_block_processor import LeafBlockProcessor
diff --git a/pymarkdown/tokenized_markdown.py b/pymarkdown/tokenized_markdown.py
index e7ba9d26b..75ff4990e 100644
--- a/pymarkdown/tokenized_markdown.py
+++ b/pymarkdown/tokenized_markdown.py
@@ -17,7 +17,7 @@
 from pymarkdown.extension_manager.extension_manager import ExtensionManager
 from pymarkdown.extensions.front_matter_extension import FrontMatterExtension
 from pymarkdown.extensions.pragma_token import PragmaToken
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_helper import HtmlHelper
 from pymarkdown.inline.inline_character_reference_helper import (
     InlineCharacterReferenceHelper,
 )
diff --git a/test/basic/test_complete_html_tags.py b/test/basic/test_complete_html_tags.py
index b63a4fddb..a43aa34ff 100644
--- a/test/basic/test_complete_html_tags.py
+++ b/test/basic/test_complete_html_tags.py
@@ -1,7 +1,7 @@
 """
 Tests for the functions that deal with parsing of complete html tags.
 """
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_helper import HtmlHelper
 
 
 def test_simple_complete_html_end_tag():
diff --git a/test/basic/test_html_tags.py b/test/basic/test_html_tags.py
index 5530aa4eb..d70528f43 100644
--- a/test/basic/test_html_tags.py
+++ b/test/basic/test_html_tags.py
@@ -1,7 +1,7 @@
 """
 Tests for the functions that deal with parsing of html tags.
 """
-from pymarkdown.html_helper import HtmlHelper
+from pymarkdown.html.html_helper import HtmlHelper
 
 
 def test_empty_tag_name():