Skip to content

Commit

Permalink
Remove tags_to_newline option on strip_html_tags
Browse files Browse the repository at this point in the history
It turns out that LMSes that support HTML also preserve newlines, so we only
need to remove the HTML tags to display the text in our textarea.
  • Loading branch information
marcospri committed Dec 18, 2023
1 parent 24ed438 commit ac3f738
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 19 deletions.
12 changes: 3 additions & 9 deletions lms/services/html_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,21 @@


class WhiteSpaceHTMLParser(HTMLParser): # pylint:disable=abstract-method
def __init__(self, tags_to_newline):
def __init__(self):
super().__init__()
self._chunks = []
self._tags_to_new_line = tags_to_newline or []

def handle_data(self, data):
self._chunks.append(data)

def handle_endtag(self, tag):
if tag in self._tags_to_new_line:
self._chunks.append("\n")

def get_text(self) -> str:
joined = "".join(self._chunks).strip()
# Remove any superfluous white space added after new lines
return re.sub(r"\n\s", "\n", joined)


def strip_html_tags(html: str, tags_to_newline=None) -> str:
parser = WhiteSpaceHTMLParser(tags_to_newline)
def strip_html_tags(html: str) -> str:
parser = WhiteSpaceHTMLParser()
parser.feed(html)
parser.close()

return parser.get_text()
17 changes: 7 additions & 10 deletions tests/unit/lms/services/html_service_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@


@pytest.mark.parametrize(
"text,expected,tags_to_new_line",
"text,expected",
[
("<b>COLON :</b>", "COLON :", None),
("A <em>B</em>", "A B", None),
(" C <em>D</em> E", "C D E", None),
("A<B", "A<B", None),
("<p>PARAGRAPH</p>OTHER", "PARAGRAPH\nOTHER", ["p"]),
("<p>PARAGRAPH</p>OTHER<br/>ANOTHER", "PARAGRAPH\nOTHER\nANOTHER", ["p", "br"]),
("<div>1\n <p>2</p>\n<p>3 </p>\n</div>", "1\n2\n3", ["p", "br"]),
("<b>COLON :</b>", "COLON :"),
("A <em>B</em>", "A B"),
(" C <em>D</em> E", "C D E"),
("A<B", "A<B"),
],
)
def test_strip_html_tags(text, expected, tags_to_new_line):
assert strip_html_tags(text, tags_to_new_line) == expected
def test_strip_html_tags(text, expected):
assert strip_html_tags(text) == expected

0 comments on commit ac3f738

Please sign in to comment.