Skip to content

Commit

Permalink
fix: an error on empty paragraph surrounded by non-paragraphs
Browse files Browse the repository at this point in the history
  • Loading branch information
hukkin committed Nov 11, 2024
1 parent b8c3561 commit 27e2ba3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 14 deletions.
2 changes: 2 additions & 0 deletions docs/users/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Note that there is currently no guarantee for a stable Markdown formatting style
- Incorrect line wrap on lines right after a hard break.
Thank you, [MDW](https://github.com/mdeweerd), for the issue.
- Adding an extra leading space to paragraphs that start with space in line wrap modes.
- An error on an empty paragraph (Unicode space only) surrounded by non-paragraph elements.
Thank you, [Nico Schlömer](https://github.com/nschloe), for the issue.
- Added
- Plugin interface: `mdformat.plugins.ParserExtensionInterface.add_cli_argument_group`.
With this, plugins can now read CLI arguments merged with values from `.mdformat.toml`.
Expand Down
24 changes: 11 additions & 13 deletions src/mdformat/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@

NULL_CTX = nullcontext()
EMPTY_MAP: MappingProxyType = MappingProxyType({})

RE_NEWLINES = re.compile(r"\r\n|\r|\n")
RE_HTML_START_SPACE_PREFIX = re.compile(r" (<[a-zA-Z][-a-zA-Z0-9]*>)")
RE_HTML_END_SPACE_SUFFIX = re.compile(r"(</[a-zA-Z][-a-zA-Z0-9]*>) ")


def build_mdit(
Expand Down Expand Up @@ -62,13 +65,6 @@ def is_md_equal(
for key, text in [("md1", md1), ("md2", md2)]:
html = mdit.render(text)

# The HTML can start with whitespace if Markdown starts with raw HTML
# preceded by whitespace. This whitespace should be safe to lstrip.
# Also, the trailing newline we add at the end of a document that ends
# in a raw html block not followed by a newline, seems to propagate to
# an HTML rendering. This newline should be safe to rstrip.
html = html.strip()

# Remove codeblocks because code formatter plugins do arbitrary changes.
for codeclass in codeformatters:
html = re.sub(
Expand All @@ -85,17 +81,19 @@ def is_md_equal(
html = html.replace("<p> ", "<p>")
html = html.replace(" </p>", "</p>")

# Also strip whitespace leading/trailing the <p> elements so that we can
# safely remove empty paragraphs below without introducing extra whitespace.
html = html.replace(" <p>", "<p>")
html = html.replace("</p> ", "</p>")
# Also remove a space that precedes an opening tag or follows a
# closing tag, so that we can safely remove empty paragraphs
# below without introducing extra whitespace.
html = RE_HTML_END_SPACE_SUFFIX.sub(r"\g<1>", html)
html = RE_HTML_START_SPACE_PREFIX.sub(r"\g<1>", html)

# empty p elements should be ignored by user agents
# (https://www.w3.org/TR/REC-html40/struct/text.html#edef-P)
html = html.replace("<p></p>", "")

# If it's nothing but whitespace, it's equal
html = re.sub(r"^\s+$", "", html)
# Leading and trailing whitespace should be safe to ignore. This
# also makes any documents that are whitespace-only equal.
html = html.strip()

html_texts[key] = html

Expand Down
2 changes: 1 addition & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_fmt_string():
pytest.param("a\n\n\xa0\n\nb"), # lone NBSP between two paragraphs
pytest.param("\xa0\n\n# heading"), # lone NBSP followed by a heading
pytest.param(
"```\na\n```\n\u2003\n# A\n", marks=pytest.mark.xfail()
"```\na\n```\n\u2003\n# A\n"
), # em space surrounded by code and header
],
)
Expand Down

0 comments on commit 27e2ba3

Please sign in to comment.