From 5c75d23a02c52ee8b226ce53e3fb5339e4593757 Mon Sep 17 00:00:00 2001 From: Joshua Katz Date: Sat, 4 Mar 2023 12:22:11 -0500 Subject: [PATCH 1/6] Ignore nested STRONGs during rendering STRONG, in most rendering engines, becomes bold. Bold cannot be applied to text two times in most languages. This caps the number of times we attempt to bold text when rendering. Running `python3 -c 'pad = "_" * 100000; print(pad + "." + pad, end="")' | time ./build/src/cmark-gfm --to $LANG` Before: ``` ./build/src/cmark-gfm --to plaintext > /dev/null 12.29s user 0.00s system 99% cpu 12.321 total ./build/src/cmark-gfm --to commonmark > /dev/null 25.97s user 0.01s system 99% cpu 26.026 total ./build/src/cmark-gfm --to html > /dev/null 0.01s user 0.00s system 43% cpu 0.033 total ./build/src/cmark-gfm --to man > /dev/null 12.91s user 0.00s system 99% cpu 12.938 total ./build/src/cmark-gfm --to latex > /dev/null 13.13s user 0.01s system 99% cpu 13.159 total ``` After: ``` ./build/src/cmark-gfm --to plaintext > /dev/null 0.01s user 0.01s system 39% cpu 0.030 total ./build/src/cmark-gfm --to commonmark > /dev/null 0.01s user 0.00s system 41% cpu 0.031 total ./build/src/cmark-gfm --to html > /dev/null 0.01s user 0.00s system 38% cpu 0.030 total ./build/src/cmark-gfm --to man > /dev/null 0.01s user 0.01s system 40% cpu 0.030 total ./build/src/cmark-gfm --to latex > /dev/null 0.01s user 0.00s system 39% cpu 0.033 total ``` --- src/commonmark.c | 29 +++++++++++++++++------------ src/html.c | 10 ++++++---- src/latex.c | 10 ++++++---- src/man.c | 10 ++++++---- src/plaintext.c | 19 +++++++++++-------- 5 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/commonmark.c b/src/commonmark.c index 2e0719443..8d0644d57 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -189,14 +189,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, // Don't adjust tight list status til we've started the list. 
// Otherwise we loose the blank line between a paragraph and // a following list. - if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { - tmp = get_containing_block(node); - renderer->in_tight_list_item = - tmp && // tmp might be NULL if there is no containing block - ((tmp->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent)) || - (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent->parent))); + if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } } if (node->extension && node->extension->commonmark_render_func) { @@ -405,10 +408,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("**"); - } else { - LIT("**"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("**"); + } else { + LIT("**"); + } } break; diff --git a/src/html.c b/src/html.c index 529e0ea31..22513c939 100644 --- a/src/html.c +++ b/src/html.c @@ -364,10 +364,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - cmark_strbuf_puts(html, ""); - } else { - cmark_strbuf_puts(html, ""); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } } break; diff --git a/src/latex.c b/src/latex.c index 8be15b0d5..1a6367a4e 100644 --- a/src/latex.c +++ b/src/latex.c @@ -385,10 +385,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("\\textbf{"); - } else { - 
LIT("}"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("\\textbf{"); + } else { + LIT("}"); + } } break; diff --git a/src/man.c b/src/man.c index 441a96e49..da0706509 100644 --- a/src/man.c +++ b/src/man.c @@ -225,10 +225,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_STRONG: - if (entering) { - LIT("\\f[B]"); - } else { - LIT("\\f[]"); + if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) { + if (entering) { + LIT("\\f[B]"); + } else { + LIT("\\f[]"); + } } break; diff --git a/src/plaintext.c b/src/plaintext.c index b25e4a396..d9c6a060b 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -46,14 +46,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and // a following list. - if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { - tmp = get_containing_block(node); - renderer->in_tight_list_item = - tmp && // tmp might be NULL if there is no containing block - ((tmp->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent)) || - (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent->parent))); + if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } } if (node->extension && node->extension->plaintext_render_func) { From 828322d1ee4facdab56f0d3edccb13e9af90dcd2 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Sat, 25 Mar 2023 14:24:25 +0000 Subject: [PATCH 2/6] Update expected output --- test/spec.txt | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/test/spec.txt b/test/spec.txt index eaab9032d..d42f3369e 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -6926,7 +6926,7 @@ foo__bar__ ```````````````````````````````` example __foo, __bar__, baz__ . -

<p><strong>foo, <strong>bar</strong>, baz</strong></p>
+<p><strong>foo, bar, baz</strong></p>
```````````````````````````````` @@ -7197,7 +7197,7 @@ foo***bar***baz ```````````````````````````````` example foo******bar*********baz . -

<p>foo<strong><strong><strong>bar</strong></strong></strong>***baz</p>
+<p>foo<strong>bar</strong>***baz</p>
```````````````````````````````` @@ -7268,21 +7268,21 @@ __foo _bar_ baz__ ```````````````````````````````` example __foo __bar__ baz__ . -

<p><strong>foo <strong>bar</strong> baz</strong></p>
+<p><strong>foo bar baz</strong></p>
```````````````````````````````` ```````````````````````````````` example ____foo__ bar__ . -

<p><strong><strong>foo</strong> bar</strong></p>
+<p><strong>foo bar</strong></p>
```````````````````````````````` ```````````````````````````````` example **foo **bar**** . -

<p><strong>foo <strong>bar</strong></strong></p>
+<p><strong>foo bar</strong></p>
```````````````````````````````` @@ -7567,14 +7567,14 @@ switching delimiters: ```````````````````````````````` example ****foo**** . -

<p><strong><strong>foo</strong></strong></p>
+<p><strong>foo</strong></p>
```````````````````````````````` ```````````````````````````````` example ____foo____ . -

<p><strong><strong>foo</strong></strong></p>
+<p><strong>foo</strong></p>
```````````````````````````````` @@ -7585,7 +7585,7 @@ delimiters: ```````````````````````````````` example ******foo****** . -

<p><strong><strong><strong>foo</strong></strong></strong></p>
+<p><strong>foo</strong></p>
```````````````````````````````` @@ -7601,7 +7601,7 @@ Rule 14: ```````````````````````````````` example _____foo_____ . -

<p><em><strong><strong>foo</strong></strong></em></p>
+<p><em><strong>foo</strong></em></p>
```````````````````````````````` From f7e31f8d7af9e9e7b76a68055a9d9b4a25e26286 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 23 Aug 2020 10:58:08 -0700 Subject: [PATCH 3/6] Add MAX_INDENT for xml. Otherwise we can get quadratic increase in size with deeply nested structures. See #355. --- src/xml.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xml.c b/src/xml.c index 2975bf96c..5753e5ab9 100644 --- a/src/xml.c +++ b/src/xml.c @@ -11,6 +11,7 @@ #include "syntax_extension.h" #define BUFFER_SIZE 100 +#define MAX_INDENT 40 // Functions to convert cmark_nodes to XML strings. @@ -26,7 +27,7 @@ struct render_state { static CMARK_INLINE void indent(struct render_state *state) { int i; - for (i = 0; i < state->indent; i++) { + for (i = 0; i < state->indent && i < MAX_INDENT; i++) { cmark_strbuf_putc(state->xml, ' '); } } From 763587e8775350b8cb4a2aa0f4cec3685aa96e8b Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 28 Mar 2023 11:07:06 +0100 Subject: [PATCH 4/6] Fix quadratic performance issue in list numbering. 
--- src/commonmark.c | 8 ++------ src/man.c | 8 ++------ src/plaintext.c | 8 ++------ src/render.c | 4 ++-- src/render.h | 1 + 5 files changed, 9 insertions(+), 20 deletions(-) diff --git a/src/commonmark.c b/src/commonmark.c index 8d0644d57..843cf710f 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -231,19 +231,15 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, LIT(""); BLANKLINE(); } + renderer->list_number = cmark_node_get_list_start(node); break; case CMARK_NODE_ITEM: if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { marker_width = 4; } else { - list_number = cmark_node_get_list_start(node->parent); + list_number = renderer->list_number++; list_delim = cmark_node_get_list_delim(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } // we ensure a width of at least 4 so // we get nice transition from single digits // to double diff --git a/src/man.c b/src/man.c index da0706509..c6383bf86 100644 --- a/src/man.c +++ b/src/man.c @@ -114,6 +114,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, break; case CMARK_NODE_LIST: + renderer->list_number = cmark_node_get_list_start(node); break; case CMARK_NODE_ITEM: @@ -123,12 +124,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { LIT("\\[bu] 2"); } else { - list_number = cmark_node_get_list_start(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } + list_number = renderer->list_number++; char list_number_s[LIST_NUMBER_SIZE]; snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number); LIT(list_number_s); diff --git a/src/plaintext.c b/src/plaintext.c index d9c6a060b..e708165c7 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -76,19 +76,15 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, node->next->type == CMARK_NODE_LIST)) { CR(); } + 
renderer->list_number = cmark_node_get_list_start(node); break; case CMARK_NODE_ITEM: if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { marker_width = 4; } else { - list_number = cmark_node_get_list_start(node->parent); + list_number = renderer->list_number++; list_delim = cmark_node_get_list_delim(node->parent); - tmp = node; - while (tmp->prev) { - tmp = tmp->prev; - list_number += 1; - } // we ensure a width of at least 4 so // we get nice transition from single digits // to double diff --git a/src/render.c b/src/render.c index 02e9e838b..c6dd23473 100644 --- a/src/render.c +++ b/src/render.c @@ -177,8 +177,8 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, cmark_renderer renderer = {mem, &buf, &pref, 0, width, 0, 0, true, true, false, - false, outc, S_cr, S_blankline, S_out, - 0}; + false, 0, outc, S_cr, S_blankline, + S_out, 0}; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); diff --git a/src/render.h b/src/render.h index 4a68d1e07..aa5162f94 100644 --- a/src/render.h +++ b/src/render.h @@ -23,6 +23,7 @@ struct cmark_renderer { bool begin_content; bool no_linebreaks; bool in_tight_list_item; + int list_number; void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char); void (*cr)(struct cmark_renderer *); void (*blankline)(struct cmark_renderer *); From 78e1cc07692c1cd7d12c3e64da8047e82e84b6cb Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Tue, 28 Mar 2023 14:07:09 +0100 Subject: [PATCH 5/6] Add ancestor_extension field. 
--- src/node.h | 8 ++++++++ src/render.c | 13 ++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/node.h b/src/node.h index e025e25d6..fa47e4468 100644 --- a/src/node.h +++ b/src/node.h @@ -82,6 +82,14 @@ struct cmark_node { cmark_syntax_extension *extension; + /** + * Used during cmark_render() to cache the most recent non-NULL + * extension, if you go up the parent chain like this: + * + * node->parent->...parent->extension + */ + cmark_syntax_extension *ancestor_extension; + union { int ref_ix; int def_count; diff --git a/src/render.c b/src/render.c index c6dd23473..d7a83ebfb 100644 --- a/src/render.c +++ b/src/render.c @@ -31,13 +31,7 @@ static void S_out(cmark_renderer *renderer, cmark_node *node, cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; - cmark_syntax_extension *ext = NULL; - cmark_node *n = node; - while (n && !ext) { - ext = n->extension; - if (!ext) - n = n->parent; - } + cmark_syntax_extension *ext = node->ancestor_extension; if (ext && !ext->commonmark_escape_func) ext = NULL; @@ -182,6 +176,11 @@ char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); + if (cur->extension) { + cur->ancestor_extension = cur->extension; + } else if (cur->parent) { + cur->ancestor_extension = cur->parent->ancestor_extension; + } if (!render_node(&renderer, cur, ev_type, options)) { // a false value causes us to skip processing // the node's contents. 
this is used for From bd4f96e7fd06ce8cb61e6f4ba41972d2a8b91d24 Mon Sep 17 00:00:00 2001 From: Kevin Backhouse Date: Wed, 29 Mar 2023 21:30:22 +0100 Subject: [PATCH 6/6] Remove dead code --- src/commonmark.c | 15 --------------- src/man.c | 1 - src/plaintext.c | 15 --------------- 3 files changed, 31 deletions(-) diff --git a/src/commonmark.c b/src/commonmark.c index 843cf710f..f2210cdfb 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -153,23 +153,8 @@ static bool is_autolink(cmark_node *node) { link_text->as.literal.len) == 0); } -// if node is a block node, returns node. -// otherwise returns first block-level node that is an ancestor of node. -// if there is no block-level ancestor, returns NULL. -static cmark_node *get_containing_block(cmark_node *node) { - while (node) { - if (CMARK_NODE_BLOCK_P(node)) { - return node; - } else { - node = node->parent; - } - } - return NULL; -} - static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; cmark_delim_type list_delim; int numticks; diff --git a/src/man.c b/src/man.c index c6383bf86..e40e46ce2 100644 --- a/src/man.c +++ b/src/man.c @@ -74,7 +74,6 @@ static void S_outc(cmark_renderer *renderer, cmark_node *node, static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; bool entering = (ev_type == CMARK_EVENT_ENTER); bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); diff --git a/src/plaintext.c b/src/plaintext.c index e708165c7..a40476210 100644 --- a/src/plaintext.c +++ b/src/plaintext.c @@ -16,23 +16,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, cmark_render_code_point(renderer, c); } -// if node is a block node, returns node. -// otherwise returns first block-level node that is an ancestor of node. -// if there is no block-level ancestor, returns NULL. 
-static cmark_node *get_containing_block(cmark_node *node) { - while (node) { - if (CMARK_NODE_BLOCK_P(node)) { - return node; - } else { - node = node->parent; - } - } - return NULL; -} - static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - cmark_node *tmp; int list_number; cmark_delim_type list_delim; int i;