From 6942d508f0c0aea17b72e1ce4a612425082316e5 Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Mon, 13 Apr 2026 16:45:27 -0400 Subject: [PATCH] SETUP_SYNTAX_TABLE: syntax-propertize more lazily (bug#80600) Many regexps do not pay attention to syntax tables at all because they don't use things like \s, \b, \<, or \_<. So delay the actual initialization of the machinery for `syntax-table` properties (especially the call to `syntax-propertize`) until the first time we actually need to consult the syntax of a character. * src/syntax.c (uninitialized_interval): New constant. (RE_SETUP_SYNTAX_TABLE_FOR_OBJECT, SETUP_SYNTAX_TABLE): Set `forw/backward_i` to that new constant to indicate they haven't actually be initialized yet. Set `start` and `stop` fields right away, instead of delegating it to `update_syntax_table`, so that `e_property` and `b_property` can be set independently. (update_syntax_table): Remove the `init` argument. Instead, discover the need for initialization based on the fact that `forw/backward_i` are uninitialized. Presume `start` and `stop` fields have been set already. (parse_sexp_propertize): Update call accordingly. (update_syntax_table_forward): Remove the `init` argument. * src/syntax.h (update_syntax_table, update_syntax_table_forward): Remove the `init` argument. (UPDATE_SYNTAX_TABLE_FORWARD, UPDATE_SYNTAX_TABLE_BACKWARD): Update calls accordingly. --- src/syntax.c | 44 ++++++++++++++++++++++++++++++-------------- src/syntax.h | 8 ++++---- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/syntax.c b/src/syntax.c index 64d938b57c1..04b48e91529 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -235,6 +235,9 @@ SYNTAX_MATCH (int c) return CONSP (ent) ? XCDR (ent) : Qnil; } +/* A "dummy" value we never dereference, distinct from NULL. */ +#define uninitialized_interval ((INTERVAL) (intptr_t) 1) + /* This should be called with FROM at the start of forward search, or after the last position of the backward search. It makes sure that the first char is picked up with correct table, so @@ -252,11 +255,14 @@ SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count) gl_state.object = Qnil; if (parse_sexp_lookup_properties) { + gl_state.start = gl_state.b_property; + gl_state.stop = gl_state.e_property; + gl_state.backward_i = gl_state.forward_i = uninitialized_interval; if (count > 0) - update_syntax_table_forward (from, true, Qnil); + update_syntax_table_forward (from, Qnil); else if (from > BEGV) { - update_syntax_table (from - 1, count, true, Qnil); + update_syntax_table (from - 1, count, Qnil); parse_sexp_propertize (from - 1); } } @@ -295,8 +301,14 @@ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object, gl_state.e_property = 1 + SCHARS (gl_state.object); } if (parse_sexp_lookup_properties) - update_syntax_table (RE_SYNTAX_TABLE_BYTE_TO_CHAR (frombyte), - 1, 1, gl_state.object); + { + gl_state.start = gl_state.b_property; + gl_state.stop = gl_state.e_property; + /* Don't cover any char, to make sure we'll call `update_syntax_table` + * the very next time we need the syntax of a character. */ + gl_state.e_property = gl_state.b_property; + gl_state.backward_i = gl_state.forward_i = uninitialized_interval; + } } /* Update gl_state to an appropriate interval which contains CHARPOS. The @@ -313,7 +325,7 @@ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object, start/end of OBJECT. */ void -update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init, +update_syntax_table (ptrdiff_t charpos, EMACS_INT count, Lisp_Object object) { Lisp_Object tmp_table; @@ -321,22 +333,27 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init, bool invalidate = true; INTERVAL i; - if (init) + if (gl_state.backward_i == uninitialized_interval) { + eassert (gl_state.forward_i == uninitialized_interval); gl_state.old_prop = Qnil; - gl_state.start = gl_state.b_property; - gl_state.stop = gl_state.e_property; i = interval_of (charpos, object); gl_state.backward_i = gl_state.forward_i = i; invalidate = false; if (!i) - return; + { + gl_state.b_property = gl_state.start; + gl_state.e_property = gl_state.stop; + return; + } i = gl_state.forward_i; gl_state.b_property = i->position; gl_state.e_property = INTERVAL_LAST_POS (i); } else { + eassert (gl_state.backward_i != uninitialized_interval); + eassert (gl_state.forward_i != uninitialized_interval); i = count > 0 ? gl_state.forward_i : gl_state.backward_i; /* We are guaranteed to be called with CHARPOS either in i, @@ -489,13 +506,12 @@ parse_sexp_propertize (ptrdiff_t charpos) e_property_truncated, so the e_property_truncated flag may occasionally be left raised spuriously. This should be rare. */ gl_state.e_property_truncated = false; - update_syntax_table_forward (charpos, false, Qnil); + update_syntax_table_forward (charpos, Qnil); } } void -update_syntax_table_forward (ptrdiff_t charpos, bool init, - Lisp_Object object) +update_syntax_table_forward (ptrdiff_t charpos, Lisp_Object object) { if (gl_state.e_property_truncated) { @@ -505,7 +521,7 @@ update_syntax_table_forward (ptrdiff_t charpos, bool init, } else { - update_syntax_table (charpos, 1, init, object); + update_syntax_table (charpos, 1, object); if (NILP (object) && gl_state.e_property > syntax_propertize__done) parse_sexp_propertize (charpos); } @@ -2187,7 +2203,7 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim) while (!parse_sexp_lookup_properties || pos < gl_state.e_property); - update_syntax_table_forward (pos, false, gl_state.object); + update_syntax_table_forward (pos, gl_state.object); } } else diff --git a/src/syntax.h b/src/syntax.h index 9ed917b1a93..1e32a485eb1 100644 --- a/src/syntax.h +++ b/src/syntax.h @@ -26,8 +26,8 @@ along with GNU Emacs. If not, see . */ INLINE_HEADER_BEGIN -extern void update_syntax_table (ptrdiff_t, EMACS_INT, bool, Lisp_Object); -extern void update_syntax_table_forward (ptrdiff_t, bool, Lisp_Object); +extern void update_syntax_table (ptrdiff_t, EMACS_INT, Lisp_Object); +extern void update_syntax_table_forward (ptrdiff_t, Lisp_Object); /* The standard syntax table is stored where it will automatically be used in all new buffers. */ @@ -175,7 +175,7 @@ INLINE void UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos) { /* Performs just-in-time syntax-propertization. */ if (parse_sexp_lookup_properties && charpos >= gl_state.e_property) - update_syntax_table_forward (charpos, false, gl_state.object); + update_syntax_table_forward (charpos, gl_state.object); } /* Make syntax table state (gl_state) good for CHARPOS, assuming it is @@ -185,7 +185,7 @@ INLINE void UPDATE_SYNTAX_TABLE_BACKWARD (ptrdiff_t charpos) { if (parse_sexp_lookup_properties && charpos < gl_state.b_property) - update_syntax_table (charpos, -1, false, gl_state.object); + update_syntax_table (charpos, -1, gl_state.object); } /* Make syntax table good for CHARPOS. */