SETUP_SYNTAX_TABLE: syntax-propertize more lazily (bug#80600)

Many regexps do not pay attention to syntax tables at all
because they don't use things like \s, \b, \<, or \_<.
So delay the actual initialization of the machinery for
`syntax-table` properties (especially the call to `syntax-propertize`)
until the first time we actually need to consult the syntax of
a character.

* src/syntax.c (uninitialized_interval): New constant.
(RE_SETUP_SYNTAX_TABLE_FOR_OBJECT, SETUP_SYNTAX_TABLE):
Set `forw/backward_i` to that new constant to indicate they haven't
actually be initialized yet.
Set `start` and `stop` fields right away, instead of delegating it to
`update_syntax_table`, so that `e_property` and `b_property`
can be set independently.
(update_syntax_table): Remove the `init` argument.  Instead, discover
the need for initialization based on the fact that `forw/backward_i`
are uninitialized.  Presume `start` and `stop` fields have been
set already.
(parse_sexp_propertize): Update call accordingly.
(update_syntax_table_forward): Remove the `init` argument.

* src/syntax.h (update_syntax_table, update_syntax_table_forward):
Remove the `init` argument.
(UPDATE_SYNTAX_TABLE_FORWARD, UPDATE_SYNTAX_TABLE_BACKWARD):
Update calls accordingly.
This commit is contained in:
Stefan Monnier
2026-04-13 16:45:27 -04:00
parent 6b479f201c
commit 6942d508f0
2 changed files with 34 additions and 18 deletions

View File

@@ -235,6 +235,9 @@ SYNTAX_MATCH (int c)
return CONSP (ent) ? XCDR (ent) : Qnil;
}
/* A "dummy" value we never dereference, distinct from NULL. */
#define uninitialized_interval ((INTERVAL) (intptr_t) 1)
/* This should be called with FROM at the start of forward
search, or after the last position of the backward search. It
makes sure that the first char is picked up with correct table, so
@@ -252,11 +255,14 @@ SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
gl_state.object = Qnil;
if (parse_sexp_lookup_properties)
{
gl_state.start = gl_state.b_property;
gl_state.stop = gl_state.e_property;
gl_state.backward_i = gl_state.forward_i = uninitialized_interval;
if (count > 0)
update_syntax_table_forward (from, true, Qnil);
update_syntax_table_forward (from, Qnil);
else if (from > BEGV)
{
update_syntax_table (from - 1, count, true, Qnil);
update_syntax_table (from - 1, count, Qnil);
parse_sexp_propertize (from - 1);
}
}
@@ -295,8 +301,14 @@ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
gl_state.e_property = 1 + SCHARS (gl_state.object);
}
if (parse_sexp_lookup_properties)
update_syntax_table (RE_SYNTAX_TABLE_BYTE_TO_CHAR (frombyte),
1, 1, gl_state.object);
{
gl_state.start = gl_state.b_property;
gl_state.stop = gl_state.e_property;
/* Don't cover any char, to make sure we'll call `update_syntax_table`
* the very next time we need the syntax of a character. */
gl_state.e_property = gl_state.b_property;
gl_state.backward_i = gl_state.forward_i = uninitialized_interval;
}
}
/* Update gl_state to an appropriate interval which contains CHARPOS. The
@@ -313,7 +325,7 @@ RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
start/end of OBJECT. */
void
update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
update_syntax_table (ptrdiff_t charpos, EMACS_INT count,
Lisp_Object object)
{
Lisp_Object tmp_table;
@@ -321,22 +333,27 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
bool invalidate = true;
INTERVAL i;
if (init)
if (gl_state.backward_i == uninitialized_interval)
{
eassert (gl_state.forward_i == uninitialized_interval);
gl_state.old_prop = Qnil;
gl_state.start = gl_state.b_property;
gl_state.stop = gl_state.e_property;
i = interval_of (charpos, object);
gl_state.backward_i = gl_state.forward_i = i;
invalidate = false;
if (!i)
return;
{
gl_state.b_property = gl_state.start;
gl_state.e_property = gl_state.stop;
return;
}
i = gl_state.forward_i;
gl_state.b_property = i->position;
gl_state.e_property = INTERVAL_LAST_POS (i);
}
else
{
eassert (gl_state.backward_i != uninitialized_interval);
eassert (gl_state.forward_i != uninitialized_interval);
i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
/* We are guaranteed to be called with CHARPOS either in i,
@@ -489,13 +506,12 @@ parse_sexp_propertize (ptrdiff_t charpos)
e_property_truncated, so the e_property_truncated flag may
occasionally be left raised spuriously. This should be rare. */
gl_state.e_property_truncated = false;
update_syntax_table_forward (charpos, false, Qnil);
update_syntax_table_forward (charpos, Qnil);
}
}
void
update_syntax_table_forward (ptrdiff_t charpos, bool init,
Lisp_Object object)
update_syntax_table_forward (ptrdiff_t charpos, Lisp_Object object)
{
if (gl_state.e_property_truncated)
{
@@ -505,7 +521,7 @@ update_syntax_table_forward (ptrdiff_t charpos, bool init,
}
else
{
update_syntax_table (charpos, 1, init, object);
update_syntax_table (charpos, 1, object);
if (NILP (object) && gl_state.e_property > syntax_propertize__done)
parse_sexp_propertize (charpos);
}
@@ -2187,7 +2203,7 @@ skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
while (!parse_sexp_lookup_properties
|| pos < gl_state.e_property);
update_syntax_table_forward (pos, false, gl_state.object);
update_syntax_table_forward (pos, gl_state.object);
}
}
else

View File

@@ -26,8 +26,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
INLINE_HEADER_BEGIN
extern void update_syntax_table (ptrdiff_t, EMACS_INT, bool, Lisp_Object);
extern void update_syntax_table_forward (ptrdiff_t, bool, Lisp_Object);
extern void update_syntax_table (ptrdiff_t, EMACS_INT, Lisp_Object);
extern void update_syntax_table_forward (ptrdiff_t, Lisp_Object);
/* The standard syntax table is stored where it will automatically
be used in all new buffers. */
@@ -175,7 +175,7 @@ INLINE void
UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
{ /* Performs just-in-time syntax-propertization. */
if (parse_sexp_lookup_properties && charpos >= gl_state.e_property)
update_syntax_table_forward (charpos, false, gl_state.object);
update_syntax_table_forward (charpos, gl_state.object);
}
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
@@ -185,7 +185,7 @@ INLINE void
UPDATE_SYNTAX_TABLE_BACKWARD (ptrdiff_t charpos)
{
if (parse_sexp_lookup_properties && charpos < gl_state.b_property)
update_syntax_table (charpos, -1, false, gl_state.object);
update_syntax_table (charpos, -1, gl_state.object);
}
/* Make syntax table good for CHARPOS. */