[latex3-commits] [git/LaTeX3-latex3-latex3] main: Document preference of alternatives in l3regex (fixes #904) (5aebcb94f)
Bruno Le Floch
blflatex at gmail.com
Mon May 10 23:22:12 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5
>---------------------------------------------------------------
commit 5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Mon May 10 23:22:12 2021 +0200
Document preference of alternatives in l3regex (fixes #904)
>---------------------------------------------------------------
5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5
l3kernel/l3regex.dtx | 6 +++++-
l3kernel/l3token.dtx | 9 +++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 0824cbcb9..b6ea1f31e 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -270,11 +270,15 @@
% \item[\{$n,m$\}] At least $n$, no more than $m$, greedy.
% \item[\{$n,m$\}?] At least $n$, no more than $m$, lazy.
% \end{l3regex-syntax}
+% For greedy quantifiers the regex code will first investigate matches
+% that involve as many repetitions as possible, while for lazy
+% quantifiers it investigates matches with as few repetitions as
+% possible first.
%
% Alternation and capturing groups.
% \begin{l3regex-syntax}
% \item[A\char`|B\char`|C] Either one of \texttt{A}, \texttt{B},
-% or \texttt{C}.
+% or \texttt{C}, investigating \texttt{A} first.
% \item[(\ldots{})] Capturing group.
% \item[(?:\ldots{})] Non-capturing group.
% \item[(?\char`|\ldots{})] Non-capturing group which resets
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 3db1f706c..6911ab17f 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -1080,6 +1080,15 @@
% \cs{peek_regex:nTF} as control sequences, while functions that
% inspect individual tokens (for instance \cs{peek_charcode:NTF})
% only take into account their meaning.
+%
+% The \cs{peek_regex:nTF} function inspects only as many tokens as
+% necessary to determine whether the regular expression matches.
+% For instance \cs{peek_regex:nTF} \verb"{ abc | [a-z] }" |{ } { }|
+% |abc| will only inspect the first token~|a| even though the first
+% branch |abc| of the alternative is preferred in functions such as
+% \cs{peek_regex_remove_once:n}. This may have an effect on
+% tokenization if the input stream has not yet been tokenized and
+% category codes are changed.
% \end{texnote}
% \end{function}
%
More information about the latex3-commits mailing list.