[latex3-commits] [git/LaTeX3-latex3-latex3] main: Document preference of alternatives in l3regex (fixes #904) (5aebcb94f)

Mon May 10 23:22:12 CEST 2021

Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5

>---------------------------------------------------------------

commit 5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Mon May 10 23:22:12 2021 +0200

    Document preference of alternatives in l3regex (fixes #904)


>---------------------------------------------------------------

5aebcb94f1b10d8fab03fe28f4ed826b8c4f3bf5
 l3kernel/l3regex.dtx | 6 +++++-
 l3kernel/l3token.dtx | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 0824cbcb9..b6ea1f31e 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -270,11 +270,15 @@
 %   \item[\{$n,m$\}] At least $n$, no more than $m$, greedy.
 %   \item[\{$n,m$\}?] At least $n$, no more than $m$, lazy.
 % \end{l3regex-syntax}
+% For greedy quantifiers the regex code first investigates matches
+% that involve as many repetitions as possible, while for lazy
+% quantifiers it first investigates matches with as few repetitions
+% as possible.
 %
 % Alternation and capturing groups.
 % \begin{l3regex-syntax}
 %   \item[A\char`|B\char`|C] Either one of \texttt{A}, \texttt{B},
-%     or \texttt{C}.
+%     or \texttt{C}, investigating \texttt{A} first.
 %   \item[(\ldots{})] Capturing group.
 %   \item[(?:\ldots{})] Non-capturing group.
 %   \item[(?\char`|\ldots{})] Non-capturing group which resets
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 3db1f706c..6911ab17f 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -1080,6 +1080,15 @@
 %     \cs{peek_regex:nTF} as control sequences, while functions that
 %     inspect individual tokens (for instance \cs{peek_charcode:NTF})
 %     only take into account their meaning.
+%
+%     The \cs{peek_regex:nTF} function inspects only as many tokens as
+%     necessary to determine whether the regular expression matches.
+%     For instance \cs{peek_regex:nTF} \verb"{ abc | [a-z] }" |{ } { }|
+%     |abc| will only inspect the first token~|a| even though the first
+%     branch |abc| of the alternative is preferred in functions such as
+%     \cs{peek_regex_remove_once:nTF}.  This may have an effect on
+%     tokenization if the input stream has not yet been tokenized and
+%     category codes are changed.
 %   \end{texnote}
 % \end{function}
 %