[latex3-commits] [git/LaTeX3-latex3-latex3] master: Implement, document, and test \peek_regex_replace_once:nnTF (90a1bfd17)

Joseph Wright joseph.wright at morningstar2.co.uk
Thu Dec 3 17:27:34 CET 2020


Repository : https://github.com/latex3/latex3
On branch  : master
Link       : https://github.com/latex3/latex3/commit/90a1bfd17e04b8fea4905fc1499462eb006b9eda

>---------------------------------------------------------------

commit 90a1bfd17e04b8fea4905fc1499462eb006b9eda
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Thu Jul 23 02:38:02 2020 +0200

    Implement, document, and test \peek_regex_replace_once:nnTF


>---------------------------------------------------------------

90a1bfd17e04b8fea4905fc1499462eb006b9eda
 l3kernel/CHANGELOG.md             |   3 +-
 l3kernel/l3regex.dtx              | 341 ++++++++++++++++++++++++++++++++------
 l3kernel/l3token.dtx              |  30 +++-
 l3kernel/testfiles/m3regex011.lvt |  14 ++
 l3kernel/testfiles/m3regex011.tlg |  21 +++
 5 files changed, 355 insertions(+), 54 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 110e6f7a3..b4d610564 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -9,7 +9,8 @@ this project uses date-based 'snapshot' version identifiers.
 
 ### Added
 - `\peek_analysis_map_inline:n`
-- `\peek_regex:nTF` and `\peek_regex_remove:nTF`
+- `\peek_regex:nTF`, `\peek_regex_remove:nTF`, and
+  `\peek_regex_replace_once:nnTF`
 
 ### Unchanged
 - Extend `\text_expand:n` to cover `\@protected@testopt`
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 50aec9a53..d1ec33dea 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -5133,6 +5133,14 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}{\@@_replacement_exp_not:V}
+%   This is used for the implementation of~|\u|, and it gets redefined
+%   for \cs{peek_regex_replace_once:nnTF}.
+%    \begin{macrocode}
+\cs_new_eq:NN \@@_replacement_exp_not:V \exp_not:V
+%    \end{macrocode}
+% \end{macro}
+%
 % \subsubsection{Query and brace balance}
 %
 % \begin{macro}[rEXP]{\@@_query_range:nn}
@@ -5287,6 +5295,14 @@
 % \end{macro}
 % \end{macro}
 %
+% \begin{macro}{\@@_replacement_put:n}
+%   This gets redefined for \cs{peek_regex_replace_once:nnTF}.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_put:n
+  { \tl_build_put_right:Nn \l_@@_build_tl }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}{\@@_replacement_normal:n}
 %   Most characters are simply sent to the output by
 %   \cs{tl_build_put_right:Nn}, unless a particular category code has been
@@ -5296,11 +5312,13 @@
 %   sequence is non-empty there: it contains an empty entry
 %   corresponding to the initial value of
 %   \cs{l_@@_replacement_category_tl}.
+%   The argument |#1| can be a space, otherwise it is a single
+%   character.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_normal:n #1
   {
     \tl_if_empty:NTF \l_@@_replacement_category_tl
-      { \tl_build_put_right:Nn \l_@@_build_tl {#1} }
+      { \@@_replacement_put:n {#1} }
       { % (
         \token_if_eq_charcode:NNTF #1 )
           {
@@ -5343,7 +5361,7 @@
 %
 % \subsubsection{Submatches}
 %
-% \begin{macro}{\@@_replacement_put_submatch:n}
+% \begin{macro}{\@@_replacement_put_submatch:n, \@@_replacement_put_submatch_aux:n}
 %   Insert a submatch in the replacement text. This is dropped if the
 %   submatch number is larger than the number of capturing groups.
 %   Unless the submatch appears inside a |\c{...}| or |\u{...}|
@@ -5356,15 +5374,19 @@
 \cs_new_protected:Npn \@@_replacement_put_submatch:n #1
   {
     \if_int_compare:w #1 < \l_@@_capturing_group_int
-      \tl_build_put_right:Nn \l_@@_build_tl
-        { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
-      \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
-        \tl_put_right:Nn \l_@@_balance_tl
-          {
-            + \@@_submatch_balance:n
-              { \exp_not:N \int_eval:n { #1 + ##1 } }
-          }
-      \fi:
+      \@@_replacement_put_submatch_aux:n {#1}
+    \fi:
+  }
+\cs_new_protected:Npn \@@_replacement_put_submatch_aux:n #1
+  {
+    \tl_build_put_right:Nn \l_@@_build_tl
+      { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
+    \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
+      \tl_put_right:Nn \l_@@_balance_tl
+        {
+          + \@@_submatch_balance:n
+            { \exp_not:N \int_eval:n { #1 + ##1 } }
+        }
     \fi:
   }
 %    \end{macrocode}
@@ -5470,7 +5492,7 @@
   {
     \@@_two_if_eq:NNNNTF
       #1 #2 \@@_replacement_normal:n \c_left_brace_str
-      { \@@_replacement_cu_aux:Nw \exp_not:V }
+      { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:V }
       { \@@_replacement_error:NNN u #1#2 }
   }
 %    \end{macrocode}
@@ -5561,7 +5583,7 @@
   \cs_new_protected:Npn \@@_replacement_char:nNN #1#2#3
     {
       \tex_lccode:D 0 = `#3 \scan_stop:
-      \tex_lowercase:D { \tl_build_put_right:Nn \l_@@_build_tl {#1} }
+      \tex_lowercase:D { \@@_replacement_put:n {#1} }
     }
 %    \end{macrocode}
 % \end{macro}
@@ -5607,7 +5629,7 @@
   \cs_new_protected:Npn \@@_replacement_c_C:w #1#2
     {
       \tl_build_put_right:Nn \l_@@_build_tl
-        { \exp_not:N \exp_not:N \exp_not:c {#2} }
+        { \exp_not:N \@@_replacement_exp_not:N \exp_not:c {#2} }
     }
 %    \end{macrocode}
 % \end{macro}
@@ -5694,7 +5716,7 @@
         \__kernel_msg_error:nn { kernel } { replacement-null-space }
       \fi:
       \tex_lccode:D `\ = `#2 \scan_stop:
-      \tex_lowercase:D { \tl_build_put_right:Nn \l_@@_build_tl {~} }
+      \tex_lowercase:D { \@@_replacement_put:n {~} }
     }
 %    \end{macrocode}
 % \end{macro}
@@ -6225,13 +6247,13 @@
     \group_begin:
       \@@_single_match:
       #1
-      \@@_replacement:n {#2}
       \exp_args:No \@@_match:n {#3}
       \if_meaning:w \c_false_bool \g_@@_success_bool
         \group_end:
       \else:
         \@@_extract:
         \exp_args:No \@@_query_set:n {#3}
+        \@@_replacement:n {#2}
         \int_set:Nn \l_@@_balance_int
           {
             \@@_replacement_balance_one_match:n
@@ -6271,9 +6293,9 @@
     \group_begin:
       \@@_multi_match:n { \@@_extract: }
       #1
-      \@@_replacement:n {#2}
       \exp_args:No \@@_match:n {#3}
       \exp_args:No \@@_query_set:n {#3}
+      \@@_replacement:n {#2}
       \int_set:Nn \l_@@_balance_int
         {
           0
@@ -6343,28 +6365,28 @@
 %    \end{macrocode}
 % \end{variable}
 %
-% \begin{variable}{\l_@@_reinsert_tl}
-% \begin{macro}[EXP]{\@@_reinsert:n}
-%   Token list such that hitting it with \cs{exp:w} will expand to all
-%   tokens we found.  It is constructed using the |tl_build| machinery
-%   and takes the form of one call to \cs{@@_reinsert:n} for each token
-%   to reinsert.  The argument is something that \texttt{o}-expands to
-%   the single token we wish to put back.
+% \begin{variable}{\l_@@_replacement_tl}
+%   When peeking in \cs{peek_regex_replace_once:nnTF} we need to store
+%   the replacement text.
 %    \begin{macrocode}
-\tl_new:N \l_@@_reinsert_tl
-\cs_new:Npn \@@_reinsert:n #1
-  {
-    \exp_after:wN \exp_after:wN
-    \exp_after:wN \exp_end:
-    \exp_after:wN \exp_after:wN
-    #1
-    \exp:w
-  }
+\tl_new:N \l_@@_replacement_tl
+%    \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_input_tl}
+% \begin{macro}{\@@_input_item:n}
+%   Stores each token found as \cs{@@_input_item:n} \Arg{tokens}, where
+%   the \meta{tokens} \texttt{o}-expand to the token found, as for
+%   \cs{tl_analysis_map_inline:nn}.
+%    \begin{macrocode}
+\tl_new:N \l_@@_input_tl
+\cs_new_eq:NN \@@_input_item:n ?
 %    \end{macrocode}
 % \end{macro}
 % \end{variable}
 %
-% \begin{macro}[TF]{\peek_regex:n, \peek_regex:N, \peek_regex_remove:n, \peek_regex_remove:N}
+% \begin{macro}[TF]
+%   {\peek_regex:n, \peek_regex:N, \peek_regex_remove:n, \peek_regex_remove:N}
 %   The |T| and |F| functions just call the corresponding |TF| function.
 %   The four |TF| functions differ along two axes: whether to remove the
 %   token or not, distinguished by using \cs{@@_peek_end:} or
@@ -6379,24 +6401,27 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \peek_regex:nTF #1
   {
-    \@@_peek:nnTF { \@@_peek_end: }
+    \@@_peek:nnTF
       { \@@_build_aux:Nn \c_false_bool {#1} }
+      { \@@_peek_end: }
   }
 \cs_new_protected:Npn \peek_regex:nT #1#2
   { \peek_regex:nTF {#1} {#2} { } }
 \cs_new_protected:Npn \peek_regex:nF #1 { \peek_regex:nTF {#1} { } }
 \cs_new_protected:Npn \peek_regex:NTF #1
   {
-    \@@_peek:nnTF { \@@_peek_end: }
+    \@@_peek:nnTF
       { \@@_build_aux:NN \c_false_bool #1 }
+      { \@@_peek_end: }
   }
 \cs_new_protected:Npn \peek_regex:NT #1#2
   { \peek_regex:NTF #1 {#2} { } }
 \cs_new_protected:Npn \peek_regex:NF #1 { \peek_regex:NTF {#1} { } }
 \cs_new_protected:Npn \peek_regex_remove:nTF #1
   {
-    \@@_peek:nnTF { \@@_peek_remove_end:n {##1} }
+    \@@_peek:nnTF
       { \@@_build_aux:Nn \c_false_bool {#1} }
+      { \@@_peek_remove_end:n {##1} }
   }
 \cs_new_protected:Npn \peek_regex_remove:nT #1#2
   { \peek_regex_remove:nTF {#1} {#2} { } }
@@ -6404,8 +6429,9 @@
   { \peek_regex_remove:nTF {#1} { } }
 \cs_new_protected:Npn \peek_regex_remove:NTF #1
   {
-    \@@_peek:nnTF { \@@_peek_remove_end:n {##1} }
+    \@@_peek:nnTF
       { \@@_build_aux:NN \c_false_bool #1 }
+      { \@@_peek_remove_end:n {##1} }
   }
 \cs_new_protected:Npn \peek_regex_remove:NT #1#2
   { \peek_regex_remove:NTF #1 {#2} { } }
@@ -6414,12 +6440,12 @@
 %    \end{macrocode}
 % \end{macro}
 %
-% \begin{macro}{\@@_peek:nnTF}
+% \begin{macro}{\@@_peek:nnTF, \@@_peek_aux:nnTF}
 %   Store the user's true/false codes (plus \cs{group_end:}) into two
-%   token lists.  Then build the automaton with |#2|, without submatch
+%   token lists.  Then build the automaton with |#1|, without submatch
 %   tracking, and aiming for a single match.  Then start matching by
 %   setting up a few variables like for any regex matching like
-%   \cs{regex_match:nnTF}, with the addition of \cs{l_@@_reinsert_tl}
+%   \cs{regex_match:nnTF}, with the addition of \cs{l_@@_input_tl}
 %   that keeps track of the tokens seen, to reinsert them at the
 %   end.  Instead of \cs{tl_analysis_map_inline:nn} on the input, we
 %   call \cs{peek_analysis_map_inline:n} to go through tokens in the
@@ -6427,31 +6453,38 @@
 %   \cs{@@_maplike_break:} we need to catch that and break the
 %   \cs{peek_analysis_map_inline:n} loop instead.
 %    \begin{macrocode}
-\cs_new_protected:Npn \@@_peek:nnTF #1#2#3#4
+\cs_new_protected:Npn \@@_peek:nnTF #1
+  {
+    \@@_peek_aux:nnTF
+      {
+        \@@_disable_submatches:
+        #1
+      }
+  }
+\cs_new_protected:Npn \@@_peek_aux:nnTF #1#2#3#4
   {
     \group_begin:
       \tl_set:Nn \l_@@_peek_true_tl { \group_end: #3 }
       \tl_set:Nn \l_@@_peek_false_tl { \group_end: #4 }
-      \@@_disable_submatches:
       \@@_single_match:
-      #2
+      #1
       \@@_match_init:
-      \tl_build_clear:N \l_@@_reinsert_tl
+      \tl_build_clear:N \l_@@_input_tl
       \@@_match_once_init:
       \peek_analysis_map_inline:n
         {
-          \tl_build_put_right:Nn \l_@@_reinsert_tl
-            { \@@_reinsert:n {##1} }
+          \tl_build_put_right:Nn \l_@@_input_tl
+            { \@@_input_item:n {##1} }
           \@@_match_one_token:nnN {##1} {##2} ##3
           \use_none:nnn
           \prg_break_point:Nn \@@_maplike_break:
-            { \peek_analysis_map_break:n {#1} }
+            { \peek_analysis_map_break:n {#2} }
         }
   }
 %    \end{macrocode}
 % \end{macro}
 %
-% \begin{macro}{ \@@_peek_end:, \@@_peek_remove_end:n, \@@_peek_reinsert:N}
+% \begin{macro}{\@@_peek_end:, \@@_peek_remove_end:n}
 %   Once the regex matches (or permanently fails to match) we call
 %   \cs{@@_peek_end:}, or \cs{@@_peek_remove_end:n} with argument the
 %   last token seen (or rather tokens that \texttt{o}-expand and
@@ -6473,11 +6506,217 @@
       { \exp_after:wN \l_@@_peek_true_tl #1 }
       { \@@_peek_reinsert:N \l_@@_peek_false_tl }
   }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_reinsert:N, \@@_reinsert_item:n}
+%   Insert the true/false code |#1|, followed by the tokens found, which
+%   were stored in \cs{l_@@_input_tl}.  For this, loop through that
+%   token list using \cs{@@_reinsert_item:n}, which expands |#1| once to
+%   get a single token, and jumps over it to expand what follows, with
+%   suitable \cs{exp:w} and \cs{exp_end:}.  We cannot just use
+%   \cs{use:e} on the whole token list because the result may be
+%   unbalanced, which would stop the primitive prematurely, or let it
+%   continue beyond where we would like.
+%    \begin{macrocode}
 \cs_new_protected:Npn \@@_peek_reinsert:N #1
   {
-    \tl_build_end:N \l_@@_reinsert_tl
+    \tl_build_end:N \l_@@_input_tl
+    \cs_set_eq:NN \@@_input_item:n \@@_reinsert_item:n
+    \exp_after:wN #1 \exp:w \l_@@_input_tl \exp_end:
+  }
+\cs_new_protected:Npn \@@_reinsert_item:n #1
+  {
+    \exp_after:wN \exp_after:wN
+    \exp_after:wN \exp_end:
+    \exp_after:wN \exp_after:wN
+    #1
+    \exp:w
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[noTF]
+%   {\peek_regex_replace_once:nn, \peek_regex_replace_once:Nn}
+%   Similar to \cs{peek_regex:nTF} above.
+%    \begin{macrocode}
+\cs_new_protected:Npn \peek_regex_replace_once:nnTF #1
+  { \@@_peek_replace:nnTF { \@@_build_aux:Nn \c_false_bool {#1} } }
+\cs_new_protected:Npn \peek_regex_replace_once:nnT #1#2#3
+  { \peek_regex_replace_once:nnTF {#1} {#2} {#3} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:nnF #1#2
+  { \peek_regex_replace_once:nnTF {#1} {#2} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:nn #1#2
+  { \peek_regex_replace_once:nnTF {#1} {#2} { } { } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnTF #1
+  { \@@_peek_replace:nnTF { \@@_build_aux:NN \c_false_bool #1 } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnT #1#2#3
+  { \peek_regex_replace_once:NnTF #1 {#2} {#3} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnF #1#2
+  { \peek_regex_replace_once:NnTF #1 {#2} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:Nn #1#2
+  { \peek_regex_replace_once:NnTF #1 {#2} { } { } }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replace:nnTF}
+%   Same as \cs{@@_peek:nnTF} (used for \cs{peek_regex:nTF} above), but
+%   without disabling submatches, and with a different end.  The
+%   replacement text |#2| is stored, to be analyzed later.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replace:nnTF #1#2
+  {
+    \tl_set:Nn \l_@@_replacement_tl {#2}
+    \@@_peek_aux:nnTF {#1} { \@@_peek_replace_end: }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replace_end:}
+%   If the match failed \cs{@@_peek_reinsert:N} reinserts the tokens
+%   found.  Otherwise, finish storing the submatch information using
+%   \cs{@@_extract:}, and store the input into \tn{toks}.  Redefine a
+%   few auxiliaries to change slightly their expansion behaviour as
+%   explained below.  Analyse the replacement text with
+%   \cs{@@_replacement:n}, which as usual defines
+%   \cs{@@_replacement_do_one_match:n} to insert the tokens from the
+%   start of the match attempt to the beginning of the match, followed
+%   by the replacement text.  The \cs{use:x} expands for instance the
+%   trailing \cs{@@_query_range:nn} down to a sequence of
+%   \cs{@@_reinsert_item:n} \Arg{tokens} where \meta{tokens}
+%   \texttt{o}-expand to a single token that we want to insert.  After
+%   \texttt{x}-expansion, \cs{use:x} does \cs{use:n}, so we have
+%   \cs{exp_after:wN} \cs{l_@@_peek_true_tl} \cs{exp:w} \ldots{}
+%   \cs{exp_end:}.  This is set up so as to obtain
+%   \cs{l_@@_peek_true_tl} followed by the replaced tokens (possibly
+%   unbalanced) in the input stream.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replace_end:
+  {
+    \bool_if:NTF \g_@@_success_bool
+      {
+        \@@_extract:
+        \@@_query_set_from_input_tl:
+        \cs_set_eq:NN \@@_replacement_put:n \@@_peek_replacement_put:n
+        \cs_set_eq:NN \@@_replacement_put_submatch_aux:n
+          \@@_peek_replacement_put_submatch_aux:n
+        \cs_set_eq:NN \@@_input_item:n \@@_reinsert_item:n
+        \cs_set_eq:NN \@@_replacement_exp_not:N \@@_peek_replacement_token:n
+        \cs_set_eq:NN \@@_replacement_exp_not:V \@@_peek_replacement_var:N
+        \exp_args:No \@@_replacement:n { \l_@@_replacement_tl }
+        \use:x
+          {
+            \exp_not:n { \exp_after:wN \l_@@_peek_true_tl \exp:w }
+            \@@_replacement_do_one_match:n
+              { \l_@@_zeroth_submatch_int }
+            \@@_query_range:nn
+              {
+                \__kernel_intarray_item:Nn \g_@@_submatch_end_intarray
+                  { \l_@@_zeroth_submatch_int }
+              }
+              { \l_@@_max_pos_int }
+            \exp_end:
+          }
+      }
+      { \@@_peek_reinsert:N \l_@@_peek_false_tl }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_query_set_from_input_tl:, \@@_query_set_item:n}
+%   The input was stored into \cs{l_@@_input_tl} as successive items
+%   \cs{@@_input_item:n} \Arg{tokens}.  Store that in successive
+%   \tn{toks}.  It's not clear whether the empty entries before and
+%   after are both useful.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_query_set_from_input_tl:
+  {
+    \tl_build_end:N \l_@@_input_tl
+    \int_zero:N \l_@@_curr_pos_int
+    \cs_set_eq:NN \@@_input_item:n \@@_query_set_item:n
+    \@@_query_set_item:n { }
+    \l_@@_input_tl
+    \@@_query_set_item:n { }
+    \int_set_eq:NN \l_@@_max_pos_int \l_@@_curr_pos_int
+  }
+\cs_new_protected:Npn \@@_query_set_item:n #1
+  {
+    \int_incr:N \l_@@_curr_pos_int
+    \@@_toks_set:Nn \l_@@_curr_pos_int { \@@_input_item:n {#1} }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_put:n}
+%   While building the replacement function
+%   \cs{@@_replacement_do_one_match:n}, we often want to put simple
+%   material, given as |#1|, whose \texttt{x}-expansion
+%   \texttt{o}-expands to a single token.  Normally we can just add the
+%   token to \cs{l_@@_build_tl}, but for
+%   \cs{peek_regex_replace_once:nnTF} we eventually want to do some
+%   strange expansion that is basically using \cs{exp_after:wN} to jump
+%   through numerous tokens (we cannot use \texttt{x}-expansion like for
+%   \cs{regex_replace_once:nnNTF} because it is ok for the result to be
+%   unbalanced since we insert it in the input stream rather than
+%   storing it).  When within a csname we don't do any such shenanigan
+%   because \cs{cs:w} \ldots{} \cs{cs_end:} does all the expansion we
+%   need.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_put:n #1
+  {
+    \if_case:w \l_@@_replacement_csnames_int
+      \tl_build_put_right:Nn \l_@@_build_tl
+        { \exp_not:N \@@_reinsert_item:n {#1} }
+    \else:
+      \tl_build_put_right:Nn \l_@@_build_tl {#1}
+    \fi:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_token:n}
+%   When hit with \cs{exp:w}, \cs{@@_peek_replacement_token:n}
+%   \Arg{token} stops it with \cs{exp_end:} and does \cs{exp_after:wN}
+%   \meta{token} \cs{exp:w} to continue expansion after it.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_token:n #1
+  { \exp_after:wN \exp_end: \exp_after:wN #1 \exp:w }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_put_submatch_aux:n}
+%   While analyzing the replacement we also have to insert submatches
+%   found in the query.  Since query items \cs{@@_input_item:n}
+%   \Arg{tokens} expand correctly only when surrounded by \cs{exp:w}
+%   \ldots{} \cs{exp_end:}, and since these expansion controls are not
+%   there within csnames (because \cs{cs:w} \ldots{} \cs{cs_end:} make
+%   them unnecessary in most cases), we have to put \cs{exp:w} and
+%   \cs{exp_end:} by hand here.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_put_submatch_aux:n #1
+  {
+    \if_case:w \l_@@_replacement_csnames_int
+      \tl_build_put_right:Nn \l_@@_build_tl
+        { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
+    \else:
+      \tl_build_put_right:Nn \l_@@_build_tl
+        { \exp:w \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } \exp_end: }
+    \fi:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_var:N}
+%   This is used for |\u| outside csnames.  It makes sure to continue
+%   expansion with \cs{exp:w} before expanding the variable~|#1| and
+%   stopping the \cs{exp:w} that precedes.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_var:N #1
+  {
+    \exp_after:wN \exp_last_unbraced:NV
+    \exp_after:wN \exp_end:
     \exp_after:wN #1
-    \exp:w \l_@@_reinsert_tl \exp_end:
+    \exp:w
   }
 %    \end{macrocode}
 % \end{macro}
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 4abb84099..ae457c478 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -1064,8 +1064,8 @@
 %   \meta{regex}.  If the test is true, the \meta{tokens} are removed
 %   from the input stream and the \meta{true code} is inserted, while if
 %   the test is false, the \meta{false code} is inserted followed by the
-%   \meta{tokens} that have been read in the process of matching the
-%   \meta{regex}.  See \pkg{l3regex} for documentation of the syntax of
+%   \meta{tokens} that were originally in the input stream.
+%   See \pkg{l3regex} for documentation of the syntax of
 %   regular expressions.  The \meta{regular expression} is implicitly
 %   anchored at the start, so for instance
 %   \cs{peek_regex_remove:nTF}~|{|~|a|~|}| is essentially equivalent to
@@ -1078,6 +1078,32 @@
 %   \end{texnote}
 % \end{function}
 %
+% \begin{function}[added = 2020-07-23, noTF]
+%   {\peek_regex_replace_once:nn, \peek_regex_replace_once:Nn}
+%   \begin{syntax}
+%     \cs{peek_regex_replace_once:nnTF} \Arg{regex} \Arg{replacement} \Arg{true code} \Arg{false code}
+%   \end{syntax}
+%   If the \meta{tokens} that follow in the input stream match the
+%   \meta{regex}, replaces them according to the \meta{replacement} as
+%   for \cs{regex_replace_once:nnN}, and leaves the result in the input
+%   stream, after the \meta{true code}.  Otherwise, leaves \meta{false
+%   code} followed by the \meta{tokens} that were originally in the
+%   input stream, with no modifications.  See \pkg{l3regex} for
+%   documentation of the syntax of regular expressions and of the
+%   \meta{replacement}: for instance |\0| in the \meta{replacement} is
+%   replaced by the tokens that were matched in the input stream.  The
+%   \meta{regular expression} is implicitly anchored at the start.  In
+%   contrast to \cs{regex_replace_once:nnN}, no error arises if the
+%   \meta{replacement} leads to an unbalanced token list: the tokens are
+%   inserted into the input stream without issue.
+%   \begin{texnote}
+%     Implicit character tokens are correctly considered by
+%     \cs{peek_regex_replace_once:nnTF} as control sequences, while
+%     functions that inspect individual tokens (for instance
+%     \cs{peek_charcode:NTF}) only take into account their meaning.
+%   \end{texnote}
+% \end{function}
+%
 % \section{Description of all possible tokens}
 % \label{sec:l3token:all-tokens}
 %
diff --git a/l3kernel/testfiles/m3regex011.lvt b/l3kernel/testfiles/m3regex011.lvt
index 64b483004..a2837f0ed 100644
--- a/l3kernel/testfiles/m3regex011.lvt
+++ b/l3kernel/testfiles/m3regex011.lvt
@@ -69,5 +69,19 @@
     \peek_regex_remove:NF \l_tmpa_regex { \ERROR \test:w } \test:w a \s_stop
   }
 
+\TEST { Peek~regex~replace }
+  {
+    \if_false: { \fi:
+    \peek_regex_replace_once:nnTF { } { \cB\{ } { \TRUE \test:w } { \ERROR \test:w } \aaa } b \s_stop
+    \cs_show:c { a ~ a }
+    \peek_regex_replace_once:nnT { a } { \cC" \c{\0\u{c_space_tl}\0} } { \TRUE \test:w } a \s_stop
+    \cs_show:c { a ~ a }
+    \peek_regex_replace_once:nnTF { \c{a} \{ } { \0\0\cE\} }
+      { \TRUE \test:w } { \ERROR \test:w } \a { b } \s_stop
+    \peek_regex_replace_once:nnTF { \cL. } { \cL(X } { \ERROR \test:w } { \FALSE \test:w } \aaa \s_stop
+    \peek_regex_replace_once:nnT { b } { \cL(X } { \ERROR \test:w } \test:w a \s_stop
+    \peek_regex_replace_once:nnF { \c[^C] . (. a()) } { \cB\< } { \FALSE \test:w } \test:w { a b } c \s_stop
+  }
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \END
diff --git a/l3kernel/testfiles/m3regex011.tlg b/l3kernel/testfiles/m3regex011.tlg
index 5395e3941..b0b2a1f48 100644
--- a/l3kernel/testfiles/m3regex011.tlg
+++ b/l3kernel/testfiles/m3regex011.tlg
@@ -60,3 +60,24 @@ TRUE
 TRUE
 ||
 ============================================================
+============================================================
+TEST 5: Peek regex replace
+============================================================
+TRUE
+|{\aaa }b|
+> \a a=undefined.
+<recently read> }
+l. ...  }
+TRUE
+|\"\a a |
+> \a a=undefined.
+<recently read> }
+l. ...  }
+TRUE
+|\a {\a {}b}|
+FALSE
+|\aaa |
+|a|
+FALSE
+|\test:w {ab}c|
+============================================================





More information about the latex3-commits mailing list.