[latex3-commits] [git/LaTeX3-latex3-latex3] master: Implement, document, and test \peek_regex_replace_once:nnTF (90a1bfd17)
Joseph Wright
joseph.wright at morningstar2.co.uk
Thu Dec 3 17:27:34 CET 2020
Repository : https://github.com/latex3/latex3
On branch : master
Link : https://github.com/latex3/latex3/commit/90a1bfd17e04b8fea4905fc1499462eb006b9eda
>---------------------------------------------------------------
commit 90a1bfd17e04b8fea4905fc1499462eb006b9eda
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Thu Jul 23 02:38:02 2020 +0200
Implement, document, and test \peek_regex_replace_once:nnTF
>---------------------------------------------------------------
90a1bfd17e04b8fea4905fc1499462eb006b9eda
l3kernel/CHANGELOG.md | 3 +-
l3kernel/l3regex.dtx | 341 ++++++++++++++++++++++++++++++++------
l3kernel/l3token.dtx | 30 +++-
l3kernel/testfiles/m3regex011.lvt | 14 ++
l3kernel/testfiles/m3regex011.tlg | 21 +++
5 files changed, 355 insertions(+), 54 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 110e6f7a3..b4d610564 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -9,7 +9,8 @@ this project uses date-based 'snapshot' version identifiers.
### Added
- `\peek_analysis_map_inline:n`
-- `\peek_regex:nTF` and `\peek_regex_remove:nTF`
+- `\peek_regex:nTF`, `\peek_regex_remove:nTF`, and
+ `\peek_regex_replace_once:nnTF`
### Unchanged
- Extend `\text_expand:n` to cover `\@protected at testopt`
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 50aec9a53..d1ec33dea 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -5133,6 +5133,14 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}{\@@_replacement_exp_not:V}
+% This is used for the implementation of~|\u|, and it gets redefined
+% for \cs{peek_regex_replace_once:nnTF}.
+% \begin{macrocode}
+\cs_new_eq:NN \@@_replacement_exp_not:V \exp_not:V
+% \end{macrocode}
+% \end{macro}
+%
% \subsubsection{Query and brace balance}
%
% \begin{macro}[rEXP]{\@@_query_range:nn}
@@ -5287,6 +5295,14 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}{\@@_replacement_put:n}
+% This gets redefined for \cs{peek_regex_replace_once:nnTF}.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_put:n
+ { \tl_build_put_right:Nn \l_@@_build_tl }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}{\@@_replacement_normal:n}
% Most characters are simply sent to the output by
% \cs{tl_build_put_right:Nn}, unless a particular category code has been
@@ -5296,11 +5312,13 @@
% sequence is non-empty there: it contains an empty entry
% corresponding to the initial value of
% \cs{l_@@_replacement_category_tl}.
+% The argument |#1| can be a space, otherwise it is a single
+% character.
% \begin{macrocode}
\cs_new_protected:Npn \@@_replacement_normal:n #1
{
\tl_if_empty:NTF \l_@@_replacement_category_tl
- { \tl_build_put_right:Nn \l_@@_build_tl {#1} }
+ { \@@_replacement_put:n {#1} }
{ % (
\token_if_eq_charcode:NNTF #1 )
{
@@ -5343,7 +5361,7 @@
%
% \subsubsection{Submatches}
%
-% \begin{macro}{\@@_replacement_put_submatch:n}
+% \begin{macro}{\@@_replacement_put_submatch:n, \@@_replacement_put_submatch_aux:n}
% Insert a submatch in the replacement text. This is dropped if the
% submatch number is larger than the number of capturing groups.
% Unless the submatch appears inside a |\c{...}| or |\u{...}|
@@ -5356,15 +5374,19 @@
\cs_new_protected:Npn \@@_replacement_put_submatch:n #1
{
\if_int_compare:w #1 < \l_@@_capturing_group_int
- \tl_build_put_right:Nn \l_@@_build_tl
- { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
- \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
- \tl_put_right:Nn \l_@@_balance_tl
- {
- + \@@_submatch_balance:n
- { \exp_not:N \int_eval:n { #1 + ##1 } }
- }
- \fi:
+ \@@_replacement_put_submatch_aux:n {#1}
+ \fi:
+ }
+\cs_new_protected:Npn \@@_replacement_put_submatch_aux:n #1
+ {
+ \tl_build_put_right:Nn \l_@@_build_tl
+ { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
+ \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
+ \tl_put_right:Nn \l_@@_balance_tl
+ {
+ + \@@_submatch_balance:n
+ { \exp_not:N \int_eval:n { #1 + ##1 } }
+ }
\fi:
}
% \end{macrocode}
@@ -5470,7 +5492,7 @@
{
\@@_two_if_eq:NNNNTF
#1 #2 \@@_replacement_normal:n \c_left_brace_str
- { \@@_replacement_cu_aux:Nw \exp_not:V }
+ { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:V }
{ \@@_replacement_error:NNN u #1#2 }
}
% \end{macrocode}
@@ -5561,7 +5583,7 @@
\cs_new_protected:Npn \@@_replacement_char:nNN #1#2#3
{
\tex_lccode:D 0 = `#3 \scan_stop:
- \tex_lowercase:D { \tl_build_put_right:Nn \l_@@_build_tl {#1} }
+ \tex_lowercase:D { \@@_replacement_put:n {#1} }
}
% \end{macrocode}
% \end{macro}
@@ -5607,7 +5629,7 @@
\cs_new_protected:Npn \@@_replacement_c_C:w #1#2
{
\tl_build_put_right:Nn \l_@@_build_tl
- { \exp_not:N \exp_not:N \exp_not:c {#2} }
+ { \exp_not:N \@@_replacement_exp_not:N \exp_not:c {#2} }
}
% \end{macrocode}
% \end{macro}
@@ -5694,7 +5716,7 @@
\__kernel_msg_error:nn { kernel } { replacement-null-space }
\fi:
\tex_lccode:D `\ = `#2 \scan_stop:
- \tex_lowercase:D { \tl_build_put_right:Nn \l_@@_build_tl {~} }
+ \tex_lowercase:D { \@@_replacement_put:n {~} }
}
% \end{macrocode}
% \end{macro}
@@ -6225,13 +6247,13 @@
\group_begin:
\@@_single_match:
#1
- \@@_replacement:n {#2}
\exp_args:No \@@_match:n {#3}
\if_meaning:w \c_false_bool \g_@@_success_bool
\group_end:
\else:
\@@_extract:
\exp_args:No \@@_query_set:n {#3}
+ \@@_replacement:n {#2}
\int_set:Nn \l_@@_balance_int
{
\@@_replacement_balance_one_match:n
@@ -6271,9 +6293,9 @@
\group_begin:
\@@_multi_match:n { \@@_extract: }
#1
- \@@_replacement:n {#2}
\exp_args:No \@@_match:n {#3}
\exp_args:No \@@_query_set:n {#3}
+ \@@_replacement:n {#2}
\int_set:Nn \l_@@_balance_int
{
0
@@ -6343,28 +6365,28 @@
% \end{macrocode}
% \end{variable}
%
-% \begin{variable}{\l_@@_reinsert_tl}
-% \begin{macro}[EXP]{\@@_reinsert:n}
-% Token list such that hitting it with \cs{exp:w} will expand to all
-% tokens we found. It is constructed using the |tl_build| machinery
-% and takes the form of one call to \cs{@@_reinsert:n} for each token
-% to reinsert. The argument is something that \texttt{o}-expands to
-% the single token we wish to put back.
+% \begin{variable}{\l_@@_replacement_tl}
+% When peeking in \cs{peek_regex_replace_once:nnTF} we need to store
+% the replacement text.
% \begin{macrocode}
-\tl_new:N \l_@@_reinsert_tl
-\cs_new:Npn \@@_reinsert:n #1
- {
- \exp_after:wN \exp_after:wN
- \exp_after:wN \exp_end:
- \exp_after:wN \exp_after:wN
- #1
- \exp:w
- }
+\tl_new:N \l_@@_replacement_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\l_@@_input_tl}
+% \begin{macro}{\@@_input_item:n}
+% Stores each token found as \cs{@@_input_item:n} \Arg{tokens}, where
+% the \meta{tokens} \texttt{o}-expand to the token found, as for
+% \cs{tl_analysis_map_inline:nn}.
+% \begin{macrocode}
+\tl_new:N \l_@@_input_tl
+\cs_new_eq:NN \@@_input_item:n ?
% \end{macrocode}
% \end{macro}
% \end{variable}
%
-% \begin{macro}[TF]{\peek_regex:n, \peek_regex:N, \peek_regex_remove:n, \peek_regex_remove:N}
+% \begin{macro}[TF]
+% {\peek_regex:n, \peek_regex:N, \peek_regex_remove:n, \peek_regex_remove:N}
% The |T| and |F| functions just call the corresponding |TF| function.
% The four |TF| functions differ along two axes: whether to remove the
% token or not, distinguished by using \cs{@@_peek_end:} or
@@ -6379,24 +6401,27 @@
% \begin{macrocode}
\cs_new_protected:Npn \peek_regex:nTF #1
{
- \@@_peek:nnTF { \@@_peek_end: }
+ \@@_peek:nnTF
{ \@@_build_aux:Nn \c_false_bool {#1} }
+ { \@@_peek_end: }
}
\cs_new_protected:Npn \peek_regex:nT #1#2
{ \peek_regex:nTF {#1} {#2} { } }
\cs_new_protected:Npn \peek_regex:nF #1 { \peek_regex:nTF {#1} { } }
\cs_new_protected:Npn \peek_regex:NTF #1
{
- \@@_peek:nnTF { \@@_peek_end: }
+ \@@_peek:nnTF
{ \@@_build_aux:NN \c_false_bool #1 }
+ { \@@_peek_end: }
}
\cs_new_protected:Npn \peek_regex:NT #1#2
{ \peek_regex:NTF #1 {#2} { } }
\cs_new_protected:Npn \peek_regex:NF #1 { \peek_regex:NTF {#1} { } }
\cs_new_protected:Npn \peek_regex_remove:nTF #1
{
- \@@_peek:nnTF { \@@_peek_remove_end:n {##1} }
+ \@@_peek:nnTF
{ \@@_build_aux:Nn \c_false_bool {#1} }
+ { \@@_peek_remove_end:n {##1} }
}
\cs_new_protected:Npn \peek_regex_remove:nT #1#2
{ \peek_regex_remove:nTF {#1} {#2} { } }
@@ -6404,8 +6429,9 @@
{ \peek_regex_remove:nTF {#1} { } }
\cs_new_protected:Npn \peek_regex_remove:NTF #1
{
- \@@_peek:nnTF { \@@_peek_remove_end:n {##1} }
+ \@@_peek:nnTF
{ \@@_build_aux:NN \c_false_bool #1 }
+ { \@@_peek_remove_end:n {##1} }
}
\cs_new_protected:Npn \peek_regex_remove:NT #1#2
{ \peek_regex_remove:NTF #1 {#2} { } }
@@ -6414,12 +6440,12 @@
% \end{macrocode}
% \end{macro}
%
-% \begin{macro}{\@@_peek:nnTF}
+% \begin{macro}{\@@_peek:nnTF, \@@_peek_aux:nnTF}
% Store the user's true/false codes (plus \cs{group_end:}) into two
-% token lists. Then build the automaton with |#2|, without submatch
+% token lists. Then build the automaton with |#1|, without submatch
% tracking, and aiming for a single match. Then start matching by
% setting up a few variables like for any regex matching like
-% \cs{regex_match:nnTF}, with the addition of \cs{l_@@_reinsert_tl}
+% \cs{regex_match:nnTF}, with the addition of \cs{l_@@_input_tl}
% that keeps track of the tokens seen, to reinsert them at the
% end. Instead of \cs{tl_analysis_map_inline:nn} on the input, we
% call \cs{peek_analysis_map_inline:n} to go through tokens in the
@@ -6427,31 +6453,38 @@
% \cs{@@_maplike_break:} we need to catch that and break the
% \cs{peek_analysis_map_inline:n} loop instead.
% \begin{macrocode}
-\cs_new_protected:Npn \@@_peek:nnTF #1#2#3#4
+\cs_new_protected:Npn \@@_peek:nnTF #1
+ {
+ \@@_peek_aux:nnTF
+ {
+ \@@_disable_submatches:
+ #1
+ }
+ }
+\cs_new_protected:Npn \@@_peek_aux:nnTF #1#2#3#4
{
\group_begin:
\tl_set:Nn \l_@@_peek_true_tl { \group_end: #3 }
\tl_set:Nn \l_@@_peek_false_tl { \group_end: #4 }
- \@@_disable_submatches:
\@@_single_match:
- #2
+ #1
\@@_match_init:
- \tl_build_clear:N \l_@@_reinsert_tl
+ \tl_build_clear:N \l_@@_input_tl
\@@_match_once_init:
\peek_analysis_map_inline:n
{
- \tl_build_put_right:Nn \l_@@_reinsert_tl
- { \@@_reinsert:n {##1} }
+ \tl_build_put_right:Nn \l_@@_input_tl
+ { \@@_input_item:n {##1} }
\@@_match_one_token:nnN {##1} {##2} ##3
\use_none:nnn
\prg_break_point:Nn \@@_maplike_break:
- { \peek_analysis_map_break:n {#1} }
+ { \peek_analysis_map_break:n {#2} }
}
}
% \end{macrocode}
% \end{macro}
%
-% \begin{macro}{ \@@_peek_end:, \@@_peek_remove_end:n, \@@_peek_reinsert:N}
+% \begin{macro}{\@@_peek_end:, \@@_peek_remove_end:n}
% Once the regex matches (or permanently fails to match) we call
% \cs{@@_peek_end:}, or \cs{@@_peek_remove_end:n} with argument the
% last token seen (or rather tokens that \texttt{o}-expand and
@@ -6473,11 +6506,217 @@
{ \exp_after:wN \l_@@_peek_true_tl #1 }
{ \@@_peek_reinsert:N \l_@@_peek_false_tl }
}
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_reinsert:N, \@@_reinsert_item:n}
+% Insert the true/false code |#1|, followed by the tokens found, which
+% were stored in \cs{l_@@_input_tl}. For this, loop through that
+% token list using \cs{@@_reinsert_item:n}, which expands |#1| once to
+% get a single token, and jumps over it to expand what follows, with
+% suitable \cs{exp:w} and \cs{exp_end:}. We cannot just use
+% \cs{use:e} on the whole token list because the result may be
+% unbalanced, which would stop the primitive prematurely, or let it
+% continue beyond where we would like.
+% \begin{macrocode}
\cs_new_protected:Npn \@@_peek_reinsert:N #1
{
- \tl_build_end:N \l_@@_reinsert_tl
+ \tl_build_end:N \l_@@_input_tl
+ \cs_set_eq:NN \@@_input_item:n \@@_reinsert_item:n
+ \exp_after:wN #1 \exp:w \l_@@_input_tl \exp_end:
+ }
+\cs_new_protected:Npn \@@_reinsert_item:n #1
+ {
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \exp_end:
+ \exp_after:wN \exp_after:wN
+ #1
+ \exp:w
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}[noTF]
+% {\peek_regex_replace_once:nn, \peek_regex_replace_once:Nn}
+% Similar to \cs{peek_regex:nTF} above.
+% \begin{macrocode}
+\cs_new_protected:Npn \peek_regex_replace_once:nnTF #1
+ { \@@_peek_replace:nnTF { \@@_build_aux:Nn \c_false_bool {#1} } }
+\cs_new_protected:Npn \peek_regex_replace_once:nnT #1#2#3
+ { \peek_regex_replace_once:nnTF {#1} {#2} {#3} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:nnF #1#2
+ { \peek_regex_replace_once:nnTF {#1} {#2} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:nn #1#2
+ { \peek_regex_replace_once:nnTF {#1} {#2} { } { } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnTF #1
+ { \@@_peek_replace:nnTF { \@@_build_aux:NN \c_false_bool #1 } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnT #1#2#3
+ { \peek_regex_replace_once:NnTF #1 {#2} {#3} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:NnF #1#2
+ { \peek_regex_replace_once:NnTF #1 {#2} { } }
+\cs_new_protected:Npn \peek_regex_replace_once:Nn #1#2
+ { \peek_regex_replace_once:NnTF #1 {#2} { } { } }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replace:nnTF}
+% Same as \cs{@@_peek:nnTF} (used for \cs{peek_regex:nTF} above), but
+% without disabling submatches, and with a different end. The
+% replacement text |#2| is stored, to be analyzed later.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replace:nnTF #1#2
+ {
+ \tl_set:Nn \l_@@_replacement_tl {#2}
+ \@@_peek_aux:nnTF {#1} { \@@_peek_replace_end: }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replace_end:}
+% If the match failed \cs{@@_peek_reinsert:N} reinserts the tokens
+% found. Otherwise, finish storing the submatch information using
+% \cs{@@_extract:}, and store the input into \tn{toks}. Redefine a
+% few auxiliaries to change slightly their expansion behaviour as
+% explained below. Analyse the replacement text with
+% \cs{@@_replacement:n}, which as usual defines
+% \cs{@@_replacement_do_one_match:n} to insert the tokens from the
+% start of the match attempt to the beginning of the match, followed
+% by the replacement text. The \cs{use:x} expands for instance the
+% trailing \cs{@@_query_range:nn} down to a sequence of
+% \cs{@@_reinsert_item:n} \Arg{tokens} where \meta{tokens}
+% \texttt{o}-expand to a single token that we want to insert. After
+% \texttt{x}-expansion, \cs{use:x} does \cs{use:n}, so we have
+% \cs{exp_after:wN} \cs{l_@@_peek_true_tl} \cs{exp:w} \ldots{}
+% \cs{exp_end:}. This is set up such as to obtain
+% \cs{l_@@_peek_true_tl} followed by the replaced tokens (possibly
+% unbalanced) in the input stream.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replace_end:
+ {
+ \bool_if:NTF \g_@@_success_bool
+ {
+ \@@_extract:
+ \@@_query_set_from_input_tl:
+ \cs_set_eq:NN \@@_replacement_put:n \@@_peek_replacement_put:n
+ \cs_set_eq:NN \@@_replacement_put_submatch_aux:n
+ \@@_peek_replacement_put_submatch_aux:n
+ \cs_set_eq:NN \@@_input_item:n \@@_reinsert_item:n
+ \cs_set_eq:NN \@@_replacement_exp_not:N \@@_peek_replacement_token:n
+ \cs_set_eq:NN \@@_replacement_exp_not:V \@@_peek_replacement_var:N
+ \exp_args:No \@@_replacement:n { \l_@@_replacement_tl }
+ \use:x
+ {
+ \exp_not:n { \exp_after:wN \l_@@_peek_true_tl \exp:w }
+ \@@_replacement_do_one_match:n
+ { \l_@@_zeroth_submatch_int }
+ \@@_query_range:nn
+ {
+ \__kernel_intarray_item:Nn \g_@@_submatch_end_intarray
+ { \l_@@_zeroth_submatch_int }
+ }
+ { \l_@@_max_pos_int }
+ \exp_end:
+ }
+ }
+ { \@@_peek_reinsert:N \l_@@_peek_false_tl }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_query_set_from_input_tl:, \@@_query_set_item:n}
+% The input was stored into \cs{l_@@_input_tl} as successive items
+% \cs{@@_input_item:n} \Arg{tokens}. Store that in successive
+% \tn{toks}. It's not clear whether the empty entries before and
+% after are both useful.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_query_set_from_input_tl:
+ {
+ \tl_build_end:N \l_@@_input_tl
+ \int_zero:N \l_@@_curr_pos_int
+ \cs_set_eq:NN \@@_input_item:n \@@_query_set_item:n
+ \@@_query_set_item:n { }
+ \l_@@_input_tl
+ \@@_query_set_item:n { }
+ \int_set_eq:NN \l_@@_max_pos_int \l_@@_curr_pos_int
+ }
+\cs_new_protected:Npn \@@_query_set_item:n #1
+ {
+ \int_incr:N \l_@@_curr_pos_int
+ \@@_toks_set:Nn \l_@@_curr_pos_int { \@@_input_item:n {#1} }
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_put:n}
+% While building the replacement function
+% \cs{@@_replacement_do_one_match:n}, we often want to put simple
+% material, given as |#1|, whose \texttt{x}-expansion
+% \texttt{o}-expands to a single token. Normally we can just add the
+% token to \cs{l_@@_build_tl}, but for
+% \cs{peek_regex_replace_once:nnTF} we eventually want to do some
+% strange expansion that is basically using \cs{exp_after:wN} to jump
+% through numerous tokens (we cannot use \texttt{x}-expansion like for
+% \cs{regex_replace_once:nnNTF} because it is ok for the result to be
+% unbalanced since we insert it in the input stream rather than
+% storing it). When within a csname we don't do any such shenanigan
+% because \cs{cs:w} \ldots{} \cs{cs_end:} does all the expansion we
+% need.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_put:n #1
+ {
+ \if_case:w \l_@@_replacement_csnames_int
+ \tl_build_put_right:Nn \l_@@_build_tl
+ { \exp_not:N \@@_reinsert_item:n {#1} }
+ \else:
+ \tl_build_put_right:Nn \l_@@_build_tl {#1}
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_token:n}
+% When hit with \cs{exp:w}, \cs{@@_peek_replacement_token:n}
+% \Arg{token} stops it with \cs{exp_end:} and does \cs{exp_after:wN}
+% \meta{token} \cs{exp:w} to continue expansion after it.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_token:n #1
+ { \exp_after:wN \exp_end: \exp_after:wN #1 \exp:w }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_put_submatch_aux:n}
+% While analyzing the replacement we also have to insert submatches
+% found in the query. Since query items \cs{@@_input_item:n}
+% \Arg{tokens} expand correctly only when surrounded by \cs{exp:w}
+% \ldots{} \cs{exp_end:}, and since these expansion controls are not
+% there within csnames (because \cs{cs:w} \ldots{} \cs{cs_end:} make
+% them unnecessary in most cases), we have to put \cs{exp:w} and
+% \cs{exp_end:} by hand here.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_put_submatch_aux:n #1
+ {
+ \if_case:w \l_@@_replacement_csnames_int
+ \tl_build_put_right:Nn \l_@@_build_tl
+ { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
+ \else:
+ \tl_build_put_right:Nn \l_@@_build_tl
+ { \exp:w \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } \exp_end: }
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
+% \begin{macro}{\@@_peek_replacement_var:N}
+% This is used for |\u| outside csnames. It makes sure to continue
+% expansion with \cs{exp:w} before expanding the variable~|#1| and
+% stopping the \cs{exp:w} that precedes.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_replacement_var:N #1
+ {
+ \exp_after:wN \exp_last_unbraced:NV
+ \exp_after:wN \exp_end:
\exp_after:wN #1
- \exp:w \l_@@_reinsert_tl \exp_end:
+ \exp:w
}
% \end{macrocode}
% \end{macro}
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 4abb84099..ae457c478 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -1064,8 +1064,8 @@
% \meta{regex}. If the test is true, the \meta{tokens} are removed
% from the input stream and the \meta{true code} is inserted, while if
% the test is false, the \meta{false code} is inserted followed by the
-% \meta{tokens} that have been read in the process of matching the
-% \meta{regex}. See \pkg{l3regex} for documentation of the syntax of
+% \meta{tokens} that were originally in the input stream.
+% See \pkg{l3regex} for documentation of the syntax of
% regular expressions. The \meta{regular expression} is implicitly
% anchored at the start, so for instance
% \cs{peek_regex_remove:nTF}~|{|~|a|~|}| is essentially equivalent to
@@ -1078,6 +1078,32 @@
% \end{texnote}
% \end{function}
%
+% \begin{function}[added = 2020-07-23, noTF]
+% {\peek_regex_replace_once:nn, \peek_regex_replace_once:Nn}
+% \begin{syntax}
+% \cs{peek_regex_replace_once:nnTF} \Arg{regex} \Arg{replacement} \Arg{true code} \Arg{false code}
+% \end{syntax}
+% If the \meta{tokens} that follow in the input stream match the
+% \meta{regex}, replaces them according to the \meta{replacement} as
+% for \cs{regex_replace_once:nnN}, and leaves the result in the input
+% stream, after the \meta{true code}. Otherwise, leaves \meta{false
+% code} followed by the \meta{tokens} that were originally in the
+% input stream, with no modifications. See \pkg{l3regex} for
+% documentation of the syntax of regular expressions and of the
+% \meta{replacement}: for instance |\0| in the \meta{replacement} is
+% replaced by the tokens that were matched in the input stream. The
+% \meta{regular expression} is implicitly anchored at the start. In
+% contrast to \cs{regex_replace_once:nnN}, no error arises if the
+% \meta{replacement} leads to an unbalanced token list: the tokens are
+% inserted into the input stream without issue.
+% \begin{texnote}
+% Implicit character tokens are correctly considered by
+% \cs{peek_regex_replace_once:nnTF} as control sequences, while
+% functions that inspect individual tokens (for instance
+% \cs{peek_charcode:NTF}) only take into account their meaning.
+% \end{texnote}
+% \end{function}
+%
% \section{Description of all possible tokens}
% \label{sec:l3token:all-tokens}
%
diff --git a/l3kernel/testfiles/m3regex011.lvt b/l3kernel/testfiles/m3regex011.lvt
index 64b483004..a2837f0ed 100644
--- a/l3kernel/testfiles/m3regex011.lvt
+++ b/l3kernel/testfiles/m3regex011.lvt
@@ -69,5 +69,19 @@
\peek_regex_remove:NF \l_tmpa_regex { \ERROR \test:w } \test:w a \s_stop
}
+\TEST { Peek~regex~replace }
+ {
+ \if_false: { \fi:
+ \peek_regex_replace_once:nnTF { } { \cB\{ } { \TRUE \test:w } { \ERROR \test:w } \aaa } b \s_stop
+ \cs_show:c { a ~ a }
+ \peek_regex_replace_once:nnT { a } { \cC" \c{\0\u{c_space_tl}\0} } { \TRUE \test:w } a \s_stop
+ \cs_show:c { a ~ a }
+ \peek_regex_replace_once:nnTF { \c{a} \{ } { \0\0\cE\} }
+ { \TRUE \test:w } { \ERROR \test:w } \a { b } \s_stop
+ \peek_regex_replace_once:nnTF { \cL. } { \cL(X } { \ERROR \test:w } { \FALSE \test:w } \aaa \s_stop
+ \peek_regex_replace_once:nnT { b } { \cL(X } { \ERROR \test:w } \test:w a \s_stop
+ \peek_regex_replace_once:nnF { \c[^C] . (. a()) } { \cB\< } { \FALSE \test:w } \test:w { a b } c \s_stop
+ }
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\END
diff --git a/l3kernel/testfiles/m3regex011.tlg b/l3kernel/testfiles/m3regex011.tlg
index 5395e3941..b0b2a1f48 100644
--- a/l3kernel/testfiles/m3regex011.tlg
+++ b/l3kernel/testfiles/m3regex011.tlg
@@ -60,3 +60,24 @@ TRUE
TRUE
||
============================================================
+============================================================
+TEST 5: Peek regex replace
+============================================================
+TRUE
+|{\aaa }b|
+> \a a=undefined.
+<recently read> }
+l. ... }
+TRUE
+|\"\a a |
+> \a a=undefined.
+<recently read> }
+l. ... }
+TRUE
+|\a {\a {}b}|
+FALSE
+|\aaa |
+|a|
+FALSE
+|\test:w {ab}c|
+============================================================
More information about the latex3-commits
mailing list.