[latex3-commits] [git/LaTeX3-latex3-latex3] gh433-regex-case: Correct computation of brace balance in regex_case functions (see #433) (911fedf33)
Bruno Le Floch
blflatex at gmail.com
Sun May 16 22:01:02 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : gh433-regex-case
Link : https://github.com/latex3/latex3/commit/911fedf33f94a59b210b81ac392f27460b633b9d
>---------------------------------------------------------------
commit 911fedf33f94a59b210b81ac392f27460b633b9d
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Sun May 16 22:01:02 2021 +0200
Correct computation of brace balance in regex_case functions (see #433)
>---------------------------------------------------------------
911fedf33f94a59b210b81ac392f27460b633b9d
l3kernel/l3regex.dtx | 45 ++++++++++++++++++++++-----------------
l3kernel/testfiles/m3regex012.lvt | 16 ++++++++++++++
l3kernel/testfiles/m3regex012.tlg | 38 +++++++++++++++++++++++++++++++++
3 files changed, 80 insertions(+), 19 deletions(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 345310e4f..e7aa9614d 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -874,7 +874,7 @@
% \texttt{curr_state} and \texttt{curr_submatches}.
% \item If possible, when a state is reused by the same thread, kill
% other subthreads.
-% \item Use an array rather than \cs[no-index]{l__regex_balance_tl}
+% \item Use an array rather than \cs[no-index]{g__regex_balance_tl}
% to build the function \cs[no-index]{__regex_replacement_balance_one_match:n}.
% \item Reduce the number of epsilon-transitions in alternatives.
% \item Optimize simple strings: use less states (|abcade| should give
@@ -5627,12 +5627,12 @@
% \end{macrocode}
% \end{variable}
%
-% \begin{variable}{\l_@@_balance_tl}
+% \begin{variable}{\g_@@_balance_tl}
% This token list holds the replacement text for
% \cs{@@_replacement_balance_one_match:n} while it is being built
% incrementally.
% \begin{macrocode}
-\tl_new:N \l_@@_balance_tl
+\tl_new:N \g_@@_balance_tl
% \end{macrocode}
% \end{variable}
%
@@ -5779,7 +5779,7 @@
% \begin{macro}{\@@_replacement_apply:Nn, \@@_replacement_set:n}
% The replacement text is built incrementally. We keep track in
% \cs{l_@@_balance_int} of the balance of explicit begin- and
-% end-group tokens and we store in \cs{l_@@_balance_tl} some
+% end-group tokens and we store in \cs{g_@@_balance_tl} some
% code to compute the brace balance from submatches (see its
% description). Detect unescaped right braces, and escaped characters,
% with trailing \cs{prg_do_nothing:} because some of the later
@@ -5794,7 +5794,7 @@
\group_begin:
\tl_build_begin:N \l_@@_build_tl
\int_zero:N \l_@@_balance_int
- \tl_clear:N \l_@@_balance_tl
+ \tl_gclear:N \g_@@_balance_tl
\@@_escape_use:nnnn
{
\if_charcode:w \c_right_brace_str ##1
@@ -5824,12 +5824,8 @@
{ \seq_count:N \l_@@_replacement_category_seq }
\seq_clear:N \l_@@_replacement_category_seq
}
- \cs_gset:Npx \@@_replacement_balance_one_match:n ##1
- {
- + \int_use:N \l_@@_balance_int
- \l_@@_balance_tl
- - \@@_submatch_balance:n {##1}
- }
+ \tl_gput_right:Nx \g_@@_balance_tl
+ { + \int_use:N \l_@@_balance_int }
\tl_build_end:N \l_@@_build_tl
\exp_args:NNo
\group_end:
@@ -5851,6 +5847,12 @@
}
#1
}
+ \exp_args:Nno \use:n
+ { \cs_gset:Npn \@@_replacement_balance_one_match:n ##1 }
+ {
+ \g_@@_balance_tl
+ - \@@_submatch_balance:n {##1}
+ }
}
% \end{macrocode}
% \end{macro}
@@ -5859,22 +5861,30 @@
% \begin{macro}{\@@_case_replacement:n, \@@_case_replacement:x}
% \begin{macrocode}
\tl_new:N \g_@@_case_replacement_tl
+\tl_new:N \g_@@_case_balance_tl
\cs_new_protected:Npn \@@_case_replacement:n #1
{
- \tl_gset:Nn \g_@@_case_replacement_tl
+ \tl_gset:Nn \g_@@_case_balance_tl
{
\if_case:w
\__kernel_intarray_item:Nn
\g_@@_submatch_case_intarray {##1}
}
+ \tl_gset_eq:NN \g_@@_case_replacement_tl \g_@@_case_balance_tl
\tl_map_tokens:nn {#1}
{ \@@_replacement_apply:Nn \@@_case_replacement_aux:n }
+ \tl_gset:No \g_@@_balance_tl
+ { \g_@@_case_balance_tl \fi: }
\exp_args:No \@@_replacement_set:n
{ \g_@@_case_replacement_tl \fi: }
}
\cs_generate_variant:Nn \@@_case_replacement:n { x }
\cs_new_protected:Npn \@@_case_replacement_aux:n #1
- { \tl_gput_right:Nn \g_@@_case_replacement_tl { \or: #1 } }
+ {
+ \tl_gput_right:Nn \g_@@_case_replacement_tl { \or: #1 }
+ \tl_gput_right:No \g_@@_case_balance_tl
+ { \exp_after:wN \or: \g_@@_balance_tl }
+ }
% \end{macrocode}
% \end{macro}
%
@@ -5978,7 +5988,7 @@
% construction, it must be taken into account in the brace balance.
% Later on, |##1| will be replaced by a pointer to the $0$-th submatch for a
% given match. There is an \cs{exp_not:N} here as at the point-of-use
-% of \cs{l_@@_balance_tl} there is an \texttt{x}-type expansion which is needed
+% of \cs{g_@@_balance_tl} there is an \texttt{x}-type expansion which is needed
% to get |##1| in correctly.
% \begin{macrocode}
\cs_new_protected:Npn \@@_replacement_put_submatch:n #1
@@ -5995,11 +6005,8 @@
\tl_build_put_right:Nn \l_@@_build_tl
{ \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
\if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
- \tl_put_right:Nn \l_@@_balance_tl
- {
- + \@@_submatch_balance:n
- { \exp_not:N \int_eval:n { #1 + ##1 } }
- }
+ \tl_gput_right:Nn \g_@@_balance_tl
+ { + \@@_submatch_balance:n { \int_eval:n { #1 + ##1 } } }
\fi:
}
% \end{macrocode}
diff --git a/l3kernel/testfiles/m3regex012.lvt b/l3kernel/testfiles/m3regex012.lvt
index 49023a0a5..e0187f90b 100644
--- a/l3kernel/testfiles/m3regex012.lvt
+++ b/l3kernel/testfiles/m3regex012.lvt
@@ -107,6 +107,22 @@
{ . } { [\0] }
} \l_tmpa_tl
{ \TYPE { \l_tmpa_tl } } { \ERROR }
+ \tl_set:Nn \l_tmpa_tl { a ( b ( c ) d ( ) ) e }
+ \regex_case_replace_all:nN
+ {
+ { \( } { \{ }
+ { \) } { \} }
+ }
+ \l_tmpa_tl
+ \tl_analysis_log:N \l_tmpa_tl
+ \tl_set:Nn \l_tmpa_tl { a { } b | { | | } | | e }
+ \regex_case_replace_all:nN
+ {
+ { \| } { \} }
+ { . (.) } { \0\1 }
+ }
+ \l_tmpa_tl
+ \tl_analysis_log:N \l_tmpa_tl
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/l3kernel/testfiles/m3regex012.tlg b/l3kernel/testfiles/m3regex012.tlg
index 06988b349..5e35c3df2 100644
--- a/l3kernel/testfiles/m3regex012.tlg
+++ b/l3kernel/testfiles/m3regex012.tlg
@@ -47,6 +47,44 @@ YabcYYYz => [Y,Ya,a]bc[Y,YY,Y][Y,Yz,z]
yabcbc => [Y,ya,a]b(abc,cbc,)
``Hello''---[,][ ]``world''---[!]
---``Hello''---[,][ ]---``world''---[!]
+The token list \l_tmpa_tl contains the tokens:
+> a (the letter a)
+> { (begin-group character {)
+> b (the letter b)
+> { (begin-group character {)
+> c (the letter c)
+> } (end-group character })
+> d (the letter d)
+> { (begin-group character {)
+> } (end-group character })
+> } (end-group character })
+> e (the letter e)
+! LaTeX3 Error: Missing brace inserted when replacing.
+For immediate help type H <return>.
+ ...
+l. ... }
+LaTeX was asked to do some regular expression operation, and the resulting
+token list would not have the same number of begin-group and end-group tokens.
+Braces were inserted: 2 left, 0 right.
+The token list \l_tmpa_tl contains the tokens:
+> { (begin-group character {)
+> { (begin-group character {)
+> a (the letter a)
+> { (begin-group character {)
+> { (begin-group character {)
+> } (end-group character })
+> b (the letter b)
+> b (the letter b)
+> } (end-group character })
+> { (begin-group character {)
+> | (the character |)
+> | (the character |)
+> } (end-group character })
+> } (end-group character })
+> | (the character |)
+> | (the character |)
+> } (end-group character })
+> e (the letter e)
============================================================
============================================================
TEST 4: regex_case errors
More information about the latex3-commits
mailing list.