[latex3-commits] [git/LaTeX3-latex3-latex3] gh433-regex-case: Correct computation of brace balance in regex_case functions (see #433) (911fedf33)

Bruno Le Floch blflatex at gmail.com
Sun May 16 22:01:02 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : gh433-regex-case
Link       : https://github.com/latex3/latex3/commit/911fedf33f94a59b210b81ac392f27460b633b9d

>---------------------------------------------------------------

commit 911fedf33f94a59b210b81ac392f27460b633b9d
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Sun May 16 22:01:02 2021 +0200

    Correct computation of brace balance in regex_case functions (see #433)


>---------------------------------------------------------------

911fedf33f94a59b210b81ac392f27460b633b9d
 l3kernel/l3regex.dtx              | 45 ++++++++++++++++++++++-----------------
 l3kernel/testfiles/m3regex012.lvt | 16 ++++++++++++++
 l3kernel/testfiles/m3regex012.tlg | 38 +++++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 345310e4f..e7aa9614d 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -874,7 +874,7 @@
 %     \texttt{curr_state} and \texttt{curr_submatches}.
 %   \item If possible, when a state is reused by the same thread, kill
 %     other subthreads.
-%   \item Use an array rather than \cs[no-index]{l__regex_balance_tl}
+%   \item Use an array rather than \cs[no-index]{g__regex_balance_tl}
 %     to build the function \cs[no-index]{__regex_replacement_balance_one_match:n}.
 %   \item Reduce the number of epsilon-transitions in alternatives.
 %   \item Optimize simple strings: use less states (|abcade| should give
@@ -5627,12 +5627,12 @@
 %    \end{macrocode}
 % \end{variable}
 %
-% \begin{variable}{\l_@@_balance_tl}
+% \begin{variable}{\g_@@_balance_tl}
 %   This token list holds the replacement text for
 %   \cs{@@_replacement_balance_one_match:n} while it is being built
 %   incrementally.
 %    \begin{macrocode}
-\tl_new:N \l_@@_balance_tl
+\tl_new:N \g_@@_balance_tl
 %    \end{macrocode}
 % \end{variable}
 %
@@ -5779,7 +5779,7 @@
 % \begin{macro}{\@@_replacement_apply:Nn, \@@_replacement_set:n}
 %   The replacement text is built incrementally. We keep track in
 %   \cs{l_@@_balance_int} of the balance of explicit begin- and
-%   end-group tokens and we store in \cs{l_@@_balance_tl} some
+%   end-group tokens and we store in \cs{g_@@_balance_tl} some
 %   code to compute the brace balance from submatches (see its
 %   description). Detect unescaped right braces, and escaped characters,
 %   with trailing \cs{prg_do_nothing:} because some of the later
@@ -5794,7 +5794,7 @@
     \group_begin:
       \tl_build_begin:N \l_@@_build_tl
       \int_zero:N \l_@@_balance_int
-      \tl_clear:N \l_@@_balance_tl
+      \tl_gclear:N \g_@@_balance_tl
       \@@_escape_use:nnnn
         {
           \if_charcode:w \c_right_brace_str ##1
@@ -5824,12 +5824,8 @@
             { \seq_count:N \l_@@_replacement_category_seq }
           \seq_clear:N \l_@@_replacement_category_seq
         }
-      \cs_gset:Npx \@@_replacement_balance_one_match:n ##1
-        {
-          + \int_use:N \l_@@_balance_int
-          \l_@@_balance_tl
-          - \@@_submatch_balance:n {##1}
-        }
+      \tl_gput_right:Nx \g_@@_balance_tl
+        { + \int_use:N \l_@@_balance_int }
       \tl_build_end:N \l_@@_build_tl
       \exp_args:NNo
     \group_end:
@@ -5851,6 +5847,12 @@
           }
         #1
       }
+    \exp_args:Nno \use:n
+      { \cs_gset:Npn \@@_replacement_balance_one_match:n ##1 }
+      {
+        \g_@@_balance_tl
+        - \@@_submatch_balance:n {##1}
+      }
   }
 %    \end{macrocode}
 % \end{macro}
@@ -5859,22 +5861,30 @@
 % \begin{macro}{\@@_case_replacement:n, \@@_case_replacement:x}
 %    \begin{macrocode}
 \tl_new:N \g_@@_case_replacement_tl
+\tl_new:N \g_@@_case_balance_tl
 \cs_new_protected:Npn \@@_case_replacement:n #1
   {
-    \tl_gset:Nn \g_@@_case_replacement_tl
+    \tl_gset:Nn \g_@@_case_balance_tl
       {
         \if_case:w
           \__kernel_intarray_item:Nn
             \g_@@_submatch_case_intarray {##1}
       }
+    \tl_gset_eq:NN \g_@@_case_replacement_tl \g_@@_case_balance_tl
     \tl_map_tokens:nn {#1}
       { \@@_replacement_apply:Nn \@@_case_replacement_aux:n }
+    \tl_gset:No \g_@@_balance_tl
+      { \g_@@_case_balance_tl \fi: }
     \exp_args:No \@@_replacement_set:n
       { \g_@@_case_replacement_tl \fi: }
   }
 \cs_generate_variant:Nn \@@_case_replacement:n { x }
 \cs_new_protected:Npn \@@_case_replacement_aux:n #1
-  { \tl_gput_right:Nn \g_@@_case_replacement_tl { \or: #1 } }
+  {
+    \tl_gput_right:Nn \g_@@_case_replacement_tl { \or: #1 }
+    \tl_gput_right:No \g_@@_case_balance_tl
+      { \exp_after:wN \or: \g_@@_balance_tl }
+  }
 %    \end{macrocode}
 % \end{macro}
 %
@@ -5978,7 +5988,7 @@
 %   construction, it must be taken into account in the brace balance.
 %   Later on, |##1| will be replaced by a pointer to the $0$-th submatch for a
 %   given match.  There is an \cs{exp_not:N} here as at the point-of-use
-%   of \cs{l_@@_balance_tl} there is an \texttt{x}-type expansion which is needed
+%   of \cs{g_@@_balance_tl} there is an \texttt{x}-type expansion which is needed
 %   to get |##1| in correctly.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_put_submatch:n #1
@@ -5995,11 +6005,8 @@
     \tl_build_put_right:Nn \l_@@_build_tl
       { \@@_query_submatch:n { \int_eval:n { #1 + ##1 } } }
     \if_int_compare:w \l_@@_replacement_csnames_int = 0 \exp_stop_f:
-      \tl_put_right:Nn \l_@@_balance_tl
-        {
-          + \@@_submatch_balance:n
-            { \exp_not:N \int_eval:n { #1 + ##1 } }
-        }
+      \tl_gput_right:Nn \g_@@_balance_tl
+        { + \@@_submatch_balance:n { \int_eval:n { #1 + ##1 } } }
     \fi:
   }
 %    \end{macrocode}
diff --git a/l3kernel/testfiles/m3regex012.lvt b/l3kernel/testfiles/m3regex012.lvt
index 49023a0a5..e0187f90b 100644
--- a/l3kernel/testfiles/m3regex012.lvt
+++ b/l3kernel/testfiles/m3regex012.lvt
@@ -107,6 +107,22 @@
         { . } { [\0] }
       } \l_tmpa_tl
       { \TYPE { \l_tmpa_tl } } { \ERROR }
+    \tl_set:Nn \l_tmpa_tl { a ( b ( c ) d ( ) ) e }
+    \regex_case_replace_all:nN
+      {
+        { \( } { \{ }
+        { \) } { \} }
+      }
+      \l_tmpa_tl
+    \tl_analysis_log:N \l_tmpa_tl
+    \tl_set:Nn \l_tmpa_tl { a { } b | { | | } | | e }
+    \regex_case_replace_all:nN
+      {
+        { \| } { \} }
+        { . (.) } { \0\1 }
+      }
+      \l_tmpa_tl
+    \tl_analysis_log:N \l_tmpa_tl
   }
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/l3kernel/testfiles/m3regex012.tlg b/l3kernel/testfiles/m3regex012.tlg
index 06988b349..5e35c3df2 100644
--- a/l3kernel/testfiles/m3regex012.tlg
+++ b/l3kernel/testfiles/m3regex012.tlg
@@ -47,6 +47,44 @@ YabcYYYz => [Y,Ya,a]bc[Y,YY,Y][Y,Yz,z]
 yabcbc => [Y,ya,a]b(abc,cbc,)
 ``Hello''---[,][ ]``world''---[!]
 ---``Hello''---[,][ ]---``world''---[!]
+The token list \l_tmpa_tl contains the tokens:
+>  a (the letter a)
+>  { (begin-group character {)
+>  b (the letter b)
+>  { (begin-group character {)
+>  c (the letter c)
+>  } (end-group character })
+>  d (the letter d)
+>  { (begin-group character {)
+>  } (end-group character })
+>  } (end-group character })
+>  e (the letter e)
+! LaTeX3 Error: Missing brace inserted when replacing.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+LaTeX was asked to do some regular expression operation, and the resulting
+token list would not have the same number of begin-group and end-group tokens.
+Braces were inserted: 2 left, 0 right.
+The token list \l_tmpa_tl contains the tokens:
+>  { (begin-group character {)
+>  { (begin-group character {)
+>  a (the letter a)
+>  { (begin-group character {)
+>  { (begin-group character {)
+>  } (end-group character })
+>  b (the letter b)
+>  b (the letter b)
+>  } (end-group character })
+>  { (begin-group character {)
+>  | (the character |)
+>  | (the character |)
+>  } (end-group character })
+>  } (end-group character })
+>  | (the character |)
+>  | (the character |)
+>  } (end-group character })
+>  e (the letter e)
 ============================================================
 ============================================================
 TEST 4: regex_case errors





More information about the latex3-commits mailing list.