[latex3-commits] [git/LaTeX3-latex3-latex3] main: Correct the validity check performed by \regex_show:N (fixes #1093) (c32e80231)

Bruno Le Floch blflatex at gmail.com
Fri Jun 24 14:55:34 CEST 2022


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/c32e80231fdbfeb06a7f769260a1c4dcbc62e288

>---------------------------------------------------------------

commit c32e80231fdbfeb06a7f769260a1c4dcbc62e288
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Fri Jun 24 14:55:34 2022 +0200

    Correct the validity check performed by \regex_show:N (fixes #1093)


>---------------------------------------------------------------

c32e80231fdbfeb06a7f769260a1c4dcbc62e288
 l3kernel/CHANGELOG.md             |  3 +++
 l3kernel/l3regex.dtx              | 40 ++++++++++++++++++++++++++++++++-------
 l3kernel/testfiles/m3regex007.lvt |  7 ++++++-
 l3kernel/testfiles/m3regex007.tlg | 32 +++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 9595ed47f..7141b0226 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,9 @@ this project uses date-based 'snapshot' version identifiers.
 
 ## [Unreleased]
 
+### Fixed
+- Correct validity check performed by `\regex_show:N` (gh/1093)
+
 ## [2022-06-16]
 
 ### Fixed
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 7429b63cc..cb3d5fab8 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -2883,7 +2883,7 @@
     \cs_new_protected:cpx { @@_compile_/#2: }
       {
         \@@_compile_one:n
-          { \@@_item_reverse:n \exp_not:c { @@_prop_#1: } }
+          { \@@_item_reverse:n { \exp_not:c { @@_prop_#1: } } }
       }
   }
 \@@_tmp:w d D
@@ -3080,7 +3080,8 @@
 %   raw characters until hopefully reaching |:]|. If that's missing, or
 %   the \textsc{posix} class is unknown, abort. If all is right, add the
 %   test to the current class, with an extra \cs{@@_item_reverse:n}
-%   for negative classes.
+%   for negative classes (we make sure to wrap its argument in braces,
+%   as otherwise \cs{regex_show:N} would not recognize the regex as valid).
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_class_posix_test:w #1#2
   {
@@ -3131,8 +3132,8 @@
           {
             \@@_compile_one:n
               {
-                \bool_if:NF \l_@@_internal_bool \@@_item_reverse:n
-                \exp_not:c { @@_posix_ \l_@@_internal_a_tl : }
+                \bool_if:NTF \l_@@_internal_bool \use:n \@@_item_reverse:n
+                { \exp_not:c { @@_posix_ \l_@@_internal_a_tl : } }
               }
           }
           {
@@ -3898,6 +3899,13 @@
   }
 \cs_new:Npn \@@_clean_class:n #1
   { \@@_clean_class_loop:nnn #1 ????? \prg_break_point: }
+%    \end{macrocode}
+% When cleaning a class there are many cases, including a dozen or so
+% like \cs{@@_prop_d:} or \cs{@@_posix_alpha:}.  To avoid listing all of
+% them, we allow any command whose name starts with the
+% $13$ characters |__regex_prop_| or |__regex_posix| (handily these have
+% the same length once the underscore following |posix| is left out).
+%    \begin{macrocode}
 \cs_new:Npn \@@_clean_class_loop:nnn #1#2#3
   {
     \tl_if_single:nF {#1} { \prg_break: }
@@ -3917,7 +3925,10 @@
             \@@_item_caseless_range:nn { }
             \@@_item_exact:nn { }
           }
-          { #1 { \@@_clean_int:n {#2} } { \@@_clean_int:n {#3} } }
+          {
+            #1 { \@@_clean_int:n {#2} } { \@@_clean_int:n {#3} }
+            \@@_clean_class_loop:nnn
+          }
           {
             \token_case_meaning:NnTF #1
               {
@@ -3928,7 +3939,22 @@
                 #1 { \@@_clean_int:n {#2} } { \@@_clean_class:n {#3} }
                 \@@_clean_class_loop:nnn
               }
-              { \prg_break: }
+              {
+                \exp_args:Nf \str_case:nnTF
+                  {
+                    \exp_args:Nf \str_range:nnn
+                      { \cs_to_str:N #1 } { 1 } { 13 }
+                  }
+                  {
+                    { @@_prop_ } { }
+                    { @@_posix } { }
+                  }
+                  {
+                    #1
+                    \@@_clean_class_loop:nnn {#2} {#3}
+                  }
+                  { \prg_break: }
+              }
           }
       }
   }
@@ -4922,7 +4948,7 @@
       \int_set_eq:NN \l_@@_curr_char_int \l_@@_last_char_int
       \@@_prop_w:
       \@@_break_point:TF
-        { \group_end: \@@_item_reverse:n \@@_prop_w: }
+        { \group_end: \@@_item_reverse:n { \@@_prop_w: } }
         { \group_end: \@@_prop_w: }
   }
 \cs_new_protected:Npn \@@_Z_test:
diff --git a/l3kernel/testfiles/m3regex007.lvt b/l3kernel/testfiles/m3regex007.lvt
index 1cde917a7..5604407c8 100644
--- a/l3kernel/testfiles/m3regex007.lvt
+++ b/l3kernel/testfiles/m3regex007.lvt
@@ -1,5 +1,5 @@
 %
-% Copyright (C) 2012,2016-2018,2021 The LaTeX Project
+% Copyright (C) 2012,2016-2018,2021,2022 The LaTeX Project
 %
 
 \documentclass{minimal}
@@ -32,6 +32,11 @@
     \regex_new:N \l_bar_regex
     \regex_show:N \l_bar_regex
     \regex_show:N \g_undefined_regex
+    \regex_set:Nn \l_tmpa_regex { \d | [a-z\W] . [[:alpha:][:^ascii:] \%] }
+    \regex_show:N \l_tmpa_regex
+    \regex_set:Nn \l_tmpa_regex { A }
+    \tl_put_right:Nn \l_tmpa_regex { X } % invalid on purpose
+    \regex_show:N \l_tmpa_regex
   }
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index dd7eda821..cd11a75c1 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -128,6 +128,38 @@ l. ...  }
 This is a coding error.
 LaTeX has been asked to show a variable \g_undefined_regex, but this has not
 been defined yet.
+> Compiled regex variable \l_tmpa_regex:
++-branch
+  range [48 (0),57 (9)]
++-branch
+  Match
+    range [97 (a),122 (z)]
+    Reversed match
+      range [97 (a),122 (z)]
+      range [65 (A),90 (Z)]
+      range [48 (0),57 (9)]
+      char code 95 (_)
+  any token
+  Match
+    range [97 (a),122 (z)]
+    range [65 (A),90 (Z)]
+    Reversed match
+      range [0,127]
+    char code 37 (%).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Variable '\l_tmpa_regex' is not a valid regex.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+This is a coding error.
+The variable '\l_tmpa_regex' with value
+    \__regex_branch:n {\__regex_class:NnnnN \c_true_bool
+    {\__regex_item_caseful_equal:n {65}}{1}{0}\c_false_bool }X
+should be a regex variable, but it does not have the correct internal
+structure:
+    \__regex_branch:n {\__regex_class:NnnnN \c_true_bool
+    {\__regex_item_caseful_equal:n {65}}{1}{0}\c_false_bool }
 ============================================================
 ============================================================
 TEST 2: regex_show again





More information about the latex3-commits mailing list.