[latex3-commits] [git/LaTeX3-latex3-latex3] gh590: Implement quantifiers for \u escape sequence in regex (see #885) (fb2f58208)

Bruno Le Floch blflatex at gmail.com
Mon Apr 26 18:13:24 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : gh590
Link       : https://github.com/latex3/latex3/commit/fb2f582084b638fb89f5fb20511624bbffb8fe29

>---------------------------------------------------------------

commit fb2f582084b638fb89f5fb20511624bbffb8fe29
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Mon Apr 26 18:13:02 2021 +0200

    Implement quantifiers for \u escape sequence in regex (see #885)


>---------------------------------------------------------------

fb2f582084b638fb89f5fb20511624bbffb8fe29
 l3kernel/l3regex.dtx              | 34 +++++++++++++++++++++++++---------
 l3kernel/testfiles/m3regex007.tlg |  5 +++--
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index e0efe47f9..6e6b8f2db 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -2359,12 +2359,9 @@
 \cs_new_protected:Npn \@@_compile_if_quantifier:TFw #1#2#3#4
   {
     \token_if_eq_meaning:NNTF #3 \@@_compile_special:N
-      {
-        \cs_if_exist:cTF { @@_compile_quantifier_#4:w }
-          {#1}
-          { #2 #3 #4 }
-      }
-      { #2 #3 #4 }
+      { \cs_if_exist:cTF { @@_compile_quantifier_#4:w } }
+      { \use_ii:nn }
+    {#1} {#2} #3 #4
   }
 %    \end{macrocode}
 % \end{macro}
@@ -3453,13 +3450,32 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}{\@@_compile_u_end:}
-%   Once we have extracted the variable's name, we store the contents of
-%   that variable in \cs{l_@@_internal_a_tl}. The behaviour of |\u|
+% \begin{macro}{\@@_compile_u_end:, \@@_compile_u_payload:}
+%   Once we have extracted the variable's name, we check for
+%   quantifiers, in which case we set up a non-capturing group with a
+%   single branch.  Inside this branch (we omit it and the group if
+%   there is no quantifier), \cs{@@_compile_u_payload:} puts
+%   the right tests corresponding to the contents of the variable, which
+%   we store in \cs{l_@@_internal_a_tl}. The behaviour of |\u|
 %   then depends on whether we are within a |\c{...}| escape (in this
 %   case, the variable is turned to a string), or not.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_u_end:
+  {
+    \@@_compile_if_quantifier:TFw
+      {
+        \tl_build_put_right:Nn \l_@@_build_tl
+          {
+            \@@_group_no_capture:nnnN { \if_false: } \fi:
+            \@@_branch:n { \if_false: } \fi:
+          }
+        \@@_compile_u_payload:
+        \tl_build_put_right:Nn \l_@@_build_tl { \if_false: { \fi: } }
+        \@@_compile_quantifier:w
+      }
+      { \@@_compile_u_payload: }
+  }
+\cs_new_protected:Npn \@@_compile_u_payload:
   {
     \tl_set:Nv \l_@@_internal_a_tl { \l_@@_internal_a_tl }
     \if_int_compare:w \l_@@_mode_int = \c_@@_outer_mode_int
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index b0ca9a8f2..d889779b6 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -75,8 +75,9 @@ l. ...  }
 > Compiled regex {\u {l_tmpa_tl}*\c {\u {l_tmpa_tl}|(?:\u
 {l_tmpa_int})?}{3}|y*}:
 +-branch
-  control sequence \abc
-  char code 42 (*)
+  ,-group begin (no capture)
+  | control sequence \abc
+  `-group end, repeated 0 or more times, greedy
   Match, repeated 3 times
     control sequence
     +-branch





More information about the latex3-commits mailing list.