[latex3-commits] [git/LaTeX3-latex3-latex3] gh590: Implement quantifiers for \ur escape sequence (83fd60c15)

Bruno Le Floch blflatex at gmail.com
Mon Apr 26 16:39:33 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : gh590
Link       : https://github.com/latex3/latex3/commit/83fd60c154c3f8ec115aa35eecd87f313e0fd21f

>---------------------------------------------------------------

commit 83fd60c154c3f8ec115aa35eecd87f313e0fd21f
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Mon Apr 26 16:39:33 2021 +0200

    Implement quantifiers for \ur escape sequence


>---------------------------------------------------------------

83fd60c154c3f8ec115aa35eecd87f313e0fd21f
 l3kernel/l3regex.dtx              | 54 +++++++++++++++++++++++++++++++--------
 l3kernel/testfiles/m3regex007.lvt |  2 +-
 l3kernel/testfiles/m3regex007.tlg |  4 +--
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 40dfb15f8..e0efe47f9 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -2351,6 +2351,24 @@
 %
 % \subsubsection{Quantifiers}
 %
+% \begin{macro}{\@@_compile_if_quantifier:TFw}
+%   This looks ahead and checks whether a quantifier follows (a
+%   special character equal to one of \texttt{?+*\{}).  This is
+%   useful for the |\u| and |\ur| escape sequences.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_if_quantifier:TFw #1#2#3#4
+  {
+    \token_if_eq_meaning:NNTF #3 \@@_compile_special:N
+      {
+        \cs_if_exist:cTF { @@_compile_quantifier_#4:w }
+          {#1}
+          { #2 #3 #4 }
+      }
+      { #2 #3 #4 }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}{\@@_compile_quantifier:w}
 %   This looks ahead and finds any quantifier (special character equal
 %   to either of \texttt{?+*\{}).
@@ -3402,24 +3420,38 @@
 %    \end{macrocode}
 % \end{macro}
 %
-% \begin{macro}{\@@_compile_ur_end:}
+% \begin{macro}{\@@_compile_ur_end:, \@@_compile_ur:n}
+% \begin{macro}[EXP]{\@@_compile_ur_aux:w}
 %   For the |\ur{...}| construction, once we have extracted the
-%   variable's name, we simply insert the compiled regex (which the
-%   variable should be) into a non-capturing group (with no repetition)
-%   to respect the structure of regexes.  It might be possible to omit
-%   this group perhaps.
+%   variable's name, we determine the compiled regex (passed as the
+%   argument of \cs{@@_compile_ur:n}).  If that has a single branch
+%   (namely \cs{tl_if_empty:oTF} is true) and there is no quantifier,
+%   then simply insert the contents of this branch (obtained by
+%   \cs{use_ii:nn}, which is expanded later).  In all other cases,
+%   insert a non-capturing group and look for quantifiers to determine
+%   the number of repetitions, etc.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_ur_end:
+  { \exp_args:Nv \@@_compile_ur:n { \l_@@_internal_a_tl } }
+\cs_new_protected:Npn \@@_compile_ur:n #1
   {
-    \tl_build_put_right:Nx \l_@@_build_tl
-      {
-        \exp_not:N \@@_group_no_capture:nnnN
-          { \exp_not:v { \l_@@_internal_a_tl } }
-          { 1 } { 0 } \exp_not:N \c_false_bool
-      }
+    \tl_if_empty:oTF { \@@_compile_ur_aux:w #1 {} ? ? \q_@@_nil }
+      { \@@_compile_if_quantifier:TFw }
+      { \use_i:nn }
+          {
+            \tl_build_put_right:Nn \l_@@_build_tl
+              {
+                \@@_group_no_capture:nnnN { \if_false: } \fi:
+                \exp_not:n {#1}
+              }
+            \@@_compile_quantifier:w
+          }
+          { \tl_build_put_right:Nn \l_@@_build_tl { \use_ii:nn #1 } }
   }
+\cs_new:Npn \@@_compile_ur_aux:w \@@_branch:n #1#2#3 \q_@@_nil {#2}
 %    \end{macrocode}
 % \end{macro}
+% \end{macro}
 %
 % \begin{macro}{\@@_compile_u_end:}
 %   Once we have extracted the variable's name, we store the contents of
diff --git a/l3kernel/testfiles/m3regex007.lvt b/l3kernel/testfiles/m3regex007.lvt
index fb1b65fb6..627ac02cb 100644
--- a/l3kernel/testfiles/m3regex007.lvt
+++ b/l3kernel/testfiles/m3regex007.lvt
@@ -23,7 +23,7 @@
     \regex_new:N \l_foo_regex
     \regex_set:Nn \l_foo_regex { \A a|b| }
     \regex_show:N \l_foo_regex
-    \regex_show:n { a \ur{l_foo_regex} b \c{\ur{l_foo_regex}|D} }
+    \regex_show:n { a \ur{l_foo_regex} b \c{\ur{l_foo_regex}{2,7}?|D} }
     \regex_show:n { a\c{bc}\u{c_space_tl}\c{\u{c_space_tl}|} }
     \tl_set:Nn \l_tmpa_tl { \abc }
     \int_set:Nn \l_tmpa_int { 7 }
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index 0fd7781fe..b0ca9a8f2 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -39,7 +39,7 @@ Defining \l_foo_regex on line ...
 +-branch.
 <recently read> }
 l. ...  }
-> Compiled regex {a\ur {l_foo_regex}b\c {\ur {l_foo_regex}|D}}:
+> Compiled regex {a\ur {l_foo_regex}b\c {\ur {l_foo_regex}{2,7}?|D}}:
 +-branch
   char code 97 (a)
   ,-group begin (no capture)
@@ -59,7 +59,7 @@ l. ...  }
       +-branch
       | char code 98 (b)
       +-branch
-      `-group end
+      `-group end, repeated between 2 and 7 times, lazy
     +-branch
       char code 68 (D).
 <recently read> }





More information about the latex3-commits mailing list.