[latex3-commits] [git/LaTeX3-latex3-latex3] main: Refine documentation of \u in l3regex (1de0d8734)

Bruno Le Floch blflatex at gmail.com
Fri Apr 23 22:24:41 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/1de0d8734e6bc26e0d98e05b0ecdebb57b6aebb3

>---------------------------------------------------------------

commit 1de0d8734e6bc26e0d98e05b0ecdebb57b6aebb3
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Fri Apr 23 22:20:32 2021 +0200

    Refine documentation of \u in l3regex
    
    Document that \u accepts any variable, not just a tl variable.  Remove
    from the documentation the idea of allowing \u as the number of
    repetitions of something (in the sense that "a{2,\u{l_my_int}}" could
    denote repeating a from 2 to \l_my_int times) because this can be done
    more cleanly by x-expanding "a{2,\int_use:N\l_my_int}" before using it
    as a regex.


>---------------------------------------------------------------

1de0d8734e6bc26e0d98e05b0ecdebb57b6aebb3
 l3kernel/l3regex.dtx              | 11 ++++++-----
 l3kernel/testfiles/m3regex007.lvt |  3 ++-
 l3kernel/testfiles/m3regex007.tlg |  7 +++++--
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 375b92cb4..5569afdd8 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -328,11 +328,14 @@
 %
 % The |\u| escape sequence allows to insert the contents of a token list
 % directly into a regular expression or a replacement, avoiding the need
-% to escape special characters. Namely, |\u|\Arg{tl~var~name} matches
-% the exact contents of the token list \meta{tl~var}. Within a |\c{...}|
+% to escape special characters. Namely, |\u|\Arg{var~name} matches
+% the exact contents of the variable \cs[no-index]{\meta{var~name}},
+% which are obtained by applying \cs{exp_not:v} \Arg{var~name} at the
+% time the regular expression is compiled. Within a |\c{...}|
 % control sequence matching, the |\u| escape sequence only expands its
 % argument once, in effect performing \cs{tl_to_str:v}. Quantifiers are
-% not supported directly: use a group.
+% not supported directly: use a group, for instance as in
+% |(?:\u|\Arg{var~name}|){2,4}|.
 %
 % The option |(?i)| makes the match case insensitive (identifying
 % \texttt{A}--\texttt{Z} with \texttt{a}--\texttt{z}; no Unicode support
@@ -756,8 +759,6 @@
 %   \item Provide a syntax such as |\ur{l_my_regex}| to use an
 %     already-compiled regex in a more complicated regex.  This makes
 %     regexes more easily composable.
-%   \item Allowing |\u{l_my_tl}| in more places, for instance as the
-%     number of repetitions in a quantifier.
 % \end{itemize}
 %
 % The following features of \textsc{pcre} or Perl may or may not be
diff --git a/l3kernel/testfiles/m3regex007.lvt b/l3kernel/testfiles/m3regex007.lvt
index 2b359fd91..7e6c4bc5b 100644
--- a/l3kernel/testfiles/m3regex007.lvt
+++ b/l3kernel/testfiles/m3regex007.lvt
@@ -25,7 +25,8 @@
     \regex_show:N \l_foo_regex
     \regex_show:n { a\c{bc}\u{c_space_tl}\c{\u{c_space_tl}|} }
     \tl_set:Nn \l_tmpa_tl { \abc }
-    \regex_show:n { \u{l_tmpa_tl}* \c{\u{l_tmpa_tl}|x?}{3}|y* }
+    \int_set:Nn \l_tmpa_int { 7 }
+    \regex_show:n { \u{l_tmpa_tl}* \c{\u{l_tmpa_tl}|(?:\u{l_tmpa_int})?}{3}|y* }
     \regex_show:n { a(?: bc (?| de | f ){2} g | hi ){3,4}? }
     \regex_new:N \l_bar_regex
     \regex_show:N \l_bar_regex
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index ef8aec286..b797aa26f 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -47,7 +47,8 @@ l. ...  }
   control sequence \  or \.
 <recently read> }
 l. ...  }
-> Compiled regex {\u {l_tmpa_tl}*\c {\u {l_tmpa_tl}|x?}{3}|y*}:
+> Compiled regex {\u {l_tmpa_tl}*\c {\u {l_tmpa_tl}|(?:\u
+{l_tmpa_int})?}{3}|y*}:
 +-branch
   control sequence \abc
   char code 42
@@ -60,7 +61,9 @@ l. ...  }
       char code 99
       char code 32
     +-branch
-      char code 120, repeated between 0 and 1 times, greedy
+      ,-group begin (no capture)
+      | char code 55
+      `-group end, repeated between 0 and 1 times, greedy
 +-branch
   char code 121, repeated 0 or more times, greedy.
 <recently read> }





More information about the latex3-commits mailing list.