[latex3-commits] [git/LaTeX3-latex3-latex3] main: Suppress capturing groups in composed regex (see #590) (66f9319dc)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 15:56:19 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/66f9319dca28388f7b17130960a2307b49c6b187

>---------------------------------------------------------------

commit 66f9319dca28388f7b17130960a2307b49c6b187
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Tue Apr 27 10:39:13 2021 +0200

    Suppress capturing groups in composed regex (see #590)
    
    Previously, if the regex inserted with \ur included capturing groups,
    then these were showing up as groups in the larger regex, thus shifting
    all subsequent groups: for instance in "\ur{l_tmpa_regex}a(b?)" the
    group number corresponding to "(b?)" would depend on the regex
    \l_tmpa_regex.  Now all groups in the inserted regex are treated as
    non-capturing groups and do not shift the group number.  We could
    presumably add an option like \urg or \ur[capture] to keep capturing
    the groups in the inserted regex.


>---------------------------------------------------------------

66f9319dca28388f7b17130960a2307b49c6b187
 l3kernel/l3regex.dtx                     | 24 ++++++++++-------
 l3kernel/testfiles/m3regex005.luatex.tlg | 44 ++++++++++++++++++++------------
 l3kernel/testfiles/m3regex005.lvt        |  4 +--
 l3kernel/testfiles/m3regex005.tlg        | 44 ++++++++++++++++++++------------
 l3kernel/testfiles/m3regex005.xetex.tlg  | 44 ++++++++++++++++++++------------
 5 files changed, 101 insertions(+), 59 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index da63a0c45..76a7947b4 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -370,8 +370,9 @@
 % |A\ur{l_tmpa_regex}D| matches the tokens |A| and |D| separated by
 % something that matches the regular expression
 % \cs[no-index]{l_tmpa_regex}.  This behaves as if a non-capturing group
-% were surrounding \cs[no-index]{l_tmpa_regex} (thus quantifiers are
-% supported).
+% were surrounding \cs[no-index]{l_tmpa_regex}, and any group contained
+% in \cs[no-index]{l_tmpa_regex} is converted to a non-capturing group.
+% Quantifiers are supported.
 %
 % For instance, if \cs[no-index]{l_tmpa_regex} has value \verb"B|C",
 % then |A\ur{l_tmpa_regex}D| is equivalent to \verb"A(?:B|C)D" (matching
@@ -3371,7 +3372,7 @@
 % \end{macro}
 % \end{macro}
 %
-% \subsubsection{Raw token lists with \cs{u}}
+% \subsubsection{Raw token lists with \cs[no-index]{u}}
 %
 % \begin{macro}{\@@_compile_/u:}
 %   The |\u| escape is invalid in classes and directly following a
@@ -3448,7 +3449,8 @@
 % \begin{macro}{\@@_compile_ur_end:, \@@_compile_ur:n}
 % \begin{macro}[EXP]{\@@_compile_ur_aux:w}
 %   For the |\ur{...}| construction, once we have extracted the
-%   variable's name, we determine the compiled regex (passed as the
+%   variable's name, we replace all groups by non-capturing groups in
+%   the compiled regex (passed as the
 %   argument of \cs{@@_compile_ur:n}).  If that has a single branch
 %   (namely \cs{tl_if_empty:oTF} is false) and there is no quantifier,
 %   then simply insert the contents of this branch (obtained by
@@ -3457,7 +3459,14 @@
 %   the number of repetition etc.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_ur_end:
-  { \exp_args:Nv \@@_compile_ur:n { \l_@@_internal_a_tl } }
+  {
+    \group_begin:
+      \cs_set:Npn \@@_group:nnnN { \@@_group_no_capture:nnnN }
+      \cs_set:Npn \@@_group_resetting:nnnN { \@@_group_no_capture:nnnN }
+      \exp_args:NNx
+    \group_end:
+    \@@_compile_ur:n { \use:c { \l_@@_internal_a_tl } }
+  }
 \cs_new_protected:Npn \@@_compile_ur:n #1
   {
     \tl_if_empty:oTF { \@@_compile_ur_aux:w #1 {} ? ? \q_@@_nil }
@@ -3465,10 +3474,7 @@
       { \use_i:nn }
           {
             \tl_build_put_right:Nn \l_@@_build_tl
-              {
-                \@@_group_no_capture:nnnN { \if_false: } \fi:
-                \exp_not:n {#1}
-              }
+              { \@@_group_no_capture:nnnN { \if_false: } \fi: #1 }
             \@@_compile_quantifier:w
           }
           { \tl_build_put_right:Nn \l_@@_build_tl { \use_ii:nn #1 } }
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index ca4479677..e658839c8 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -369,19 +369,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -390,22 +396,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index f1ea56fb4..b276fea3a 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -200,7 +200,7 @@
     %
     \SEPARATOR
     \regex_set:Nn \l_tmpa_regex { a(A) }
-    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* B \ur{l_tmpa_regex} C }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* ((B) \ur{l_tmpa_regex} (C)) }
     \regex_show:N \l_tmpb_regex
     \tl_set:Nn \l_tmpa_tl { aAaAaABaAC  aABC  BaAC BBC }
     \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
@@ -208,7 +208,7 @@
     %
     \SEPARATOR
     \regex_set:Nn \l_tmpa_regex { a|(A) }
-    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} B \ur{l_tmpa_regex} C }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} ((B) \ur{l_tmpa_regex} (C)) }
     \regex_show:N \l_tmpb_regex
     \tl_set:Nn \l_tmpa_tl { aAaAaABAC  aABC  BaC BBaAC }
     \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index 7d36a252b..9969f12a5 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -369,19 +369,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -390,22 +396,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index 5a43b91b5..71842e129 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -369,19 +369,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -390,22 +396,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================





More information about the latex3-commits mailing list.