[latex3-commits] [git/LaTeX3-latex3-latex3] gh590: Suppress capturing groups in composed regex (see #590) (a7568456a)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 10:43:06 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : gh590
Link       : https://github.com/latex3/latex3/commit/a7568456a35819276d5aec207baeefd0024fafc3

>---------------------------------------------------------------

commit a7568456a35819276d5aec207baeefd0024fafc3
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Tue Apr 27 10:39:13 2021 +0200

    Suppress capturing groups in composed regex (see #590)
    
    Previously, if the regex inserted with \ur included capturing groups,
    then these were showing up as groups in the larger regex, thus shifting
    all subsequent groups: for instance in "\ur{l_tmpa_regex}a(b?)" the
    group number corresponding to "(b?)" would depend on the regex
    \l_tmpa_regex.  Now all groups in the inserted regex are treated as
    non-capturing groups and do not shift the group number.  We could
    presumably add an option like \urg or \ur[capture] to keep capturing
    the groups in the inserted regex.


>---------------------------------------------------------------

a7568456a35819276d5aec207baeefd0024fafc3
 l3kernel/l3regex.dtx                     | 24 ++++++++++-------
 l3kernel/testfiles/m3regex005.luatex.tlg | 44 ++++++++++++++++++++------------
 l3kernel/testfiles/m3regex005.lvt        |  4 +--
 l3kernel/testfiles/m3regex005.tlg        | 44 ++++++++++++++++++++------------
 l3kernel/testfiles/m3regex005.xetex.tlg  | 44 ++++++++++++++++++++------------
 5 files changed, 101 insertions(+), 59 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 27c0c8642..4696853ab 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -370,8 +370,9 @@
 % |A\ur{l_tmpa_regex}D| matches the tokens |A| and |D| separated by
 % something that matches the regular expression
 % \cs[no-index]{l_tmpa_regex}.  This behaves as if a non-capturing group
-% were surrounding \cs[no-index]{l_tmpa_regex} (thus quantifiers are
-% supported).
+% were surrounding \cs[no-index]{l_tmpa_regex}, and any group contained
+% in \cs[no-index]{l_tmpa_regex} is converted to a non-capturing group.
+% Quantifiers are supported.
 %
 % For instance, if \cs[no-index]{l_tmpa_regex} has value \verb"B|C",
 % then |A\ur{l_tmpa_regex}D| is equivalent to \verb"A(?:B|C)D" (matching
@@ -3364,7 +3365,7 @@
 % \end{macro}
 % \end{macro}
 %
-% \subsubsection{Raw token lists with \cs{u}}
+% \subsubsection{Raw token lists with \cs[no-index]{u}}
 %
 % \begin{macro}{\@@_compile_/u:}
 %   The |\u| escape is invalid in classes and directly following a
@@ -3441,7 +3442,8 @@
 % \begin{macro}{\@@_compile_ur_end:, \@@_compile_ur:n}
 % \begin{macro}[EXP]{\@@_compile_ur_aux:w}
 %   For the |\ur{...}| construction, once we have extracted the
-%   variable's name, we determine the compiled regex (passed as the
+%   variable's name, we replace all groups by non-capturing groups in
+%   the compiled regex (passed as the
 %   argument of \cs{@@_compile_ur:n}).  If that has a single branch
 %   (namely \cs{tl_if_empty:oTF} is false) and there is no quantifier,
 %   then simply insert the contents of this branch (obtained by
@@ -3450,7 +3452,14 @@
 %   the number of repetition etc.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_ur_end:
-  { \exp_args:Nv \@@_compile_ur:n { \l_@@_internal_a_tl } }
+  {
+    \group_begin:
+      \cs_set:Npn \@@_group:nnnN { \@@_group_no_capture:nnnN }
+      \cs_set:Npn \@@_group_resetting:nnnN { \@@_group_no_capture:nnnN }
+      \exp_args:NNx
+    \group_end:
+    \@@_compile_ur:n { \use:c { \l_@@_internal_a_tl } }
+  }
 \cs_new_protected:Npn \@@_compile_ur:n #1
   {
     \tl_if_empty:oTF { \@@_compile_ur_aux:w #1 {} ? ? \q_@@_nil }
@@ -3458,10 +3467,7 @@
       { \use_i:nn }
           {
             \tl_build_put_right:Nn \l_@@_build_tl
-              {
-                \@@_group_no_capture:nnnN { \if_false: } \fi:
-                \exp_not:n {#1}
-              }
+              { \@@_group_no_capture:nnnN { \if_false: } \fi: #1 }
             \@@_compile_quantifier:w
           }
           { \tl_build_put_right:Nn \l_@@_build_tl { \use_ii:nn #1 } }
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index d29fd1503..68fc170f0 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -364,19 +364,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -385,22 +391,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index 1458e8200..652763402 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -199,7 +199,7 @@
     %
     \SEPARATOR
     \regex_set:Nn \l_tmpa_regex { a(A) }
-    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* B \ur{l_tmpa_regex} C }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* ((B) \ur{l_tmpa_regex} (C)) }
     \regex_show:N \l_tmpb_regex
     \tl_set:Nn \l_tmpa_tl { aAaAaABaAC  aABC  BaAC BBC }
     \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
@@ -207,7 +207,7 @@
     %
     \SEPARATOR
     \regex_set:Nn \l_tmpa_regex { a|(A) }
-    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} B \ur{l_tmpa_regex} C }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} ((B) \ur{l_tmpa_regex} (C)) }
     \regex_show:N \l_tmpb_regex
     \tl_set:Nn \l_tmpa_tl { aAaAaABAC  aABC  BaC BBaAC }
     \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index 783bba26b..a5d7a0fc2 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -364,19 +364,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -385,22 +391,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index d29fd1503..68fc170f0 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -364,19 +364,25 @@ l. ...  }
 +-branch
   ,-group begin (no capture)
   | char code 97 (a)
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated 0 or more times, greedy
-  char code 66 (B)
-  char code 97 (a)
   ,-group begin
-  | char code 65 (A)
-  `-group end
-  char code 67 (C).
+  | ,-group begin
+  | | char code 66 (B)
+  | `-group end
+  | char code 97 (a)
+  | ,-group begin (no capture)
+  | | char code 65 (A)
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
+  | `-group end
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
 <recently read> }
 l. ...  }
 ============================================================
@@ -385,22 +391,28 @@ l. ...  }
   ,-group begin (no capture)
   | char code 97 (a)
   +-branch
-  | ,-group begin
+  | ,-group begin (no capture)
   | | char code 65 (A)
   | `-group end
   `-group end, repeated between 0 and 2 times, greedy
-  char code 66 (B)
-  ,-group begin (no capture)
-  | char code 97 (a)
-  +-branch
+  ,-group begin
   | ,-group begin
-  | | char code 65 (A)
+  | | char code 66 (B)
+  | `-group end
+  | ,-group begin (no capture)
+  | | char code 97 (a)
+  | +-branch
+  | | ,-group begin (no capture)
+  | | | char code 65 (A)
+  | | `-group end
+  | `-group end
+  | ,-group begin
+  | | char code 67 (C)
   | `-group end
-  `-group end
-  char code 67 (C).
+  `-group end.
 <recently read> }
 l. ...  }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
 <recently read> }
 l. ...  }
 ============================================================





More information about the latex3-commits mailing list.