[latex3-commits] [git/LaTeX3-latex3-latex3] gh590: Suppress capturing groups in composed regex (see #590) (a7568456a)
Bruno Le Floch
blflatex at gmail.com
Tue Apr 27 10:43:06 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : gh590
Link : https://github.com/latex3/latex3/commit/a7568456a35819276d5aec207baeefd0024fafc3
>---------------------------------------------------------------
commit a7568456a35819276d5aec207baeefd0024fafc3
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Tue Apr 27 10:39:13 2021 +0200
Suppress capturing groups in composed regex (see #590)
Previously, if the regex inserted with \ur included capturing groups,
then these were showing up as groups in the larger regex, thus shifting
all subsequent groups: for instance in "\ur{l_tmpa_regex}a(b?)" the
group number corresponding to "(b?)" would depend on the regex
\l_tmpa_regex. Now all groups in the inserted regex are treated as
non-capturing groups and do not shift the group number. We could
presumably add an option like \urg or \ur[capture] to keep capturing
the groups in the inserted regex.
>---------------------------------------------------------------
a7568456a35819276d5aec207baeefd0024fafc3
l3kernel/l3regex.dtx | 24 ++++++++++-------
l3kernel/testfiles/m3regex005.luatex.tlg | 44 ++++++++++++++++++++------------
l3kernel/testfiles/m3regex005.lvt | 4 +--
l3kernel/testfiles/m3regex005.tlg | 44 ++++++++++++++++++++------------
l3kernel/testfiles/m3regex005.xetex.tlg | 44 ++++++++++++++++++++------------
5 files changed, 101 insertions(+), 59 deletions(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 27c0c8642..4696853ab 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -370,8 +370,9 @@
% |A\ur{l_tmpa_regex}D| matches the tokens |A| and |D| separated by
% something that matches the regular expression
% \cs[no-index]{l_tmpa_regex}. This behaves as if a non-capturing group
-% were surrounding \cs[no-index]{l_tmpa_regex} (thus quantifiers are
-% supported).
+% were surrounding \cs[no-index]{l_tmpa_regex}, and any group contained
+% in \cs[no-index]{l_tmpa_regex} is converted to a non-capturing group.
+% Quantifiers are supported.
%
% For instance, if \cs[no-index]{l_tmpa_regex} has value \verb"B|C",
% then |A\ur{l_tmpa_regex}D| is equivalent to \verb"A(?:B|C)D" (matching
@@ -3364,7 +3365,7 @@
% \end{macro}
% \end{macro}
%
-% \subsubsection{Raw token lists with \cs{u}}
+% \subsubsection{Raw token lists with \cs[no-index]{u}}
%
% \begin{macro}{\@@_compile_/u:}
% The |\u| escape is invalid in classes and directly following a
@@ -3441,7 +3442,8 @@
% \begin{macro}{\@@_compile_ur_end:, \@@_compile_ur:n}
% \begin{macro}[EXP]{\@@_compile_ur_aux:w}
% For the |\ur{...}| construction, once we have extracted the
-% variable's name, we determine the compiled regex (passed as the
+% variable's name, we replace all groups by non-capturing groups in
+% the compiled regex (passed as the
% argument of \cs{@@_compile_ur:n}). If that has a single branch
% (namely \cs{tl_if_empty:oTF} is false) and there is no quantifier,
% then simply insert the contents of this branch (obtained by
@@ -3450,7 +3452,14 @@
% the number of repetition etc.
% \begin{macrocode}
\cs_new_protected:Npn \@@_compile_ur_end:
- { \exp_args:Nv \@@_compile_ur:n { \l_@@_internal_a_tl } }
+ {
+ \group_begin:
+ \cs_set:Npn \@@_group:nnnN { \@@_group_no_capture:nnnN }
+ \cs_set:Npn \@@_group_resetting:nnnN { \@@_group_no_capture:nnnN }
+ \exp_args:NNx
+ \group_end:
+ \@@_compile_ur:n { \use:c { \l_@@_internal_a_tl } }
+ }
\cs_new_protected:Npn \@@_compile_ur:n #1
{
\tl_if_empty:oTF { \@@_compile_ur_aux:w #1 {} ? ? \q_@@_nil }
@@ -3458,10 +3467,7 @@
{ \use_i:nn }
{
\tl_build_put_right:Nn \l_@@_build_tl
- {
- \@@_group_no_capture:nnnN { \if_false: } \fi:
- \exp_not:n {#1}
- }
+ { \@@_group_no_capture:nnnN { \if_false: } \fi: #1 }
\@@_compile_quantifier:w
}
{ \tl_build_put_right:Nn \l_@@_build_tl { \use_ii:nn #1 } }
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index d29fd1503..68fc170f0 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -364,19 +364,25 @@ l. ... }
+-branch
,-group begin (no capture)
| char code 97 (a)
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated 0 or more times, greedy
- char code 66 (B)
- char code 97 (a)
,-group begin
- | char code 65 (A)
- `-group end
- char code 67 (C).
+ | ,-group begin
+ | | char code 66 (B)
+ | `-group end
+ | char code 97 (a)
+ | ,-group begin (no capture)
+ | | char code 65 (A)
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
+ | `-group end
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
<recently read> }
l. ... }
============================================================
@@ -385,22 +391,28 @@ l. ... }
,-group begin (no capture)
| char code 97 (a)
+-branch
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated between 0 and 2 times, greedy
- char code 66 (B)
- ,-group begin (no capture)
- | char code 97 (a)
- +-branch
+ ,-group begin
| ,-group begin
- | | char code 65 (A)
+ | | char code 66 (B)
+ | `-group end
+ | ,-group begin (no capture)
+ | | char code 97 (a)
+ | +-branch
+ | | ,-group begin (no capture)
+ | | | char code 65 (A)
+ | | `-group end
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
| `-group end
- `-group end
- char code 67 (C).
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
<recently read> }
l. ... }
============================================================
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index 1458e8200..652763402 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -199,7 +199,7 @@
%
\SEPARATOR
\regex_set:Nn \l_tmpa_regex { a(A) }
- \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* B \ur{l_tmpa_regex} C }
+ \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* ((B) \ur{l_tmpa_regex} (C)) }
\regex_show:N \l_tmpb_regex
\tl_set:Nn \l_tmpa_tl { aAaAaABaAC aABC BaAC BBC }
\regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
@@ -207,7 +207,7 @@
%
\SEPARATOR
\regex_set:Nn \l_tmpa_regex { a|(A) }
- \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} B \ur{l_tmpa_regex} C }
+ \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} ((B) \ur{l_tmpa_regex} (C)) }
\regex_show:N \l_tmpb_regex
\tl_set:Nn \l_tmpa_tl { aAaAaABAC aABC BaC BBaAC }
\regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index 783bba26b..a5d7a0fc2 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -364,19 +364,25 @@ l. ... }
+-branch
,-group begin (no capture)
| char code 97 (a)
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated 0 or more times, greedy
- char code 66 (B)
- char code 97 (a)
,-group begin
- | char code 65 (A)
- `-group end
- char code 67 (C).
+ | ,-group begin
+ | | char code 66 (B)
+ | `-group end
+ | char code 97 (a)
+ | ,-group begin (no capture)
+ | | char code 65 (A)
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
+ | `-group end
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
<recently read> }
l. ... }
============================================================
@@ -385,22 +391,28 @@ l. ... }
,-group begin (no capture)
| char code 97 (a)
+-branch
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated between 0 and 2 times, greedy
- char code 66 (B)
- ,-group begin (no capture)
- | char code 97 (a)
- +-branch
+ ,-group begin
| ,-group begin
- | | char code 65 (A)
+ | | char code 66 (B)
+ | `-group end
+ | ,-group begin (no capture)
+ | | char code 97 (a)
+ | +-branch
+ | | ,-group begin (no capture)
+ | | | char code 65 (A)
+ | | `-group end
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
| `-group end
- `-group end
- char code 67 (C).
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
<recently read> }
l. ... }
============================================================
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index d29fd1503..68fc170f0 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -364,19 +364,25 @@ l. ... }
+-branch
,-group begin (no capture)
| char code 97 (a)
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated 0 or more times, greedy
- char code 66 (B)
- char code 97 (a)
,-group begin
- | char code 65 (A)
- `-group end
- char code 67 (C).
+ | ,-group begin
+ | | char code 66 (B)
+ | `-group end
+ | char code 97 (a)
+ | ,-group begin (no capture)
+ | | char code 65 (A)
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
+ | `-group end
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+> \l_tmpa_tl=(aAaAaABaAC,BaAC,B,C)aABC(BaAC,BaAC,B,C)BBC.
<recently read> }
l. ... }
============================================================
@@ -385,22 +391,28 @@ l. ... }
,-group begin (no capture)
| char code 97 (a)
+-branch
- | ,-group begin
+ | ,-group begin (no capture)
| | char code 65 (A)
| `-group end
`-group end, repeated between 0 and 2 times, greedy
- char code 66 (B)
- ,-group begin (no capture)
- | char code 97 (a)
- +-branch
+ ,-group begin
| ,-group begin
- | | char code 65 (A)
+ | | char code 66 (B)
+ | `-group end
+ | ,-group begin (no capture)
+ | | char code 97 (a)
+ | +-branch
+ | | ,-group begin (no capture)
+ | | | char code 65 (A)
+ | | `-group end
+ | `-group end
+ | ,-group begin
+ | | char code 67 (C)
| `-group end
- `-group end
- char code 67 (C).
+ `-group end.
<recently read> }
l. ... }
-> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+> \l_tmpa_tl=aAaA(aABAC,BAC,B,C)aABC(BaC,BaC,B,C)BBaAC.
<recently read> }
l. ... }
============================================================
More information about the latex3-commits
mailing list.