[latex3-commits] [git/LaTeX3-latex3-latex3] main: l3regex: tests for \u and \ur escapes and quantifiers (afee1b026)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 15:56:18 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/afee1b0268eb6718cec886ff87d38511c5866aac

>---------------------------------------------------------------

commit afee1b0268eb6718cec886ff87d38511c5866aac
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Mon Apr 26 22:41:40 2021 +0200

    l3regex: tests for \u and \ur escapes and quantifiers


>---------------------------------------------------------------

afee1b0268eb6718cec886ff87d38511c5866aac
 l3kernel/testfiles/m3regex005.luatex.tlg | 59 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.lvt        | 26 ++++++++++++++
 l3kernel/testfiles/m3regex005.tlg        | 59 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.xetex.tlg  | 59 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex007.lvt        |  2 +-
 l3kernel/testfiles/m3regex007.tlg        |  6 ++--
 6 files changed, 205 insertions(+), 6 deletions(-)

diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index 32057e3e5..ca4479677 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -353,7 +353,64 @@ There were 2 missing right parentheses.
 > \l_tmpb_tl=.
 ============================================================
 ============================================================
-TEST 15: Catcode used by default
+TEST 15: Repetitions on \u escape
+============================================================
+The token list \l_tmpa_tl contains the tokens:
+>  / (the character /)
+>  \foobaz (control sequence=undefined)
+>  \foobaz (control sequence=undefined)
+>  y (the letter y)
+>  / (the character /)
+>  y (the letter y).
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated 0 or more times, greedy
+  char code 66 (B)
+  char code 97 (a)
+  ,-group begin
+  | char code 65 (A)
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated between 0 and 2 times, greedy
+  char code 66 (B)
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 16: Catcode used by default
 ============================================================
 \g__cctab_next_cctab=\catcodetable...
 The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index 602a50efa..f1ea56fb4 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -189,6 +189,32 @@
     \tl_log:N \l_tmpb_tl
   }
 
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\TEST { Repetitions~on~\u~escape }
+  {
+    \tl_set:Nn \l_tmpa_tl { \foobaz \foobaz y y }
+    \tl_set:Nn \l_tmpb_tl { \foobaz }
+    \regex_replace_once:nnN { \u{l_tmpb_tl}*? . y } { / \0 / } \l_tmpa_tl
+    \tl_analysis_show:N \l_tmpa_tl
+    %
+    \SEPARATOR
+    \regex_set:Nn \l_tmpa_regex { a(A) }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}* B \ur{l_tmpa_regex} C }
+    \regex_show:N \l_tmpb_regex
+    \tl_set:Nn \l_tmpa_tl { aAaAaABaAC  aABC  BaAC BBC }
+    \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
+    \tl_show:N \l_tmpa_tl
+    %
+    \SEPARATOR
+    \regex_set:Nn \l_tmpa_regex { a|(A) }
+    \regex_set:Nn \l_tmpb_regex { \ur{l_tmpa_regex}{0,2} B \ur{l_tmpa_regex} C }
+    \regex_show:N \l_tmpb_regex
+    \tl_set:Nn \l_tmpa_tl { aAaAaABAC  aABC  BaC BBaAC }
+    \regex_replace_all:NnN \l_tmpb_regex { (\0,\1,\2,\3) } \l_tmpa_tl
+    \tl_show:N \l_tmpa_tl
+  }
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \TEST { Catcode~used~by~default }
   {
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index cc6c44cba..7d36a252b 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -353,7 +353,64 @@ There were 2 missing right parentheses.
 > \l_tmpb_tl=.
 ============================================================
 ============================================================
-TEST 15: Catcode used by default
+TEST 15: Repetitions on \u escape
+============================================================
+The token list \l_tmpa_tl contains the tokens:
+>  / (the character /)
+>  \foobaz (control sequence=undefined)
+>  \foobaz (control sequence=undefined)
+>  y (the letter y)
+>  / (the character /)
+>  y (the letter y).
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated 0 or more times, greedy
+  char code 66 (B)
+  char code 97 (a)
+  ,-group begin
+  | char code 65 (A)
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated between 0 and 2 times, greedy
+  char code 66 (B)
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 16: Catcode used by default
 ============================================================
 Defining \g__cctab_1_cctab on line ...
 The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index b74de5b57..5a43b91b5 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -353,7 +353,64 @@ There were 2 missing right parentheses.
 > \l_tmpb_tl=.
 ============================================================
 ============================================================
-TEST 15: Catcode used by default
+TEST 15: Repetitions on \u escape
+============================================================
+The token list \l_tmpa_tl contains the tokens:
+>  / (the character /)
+>  \foobaz (control sequence=undefined)
+>  \foobaz (control sequence=undefined)
+>  y (the letter y)
+>  / (the character /)
+>  y (the letter y).
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated 0 or more times, greedy
+  char code 66 (B)
+  char code 97 (a)
+  ,-group begin
+  | char code 65 (A)
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=(aAaAaABaAC,A,A,)aABC(BaAC,,A,)BBC.
+<recently read> }
+l. ...  }
+============================================================
+> Compiled regex variable \l_tmpb_regex:
++-branch
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end, repeated between 0 and 2 times, greedy
+  char code 66 (B)
+  ,-group begin (no capture)
+  | char code 97 (a)
+  +-branch
+  | ,-group begin
+  | | char code 65 (A)
+  | `-group end
+  `-group end
+  char code 67 (C).
+<recently read> }
+l. ...  }
+> \l_tmpa_tl=aAaA(aABAC,A,A,)aABC(BaC,,,)BBaAC.
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 16: Catcode used by default
 ============================================================
 Defining \g__cctab_1_cctab on line ...
 The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex007.lvt b/l3kernel/testfiles/m3regex007.lvt
index 627ac02cb..b3e40a615 100644
--- a/l3kernel/testfiles/m3regex007.lvt
+++ b/l3kernel/testfiles/m3regex007.lvt
@@ -24,7 +24,7 @@
     \regex_set:Nn \l_foo_regex { \A a|b| }
     \regex_show:N \l_foo_regex
     \regex_show:n { a \ur{l_foo_regex} b \c{\ur{l_foo_regex}{2,7}?|D} }
-    \regex_show:n { a\c{bc}\u{c_space_tl}\c{\u{c_space_tl}|} }
+    \regex_show:n { a\c{bc}\u{c_space_tl}+\c{\u{c_space_tl}|} }
     \tl_set:Nn \l_tmpa_tl { \abc }
     \int_set:Nn \l_tmpa_int { 7 }
     \regex_show:n { \u{l_tmpa_tl}* \c{\u{l_tmpa_tl}|(?:\u{l_tmpa_int})?}{3}|y* }
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index d889779b6..1d6e96754 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -64,11 +64,13 @@ l. ...  }
       char code 68 (D).
 <recently read> }
 l. ...  }
-> Compiled regex {a\c {bc}\u {c_space_tl}\c {\u {c_space_tl}|}}:
+> Compiled regex {a\c {bc}\u {c_space_tl}+\c {\u {c_space_tl}|}}:
 +-branch
   char code 97 (a)
   control sequence \bc
-  char 32 ( ), catcode 10
+  ,-group begin (no capture)
+  | char 32 ( ), catcode 10
+  `-group end, repeated 1 or more times, greedy
   control sequence \  or \.
 <recently read> }
 l. ...  }





More information about the latex3-commits mailing list.