[latex3-commits] [git/LaTeX3-latex3-latex3] main: Make \regex_show:n show printable characters (fixes #590) (ab92696db)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 15:53:43 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/ab92696db4b570a250fe4b028a027d0f09e2eaeb

>---------------------------------------------------------------

commit ab92696db4b570a250fe4b028a027d0f09e2eaeb
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Sun Apr 25 02:18:33 2021 +0200

    Make \regex_show:n show printable characters (fixes #590)
    
    Previously only the charcode was shown, which made it harder to
    debug things.


>---------------------------------------------------------------

ab92696db4b570a250fe4b028a027d0f09e2eaeb
 l3kernel/CHANGELOG.md                    |   1 +
 l3kernel/l3regex.dtx                     |  25 +++-
 l3kernel/testfiles/m3regex007.tlg        | 210 +++++++++++++++----------------
 l3kernel/testfiles/m3regex010.luatex.tlg |   2 +-
 4 files changed, 126 insertions(+), 112 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index d291d6650..8b4473e03 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -20,6 +20,7 @@ this project uses date-based 'snapshot' version identifiers.
 - `\pdf_version_gset:n` for `dvips`.
 - Improve handling of `\exp_not:n` in `\text_expand:n` (issue #875)
 - `\file_full_name:n` now avoids calling `\pdffilesize` multiple times on the same file.
+- Show printable characters explicitly in `\regex_show:n`
 
 ### Fixed
 - Evalutate integer constants only once (issue#861)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 4f151d591..0975173a5 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -509,7 +509,7 @@
 %   which never change.
 % \end{function}
 %
-% \begin{function}[added = 2017-05-26]{\regex_show:n, \regex_show:N}
+% \begin{function}[added = 2017-05-26, updated = 2021-04-15]{\regex_show:n, \regex_show:N}
 %   \begin{syntax}
 %     \cs{regex_show:n} \Arg{regex}
 %   \end{syntax}
@@ -3554,18 +3554,18 @@
       \cs_set:Npn \@@_A_test: { anchor~at~start~(\iow_char:N\\A) }
       \cs_set:Npn \@@_G_test: { anchor~at~start~of~match~(\iow_char:N\\G) }
       \cs_set_protected:Npn \@@_item_caseful_equal:n ##1
-        { \@@_show_one:n { char~code~\int_eval:n{##1} } }
+        { \@@_show_one:n { char~code~\@@_show_char:n{##1} } }
       \cs_set_protected:Npn \@@_item_caseful_range:nn ##1##2
         {
           \@@_show_one:n
-            { range~[\int_eval:n{##1}, \int_eval:n{##2}] }
+            { range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}] }
         }
       \cs_set_protected:Npn \@@_item_caseless_equal:n ##1
-        { \@@_show_one:n { char~code~\int_eval:n{##1}~(caseless) } }
+        { \@@_show_one:n { char~code~\@@_show_char:n{##1}~(caseless) } }
       \cs_set_protected:Npn \@@_item_caseless_range:nn ##1##2
         {
           \@@_show_one:n
-            { Range~[\int_eval:n{##1}, \int_eval:n{##2}]~(caseless) }
+            { Range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}]~(caseless) }
         }
       \cs_set_protected:Npn \@@_item_catcode:nT
         { \@@_show_item_catcode:NnT \c_true_bool }
@@ -3574,7 +3574,7 @@
       \cs_set_protected:Npn \@@_item_reverse:n
         { \@@_show_scope:nn { Reversed~match } }
       \cs_set_protected:Npn \@@_item_exact:nn ##1##2
-        { \@@_show_one:n { char~##2,~catcode~##1 } }
+        { \@@_show_one:n { char~\@@_show_char:n{##2},~catcode~##1 } }
       \cs_set_eq:NN \@@_item_exact_cs:n \@@_show_item_exact_cs:n
       \cs_set_protected:Npn \@@_item_cs:n
         { \@@_show_scope:nn { control~sequence } }
@@ -3590,6 +3590,19 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[EXP]{\@@_show_char:n}
+%   Show a single character, together with its ASCII representation if available.
+%   This could be extended beyond ASCII.  It is not ideal for parentheses themselves.
+%    \begin{macrocode}
+\cs_new:Npn \@@_show_char:n #1
+  {
+    \int_eval:n {#1}
+    \int_compare:nT { 32 <= #1 <= 126 }
+      { ~ ( \char_generate:nn {#1} {12} ) }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}{\@@_show_one:n}
 %   Every part of the final message go through this function, which adds
 %   one line to the output, with the appropriate prefix.
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index 09b55e7d9..0fd7781fe 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -7,68 +7,68 @@ TEST 1: regex_show
 LaTeX3 Warning: Extra right parenthesis ignored in regular expression.
 > Compiled regex {a*|b??|(c{0}d{2,}e|[^fg\c [^BE][^\d ]\w ]){2,4}?)}:
 +-branch
-  char code 97, repeated 0 or more times, greedy
+  char code 97 (a), repeated 0 or more times, greedy
 +-branch
-  char code 98, repeated between 0 and 1 times, lazy
+  char code 98 (b), repeated between 0 and 1 times, lazy
 +-branch
   ,-group begin
-  | char code 99, repeated 0 times
-  | char code 100, repeated 2 or more times, greedy
-  | char code 101
+  | char code 99 (c), repeated 0 times
+  | char code 100 (d), repeated 2 or more times, greedy
+  | char code 101 (e)
   +-branch
   | Don't match
-  |   char code 102
-  |   char code 103
+  |   char code 102 (f)
+  |   char code 103 (g)
   |   categories CMTPUDSLOA, negative class
-  |     range [48,57]
-  |   range [97,122]
-  |   range [65,90]
-  |   range [48,57]
-  |   char code 95
+  |     range [48 (0),57 (9)]
+  |   range [97 (a),122 (z)]
+  |   range [65 (A),90 (Z)]
+  |   range [48 (0),57 (9)]
+  |   char code 95 (_)
   `-group end, repeated between 2 and 4 times, lazy
-  char code 41.
+  char code 41 ()).
 <recently read> }
 l. ...  }
 Defining \l_foo_regex on line ...
 > Compiled regex variable \l_foo_regex:
 +-branch
   assertion: anchor at start (\A)
-  char code 97
+  char code 97 (a)
 +-branch
-  char code 98
+  char code 98 (b)
 +-branch.
 <recently read> }
 l. ...  }
 > Compiled regex {a\ur {l_foo_regex}b\c {\ur {l_foo_regex}|D}}:
 +-branch
-  char code 97
+  char code 97 (a)
   ,-group begin (no capture)
   | assertion: anchor at start (\A)
-  | char code 97
+  | char code 97 (a)
   +-branch
-  | char code 98
+  | char code 98 (b)
   +-branch
   `-group end
-  char code 98
+  char code 98 (b)
   Match
     control sequence
     +-branch
       ,-group begin (no capture)
       | assertion: anchor at start (\A)
-      | char code 97
+      | char code 97 (a)
       +-branch
-      | char code 98
+      | char code 98 (b)
       +-branch
       `-group end
     +-branch
-      char code 68.
+      char code 68 (D).
 <recently read> }
 l. ...  }
 > Compiled regex {a\c {bc}\u {c_space_tl}\c {\u {c_space_tl}|}}:
 +-branch
-  char code 97
+  char code 97 (a)
   control sequence \bc
-  char 32, catcode 10
+  char 32 ( ), catcode 10
   control sequence \  or \.
 <recently read> }
 l. ...  }
@@ -76,39 +76,39 @@ l. ...  }
 {l_tmpa_int})?}{3}|y*}:
 +-branch
   control sequence \abc
-  char code 42
+  char code 42 (*)
   Match, repeated 3 times
     control sequence
     +-branch
-      char code 92
-      char code 97
-      char code 98
-      char code 99
-      char code 32
+      char code 92 (\)
+      char code 97 (a)
+      char code 98 (b)
+      char code 99 (c)
+      char code 32 ( )
     +-branch
       ,-group begin (no capture)
-      | char code 55
+      | char code 55 (7)
       `-group end, repeated between 0 and 1 times, greedy
 +-branch
-  char code 121, repeated 0 or more times, greedy.
+  char code 121 (y), repeated 0 or more times, greedy.
 <recently read> }
 l. ...  }
 > Compiled regex {a(?:bc(?|de|f){2}g|hi){3,4}?}:
 +-branch
-  char code 97
+  char code 97 (a)
   ,-group begin (no capture)
-  | char code 98
-  | char code 99
+  | char code 98 (b)
+  | char code 99 (c)
   | ,-group begin (resetting)
-  | | char code 100
-  | | char code 101
+  | | char code 100 (d)
+  | | char code 101 (e)
   | +-branch
-  | | char code 102
+  | | char code 102 (f)
   | `-group end, repeated 2 times
-  | char code 103
+  | char code 103 (g)
   +-branch
-  | char code 104
-  | char code 105
+  | char code 104 (h)
+  | char code 105 (i)
   `-group end, repeated between 3 and 4 times, lazy.
 <recently read> }
 l. ...  }
@@ -132,23 +132,23 @@ TEST 2: regex_show again
 > Compiled regex {[^\d \W \s ]*?[^a-z[:^cntrl:]A-Z]??}:
 +-branch
   Don't match, repeated 0 or more times, lazy
-    range [48,57]
+    range [48 (0),57 (9)]
     Reversed match
-      range [97,122]
-      range [65,90]
-      range [48,57]
-      char code 95
-    char code 32
+      range [97 (a),122 (z)]
+      range [65 (A),90 (Z)]
+      range [48 (0),57 (9)]
+      char code 95 (_)
+    char code 32 ( )
     char code 9
     char code 10
     char code 12
     char code 13
   Don't match, repeated between 0 and 1 times, lazy
-    range [97,122]
+    range [97 (a),122 (z)]
     Reversed match
       range [0,31]
       char code 127
-    range [65,90].
+    range [65 (A),90 (Z)].
 <recently read> }
 l. ...  }
 > Compiled regex {[^\c [^L][^[:^alpha:]]]{2,}?}:
@@ -156,8 +156,8 @@ l. ...  }
   Don't match, repeated 2 or more times, lazy
     categories CBEMTPUDSOA, negative class
       Reversed match
-        range [97,122]
-        range [65,90].
+        range [97 (a),122 (z)]
+        range [65 (A),90 (Z)].
 <recently read> }
 l. ...  }
 ! LaTeX3 Error: Missing right bracket inserted in regular expression.
@@ -175,9 +175,9 @@ parentheses.
 > Compiled regex {(a(b|[^}:
 +-branch
   ,-group begin
-  | char code 97
+  | char code 97 (a)
   | ,-group begin
-  | | char code 98
+  | | char code 98 (b)
   | +-branch
   | | Pass
   | `-group end
@@ -232,8 +232,8 @@ l. ...  }
     control sequence
     +-branch
       Match
-        char code 125
-        char code 123.
+        char code 125 (})
+        char code 123 ({).
 <recently read> }
 l. ...  }
 ============================================================
@@ -245,49 +245,49 @@ TEST 3: all escaped sequences
 +-branch
   assertion: anchor at start (\A)
   negative assertion: word boundary
-  char code 67
+  char code 67 (C)
   Match
     Reversed match
-      range [48,57]
-  char code 69
-  char code 70
+      range [48 (0),57 (9)]
+  char code 69 (E)
+  char code 70 (F)
   assertion: anchor at start of match (\G)
   Match
     Reversed match
-      char code 32
+      char code 32 ( )
       char code 9
-  char code 73
-  char code 74
+  char code 73 (I)
+  char code 74 (J)
   reset match start (\K)
-  char code 76
-  char code 77
+  char code 76 (L)
+  char code 77 (M)
   Match
     Reversed match
       char code 10
-  char code 79
-  char code 80
-  char code 81
-  char code 82
+  char code 79 (O)
+  char code 80 (P)
+  char code 81 (Q)
+  char code 82 (R)
   Match
     Reversed match
-      char code 32
+      char code 32 ( )
       char code 9
       char code 10
       char code 12
       char code 13
-  char code 84
-  char code 85
+  char code 84 (T)
+  char code 85 (U)
   Match
     Reversed match
       range [10,13]
   Match
     Reversed match
-      range [97,122]
-      range [65,90]
-      range [48,57]
-      char code 95
-  char code 88
-  char code 89
+      range [97 (a),122 (z)]
+      range [65 (A),90 (Z)]
+      range [48 (0),57 (9)]
+      char code 95 (_)
+  char code 88 (X)
+  char code 89 (Y)
   assertion: anchor at end (\Z).
 <recently read> }
 l. ...  }
@@ -309,54 +309,54 @@ the variable to use.
 +-branch
   char code 7
   assertion: word boundary
-  range [48,57]
+  range [48 (0),57 (9)]
   char code 27
   char code 12
-  char code 103
+  char code 103 (g)
   Match
-    char code 32
+    char code 32 ( )
     char code 9
-  char code 105
-  char code 106
-  char code 107
-  char code 108
-  char code 109
+  char code 105 (i)
+  char code 106 (j)
+  char code 107 (k)
+  char code 108 (l)
+  char code 109 (m)
   char code 10
-  char code 111
-  char code 112
-  char code 113
+  char code 111 (o)
+  char code 112 (p)
+  char code 113 (q)
   char code 13
   Match
-    char code 32
+    char code 32 ( )
     char code 9
     char code 10
     char code 12
     char code 13
   char code 9
-  char code 117
+  char code 117 (u)
   range [10,13]
   Match
-    range [97,122]
-    range [65,90]
-    range [48,57]
-    char code 95
+    range [97 (a),122 (z)]
+    range [65 (A),90 (Z)]
+    range [48 (0),57 (9)]
+    char code 95 (_)
   char code 0
-  char code 121
+  char code 121 (y)
   assertion: anchor at end (\Z).
 <recently read> }
 l. ...  }
 > Compiled regex {\0\1\2\3\4\5\6\7\8\9}:
 +-branch
-  char code 48
-  char code 49
-  char code 50
-  char code 51
-  char code 52
-  char code 53
-  char code 54
-  char code 55
-  char code 56
-  char code 57.
+  char code 48 (0)
+  char code 49 (1)
+  char code 50 (2)
+  char code 51 (3)
+  char code 52 (4)
+  char code 53 (5)
+  char code 54 (6)
+  char code 55 (7)
+  char code 56 (8)
+  char code 57 (9).
 <recently read> }
 l. ...  }
 ============================================================
diff --git a/l3kernel/testfiles/m3regex010.luatex.tlg b/l3kernel/testfiles/m3regex010.luatex.tlg
index c0ad4d0c9..86fc5ca94 100644
--- a/l3kernel/testfiles/m3regex010.luatex.tlg
+++ b/l3kernel/testfiles/m3regex010.luatex.tlg
@@ -6,7 +6,7 @@ TEST 1: LuaTeX bug which leads to an l3regex bug
 ============================================================
 > Compiled regex {\\^^A}:
 +-branch
-  char code 92
+  char code 92 (\)
   char code 1.
 <recently read> }
 l. ...  }





More information about the latex3-commits mailing list.