[latex3-commits] [git/LaTeX3-latex3-latex3] main: l3regex: Forbid unescaped left braces in \c and \u escapes (c69de283a)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 15:59:07 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/c69de283a480457492a16c2dac81deb9f6b1c181

>---------------------------------------------------------------

commit c69de283a480457492a16c2dac81deb9f6b1c181
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Tue Apr 27 11:57:11 2021 +0200

    l3regex: Forbid unescaped left braces in \c and \u escapes
    
    Of course, \c{ \{ } with an escaped brace still works fine.


>---------------------------------------------------------------

c69de283a480457492a16c2dac81deb9f6b1c181
 l3kernel/l3regex.dtx                     | 72 +++++++++++++++++++-----
 l3kernel/testfiles/m3regex005.luatex.tlg | 95 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.lvt        | 15 +++++
 l3kernel/testfiles/m3regex005.tlg        | 95 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.xetex.tlg  | 95 +++++++++++++++++++++++++++++++-
 5 files changed, 350 insertions(+), 22 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 76a7947b4..0cdd3d453 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -3289,6 +3289,21 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}+\@@_compile_{:+
+%   We forbid unescaped left braces inside a |\c{...}| escape because
+%   they would otherwise raise the confusing question of whether the
+%   first right brace in |\c{{}x}| should end |\c| or whether braces
+%   should be matched.
+%    \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_ \c_left_brace_str : }
+  {
+    \@@_if_in_cs:TF
+      { \__kernel_msg_error:nnn { regex } { cu-lbrace } { c } }
+      { \exp_after:wN \@@_compile_raw:N \c_left_brace_str }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}+\@@_compile_}:+
 % \begin{macro}{\@@_compile_end_cs:}
 % \begin{macro}[EXP]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
@@ -3433,7 +3448,14 @@
           {
             \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
               { \if_false: { \fi: } \l_@@_internal_b_tl }
-              { #2 \@@_compile_u_loop:NN }
+              {
+                \if_charcode:w \c_left_brace_str #2
+                  \__kernel_msg_expandable_error:nnn { regex } { cu-lbrace } { u }
+                \else:
+                  #2
+                \fi:
+                \@@_compile_u_loop:NN
+              }
           }
           {
             \if_false: { \fi: }
@@ -5387,7 +5409,11 @@
           \if_charcode:w \c_right_brace_str ##1
             \@@_replacement_rbrace:N
           \else:
-            \@@_replacement_normal:n
+            \if_charcode:w \c_left_brace_str ##1
+              \@@_replacement_lbrace:N
+            \else:
+              \@@_replacement_normal:n
+            \fi:
           \fi:
           ##1
         }
@@ -5566,8 +5592,7 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_g:w #1#2
   {
-    \@@_two_if_eq:NNNNTF
-      #1 #2 \@@_replacement_normal:n \c_left_brace_str
+    \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
       { \l_@@_internal_a_int = \@@_replacement_g_digits:NN }
       { \@@_replacement_error:NNN g #1 #2 }
   }
@@ -5614,15 +5639,15 @@
   {
     \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
       {
-        \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
+        \cs_if_exist:cTF { @@_replacement_c_#2:w }
+          { \@@_replacement_cat:NNN #2 }
+          { \@@_replacement_error:NNN c #1#2 }
+      }
+      {
+        \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
           { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:N }
-          {
-            \cs_if_exist:cTF { @@_replacement_c_#2:w }
-              { \@@_replacement_cat:NNN #2 }
-              { \@@_replacement_error:NNN c #1#2 }
-          }
+          { \@@_replacement_error:NNN c #1#2 }
       }
-      { \@@_replacement_error:NNN c #1#2 }
   }
 %    \end{macrocode}
 % \end{macro}
@@ -5656,8 +5681,7 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_u:w #1#2
   {
-    \@@_two_if_eq:NNNNTF
-      #1 #2 \@@_replacement_normal:n \c_left_brace_str
+    \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
       { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:V }
       { \@@_replacement_error:NNN u #1#2 }
   }
@@ -5681,6 +5705,21 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}{\@@_replacement_lbrace:N}
+%   Within a |\c{...}| or |\u{...}| construction, an unescaped left
+%   brace is forbidden. Otherwise, it is a raw left brace.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_lbrace:N #1
+  {
+    \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f:
+      \__kernel_msg_error:nnn { regex } { cu-lbrace } { u }
+    \else:
+      \@@_replacement_normal:n {#1}
+    \fi:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \subsubsection{Characters in replacement}
 %
 % \begin{macro}{\@@_replacement_cat:NNN}
@@ -7022,6 +7061,13 @@
     control~sequence~or~the~next~group~to~be~made~of~control~sequences.~
     It~only~makes~sense~to~follow~it~by~'.'~or~by~a~group.
   }
+\__kernel_msg_new:nnnn { regex } { cu-lbrace }
+  { Left~braces~must~be~escaped~in~'\iow_char:N\\#1{...}'. }
+  {
+    Constructions~such~as~'\iow_char:N\\#1{...\iow_char:N\{...}'~are~
+    not~allowed~and~should~be~replaced~by~
+    '\iow_char:N\\#1{...\token_to_str:N\{...}'.
+  }
 \__kernel_msg_new:nnnn { regex } { c-lparen-in-class }
   { Catcode~test~cannot~apply~to~group~in~character~class }
   {
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index e658839c8..4a73235d6 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 The token list \l_tmpa_tl contains the tokens:
->  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  \csname\endcsname(control sequence=undefined)
 >  } (the letter }).
 <recently read> }
 l. ...  }
@@ -422,7 +428,90 @@ l. ...  }
 l. ...  }
 ============================================================
 ============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 17: Catcode used by default
 ============================================================
 \g__cctab_next_cctab=\catcodetable...
 The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index b276fea3a..2de62efc1 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -215,6 +215,21 @@
     \tl_show:N \l_tmpa_tl
   }
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\TEST { c~and~u~escapes~with~braces }
+  {
+    \regex_show:n { \c{{} \u{{} \cD} \cU} }
+    \tl_clear:N \l_tmpa_tl
+    \regex_replace_all:nnN { } { \c{{} \u{{} \cD} \cU} } \l_tmpa_tl
+    \tl_analysis_show:N \l_tmpa_tl
+    \SEPARATOR
+    \cs_new_nopar:cpn { x \iow_char:N \{ } { foo }
+    \regex_show:n { \c{ \{ } \u{ x \{ } }
+    \tl_clear:N \l_tmpa_tl
+    \regex_replace_all:nnN { } { \c{\{} \u{x\{} } \l_tmpa_tl
+    \tl_analysis_show:N \l_tmpa_tl    
+  }
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \TEST { Catcode~used~by~default }
   {
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index 9969f12a5..c4e03280e 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 The token list \l_tmpa_tl contains the tokens:
->  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  \csname\endcsname(control sequence=undefined)
 >  } (the letter }).
 <recently read> }
 l. ...  }
@@ -422,7 +428,90 @@ l. ...  }
 l. ...  }
 ============================================================
 ============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 17: Catcode used by default
 ============================================================
 Defining \g__cctab_1_cctab on line ...
 The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index 71842e129..28989f3aa 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 The token list \l_tmpa_tl contains the tokens:
->  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  \csname\endcsname(control sequence=undefined)
 >  } (the letter }).
 <recently read> }
 l. ...  }
@@ -422,7 +428,90 @@ l. ...  }
 l. ...  }
 ============================================================
 ============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================
+============================================================
+TEST 17: Catcode used by default
 ============================================================
 Defining \g__cctab_1_cctab on line ...
 The token list \l_tmpa_tl contains the tokens:





More information about the latex3-commits mailing list.