[latex3-commits] [git/LaTeX3-latex3-latex3] gh590: l3regex: Forbid unescaped left braces in \c and \u escapes (2c08248a8)

Bruno Le Floch blflatex at gmail.com
Tue Apr 27 12:07:15 CEST 2021


Repository : https://github.com/latex3/latex3
On branch  : gh590
Link       : https://github.com/latex3/latex3/commit/2c08248a8f38864b3cb11dda5da250f9147dcc46

>---------------------------------------------------------------

commit 2c08248a8f38864b3cb11dda5da250f9147dcc46
Author: Bruno Le Floch <blflatex at gmail.com>
Date:   Tue Apr 27 11:57:11 2021 +0200

    l3regex: Forbid unescaped left braces in \c and \u escapes
    
    Of course, \c{ \{ } with an escaped brace still works fine.


>---------------------------------------------------------------

2c08248a8f38864b3cb11dda5da250f9147dcc46
 l3kernel/l3regex.dtx                     | 72 ++++++++++++++++++++-----
 l3kernel/testfiles/m3regex005.luatex.tlg | 91 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.lvt        | 14 +++++
 l3kernel/testfiles/m3regex005.tlg        | 91 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3regex005.xetex.tlg  | 91 +++++++++++++++++++++++++++++++-
 5 files changed, 343 insertions(+), 16 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 4696853ab..e533287c6 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -3282,6 +3282,21 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}+\@@_compile_{:+
+%   We forbid unescaped left braces inside a |\c{...}| escape because
+%   they otherwise lead to the confusing question of whether the first
+%   right brace in |\c{{}x}| should end |\c| or whether one should
+%   match braces.
+%    \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_ \c_left_brace_str : } % compile-time handler for an unescaped left brace in a regex
+  {
+    \@@_if_in_cs:TF % helper defined elsewhere; presumably true while compiling the body of a \c{...} escape
+      { \__kernel_msg_error:nnn { regex } { cu-lbrace } { c } } % true: raise cu-lbrace, with "c" as the escape letter shown in the message
+      { \exp_after:wN \@@_compile_raw:N \c_left_brace_str } % false: expand the brace string first, then pass the brace to the raw-character compiler
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}+\@@_compile_}:+
 % \begin{macro}{\@@_compile_end_cs:}
 % \begin{macro}[EXP]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
@@ -3426,7 +3441,14 @@
           {
             \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
               { \if_false: { \fi: } \l_@@_internal_b_tl }
-              { #2 \@@_compile_u_loop:NN }
+              {
+                \if_charcode:w \c_left_brace_str #2
+                  \__kernel_msg_expandable_error:nnn { regex } { cu-lbrace } { u }
+                \else:
+                  #2
+                \fi:
+                \@@_compile_u_loop:NN
+              }
           }
           {
             \if_false: { \fi: }
@@ -5380,7 +5402,11 @@
           \if_charcode:w \c_right_brace_str ##1
             \@@_replacement_rbrace:N
           \else:
-            \@@_replacement_normal:n
+            \if_charcode:w \c_left_brace_str ##1
+              \@@_replacement_lbrace:N
+            \else:
+              \@@_replacement_normal:n
+            \fi:
           \fi:
           ##1
         }
@@ -5536,8 +5562,7 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_g:w #1#2
   {
-    \@@_two_if_eq:NNNNTF
-      #1 #2 \@@_replacement_normal:n \c_left_brace_str
+    \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
       { \l_@@_internal_a_int = \@@_replacement_g_digits:NN }
       { \@@_replacement_error:NNN g #1 #2 }
   }
@@ -5584,15 +5609,15 @@
   {
     \token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
       {
-        \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
+        \cs_if_exist:cTF { @@_replacement_c_#2:w }
+          { \@@_replacement_cat:NNN #2 }
+          { \@@_replacement_error:NNN c #1#2 }
+      }
+      {
+        \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
           { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:N }
-          {
-            \cs_if_exist:cTF { @@_replacement_c_#2:w }
-              { \@@_replacement_cat:NNN #2 }
-              { \@@_replacement_error:NNN c #1#2 }
-          }
+          { \@@_replacement_error:NNN c #1#2 }
       }
-      { \@@_replacement_error:NNN c #1#2 }
   }
 %    \end{macrocode}
 % \end{macro}
@@ -5626,8 +5651,7 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_replacement_u:w #1#2
   {
-    \@@_two_if_eq:NNNNTF
-      #1 #2 \@@_replacement_normal:n \c_left_brace_str
+    \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
       { \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:V }
       { \@@_replacement_error:NNN u #1#2 }
   }
@@ -5651,6 +5675,21 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}{\@@_replacement_lbrace:N}
+%   Within a |\c{...}| or |\u{...}| construction, an unescaped left
+%   brace is forbidden and raises an error. Otherwise, it is a raw left brace.
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_lbrace:N #1 % #1: the left brace character met in the replacement text
+  {
+    \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f: % positive counter: presumably inside a \c{...} or \u{...} construction -- confirm where it is incremented
+      \__kernel_msg_error:nnn { regex } { cu-lbrace } { u } % NOTE(review): always passes "u", so the message names \u even when the brace is inside \c{...} -- confirm intended
+    \else:
+      \@@_replacement_normal:n {#1} % not inside such a construction: hand the brace on as an ordinary character
+    \fi:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \subsubsection{Characters in replacement}
 %
 % \begin{macro}{\@@_replacement_cat:NNN}
@@ -6992,6 +7031,13 @@
     control~sequence~or~the~next~group~to~be~made~of~control~sequences.~
     It~only~makes~sense~to~follow~it~by~'.'~or~by~a~group.
   }
+\__kernel_msg_new:nnnn { regex } { cu-lbrace } % error for an unescaped left brace; #1 is the escape letter (visible callers pass c or u)
+  { Left~braces~must~be~escaped~in~'\iow_char:N\\#1{...}'. }
+  {
+    Constructions~such~as~'\iow_char:N\\#1{...\iow_char:N\{...}'~are~
+    not~allowed~and~should~be~replaced~by~
+    '\iow_char:N\\#1{...\token_to_str:N\{...}'.
+  }
 \__kernel_msg_new:nnnn { regex } { c-lparen-in-class }
   { Catcode~test~cannot~apply~to~group~in~character~class }
   {
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index 68fc170f0..d665ec17e 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -280,7 +280,13 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 ! LaTeX3 Error: Missing right brace inserted in replacement text.
 For immediate help type H <return>.
  ...                                              
@@ -416,3 +422,86 @@ l. ...  }
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index 652763402..98e0c4c32 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -215,4 +215,18 @@
   }
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\TEST { c~and~u~escapes~with~braces }
+  {
+    \regex_show:n { \c{{} \u{{} \cD} \cU} } % unescaped left braces inside \c and \u: the expected log records cu-lbrace errors
+    \tl_clear:N \l_tmpa_tl
+    \regex_replace_all:nnN { } { \c{{} \u{{} \cD} \cU} } \l_tmpa_tl % same constructs used as replacement text on an empty token list
+    \tl_analysis_show:N \l_tmpa_tl
+    \SEPARATOR
+    \cs_new_nopar:cpn { x \iow_char:N \{ } { foo } % define a control sequence whose name is x followed by a left brace, expanding to foo
+    \regex_show:n { \c{ \{ } \u{ x \{ } } % escaped braces: the expected log shows a clean compile with no error
+    \tl_clear:N \l_tmpa_tl
+    \regex_replace_all:nnN { } { \c{\{} \u{x\{} } \l_tmpa_tl % escaped braces in replacement text: expected result is the control sequence \{ followed by f o o
+    \tl_analysis_show:N \l_tmpa_tl    
+  }
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \END
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index a5d7a0fc2..4fd8236f7 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -280,7 +280,13 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 ! LaTeX3 Error: Missing right brace inserted in replacement text.
 For immediate help type H <return>.
  ...                                              
@@ -416,3 +422,86 @@ l. ...  }
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index 68fc170f0..5aba4c713 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -280,7 +280,13 @@ TEST 10: Caseless matching and cs
 ============================================================
 TEST 11: Braces
 ============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
 ! LaTeX3 Error: Missing right brace inserted in replacement text.
 For immediate help type H <return>.
  ...                                              
@@ -416,3 +422,86 @@ l. ...  }
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Left braces must be escaped in '\u{...}'.
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+  control sequence \
+  Match
+    categories D, class
+      char code 125 (})
+  Match
+    categories U, class
+      char code 125 (}).
+<recently read> }
+l. ...  }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...                                              
+l. ...  }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+                           Erroneous variable \csname\endcsnameused!
+l. ...  }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+>  \csname\endcsname(control sequence=undefined)
+>  } (subscript character })
+>  } (superscript character }).
+<recently read> }
+l. ...  }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+  control sequence \{
+  char 102 (f), catcode 11
+  char 111 (o), catcode 11
+  char 111 (o), catcode 11.
+<recently read> }
+l. ...  }
+The token list \l_tmpa_tl contains the tokens:
+>  \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+>  f (the letter f)
+>  o (the letter o)
+>  o (the letter o).
+<recently read> }
+l. ...  }
+============================================================





More information about the latex3-commits mailing list.