[latex3-commits] [git/LaTeX3-latex3-latex3] main: l3regex: Forbid unescaped left braces in \c and \u escapes (c69de283a)
Bruno Le Floch
blflatex at gmail.com
Tue Apr 27 15:59:07 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/c69de283a480457492a16c2dac81deb9f6b1c181
>---------------------------------------------------------------
commit c69de283a480457492a16c2dac81deb9f6b1c181
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Tue Apr 27 11:57:11 2021 +0200
l3regex: Forbid unescaped left braces in \c and \u escapes
Of course, \c{ \{ } with an escaped brace still works fine.
>---------------------------------------------------------------
c69de283a480457492a16c2dac81deb9f6b1c181
l3kernel/l3regex.dtx | 72 +++++++++++++++++++-----
l3kernel/testfiles/m3regex005.luatex.tlg | 95 +++++++++++++++++++++++++++++++-
l3kernel/testfiles/m3regex005.lvt | 15 +++++
l3kernel/testfiles/m3regex005.tlg | 95 +++++++++++++++++++++++++++++++-
l3kernel/testfiles/m3regex005.xetex.tlg | 95 +++++++++++++++++++++++++++++++-
5 files changed, 350 insertions(+), 22 deletions(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 76a7947b4..0cdd3d453 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -3289,6 +3289,21 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}+\@@_compile_{:+
+% Unescaped left braces are forbidden inside a |\c{...}| escape: they
+% would raise the confusing question of whether the first right brace
+% in |\c{{}x}| ends |\c| or whether braces should be matched.  Outside
+% such an escape the left brace is compiled as a raw character.
+% \begin{macrocode}
+\cs_new_protected:cpn { @@_compile_ \c_left_brace_str : }
+ {
+ \@@_if_in_cs:TF
+ { \__kernel_msg_error:nnn { regex } { cu-lbrace } { c } }
+ { \exp_after:wN \@@_compile_raw:N \c_left_brace_str }
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}+\@@_compile_}:+
% \begin{macro}{\@@_compile_end_cs:}
% \begin{macro}[EXP]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
@@ -3433,7 +3448,14 @@
{
\exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
{ \if_false: { \fi: } \l_@@_internal_b_tl }
- { #2 \@@_compile_u_loop:NN }
+ {
+ \if_charcode:w \c_left_brace_str #2
+ \__kernel_msg_expandable_error:nnn { regex } { cu-lbrace } { u }
+ \else:
+ #2
+ \fi:
+ \@@_compile_u_loop:NN
+ }
}
{
\if_false: { \fi: }
@@ -5387,7 +5409,11 @@
\if_charcode:w \c_right_brace_str ##1
\@@_replacement_rbrace:N
\else:
- \@@_replacement_normal:n
+ \if_charcode:w \c_left_brace_str ##1
+ \@@_replacement_lbrace:N
+ \else:
+ \@@_replacement_normal:n
+ \fi:
\fi:
##1
}
@@ -5566,8 +5592,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_replacement_g:w #1#2
{
- \@@_two_if_eq:NNNNTF
- #1 #2 \@@_replacement_normal:n \c_left_brace_str
+ \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
{ \l_@@_internal_a_int = \@@_replacement_g_digits:NN }
{ \@@_replacement_error:NNN g #1 #2 }
}
@@ -5614,15 +5639,15 @@
{
\token_if_eq_meaning:NNTF #1 \@@_replacement_normal:n
{
- \exp_after:wN \token_if_eq_charcode:NNTF \c_left_brace_str #2
+ \cs_if_exist:cTF { @@_replacement_c_#2:w }
+ { \@@_replacement_cat:NNN #2 }
+ { \@@_replacement_error:NNN c #1#2 }
+ }
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
{ \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:N }
- {
- \cs_if_exist:cTF { @@_replacement_c_#2:w }
- { \@@_replacement_cat:NNN #2 }
- { \@@_replacement_error:NNN c #1#2 }
- }
+ { \@@_replacement_error:NNN c #1#2 }
}
- { \@@_replacement_error:NNN c #1#2 }
}
% \end{macrocode}
% \end{macro}
@@ -5656,8 +5681,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_replacement_u:w #1#2
{
- \@@_two_if_eq:NNNNTF
- #1 #2 \@@_replacement_normal:n \c_left_brace_str
+ \token_if_eq_meaning:NNTF #1 \@@_replacement_lbrace:N
{ \@@_replacement_cu_aux:Nw \@@_replacement_exp_not:V }
{ \@@_replacement_error:NNN u #1#2 }
}
@@ -5681,6 +5705,21 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}{\@@_replacement_lbrace:N}
+% When |\l_@@_replacement_csnames_int| is positive (within |\c{...}| or |\u{...}|)
+% raise \texttt{cu-lbrace}, always reported as |\u|; otherwise a raw left brace.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_replacement_lbrace:N #1
+ {
+ \if_int_compare:w \l_@@_replacement_csnames_int > 0 \exp_stop_f:
+ \__kernel_msg_error:nnn { regex } { cu-lbrace } { u }
+ \else:
+ \@@_replacement_normal:n {#1}
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \subsubsection{Characters in replacement}
%
% \begin{macro}{\@@_replacement_cat:NNN}
@@ -7022,6 +7061,13 @@
control~sequence~or~the~next~group~to~be~made~of~control~sequences.~
It~only~makes~sense~to~follow~it~by~'.'~or~by~a~group.
}
+\__kernel_msg_new:nnnn { regex } { cu-lbrace } % #1 is the escape letter, passed as c or u
+ { Left~braces~must~be~escaped~in~'\iow_char:N\\#1{...}'. }
+ {
+ Constructions~such~as~'\iow_char:N\\#1{...\iow_char:N\{...}'~are~
+ not~allowed~and~should~be~replaced~by~
+ '\iow_char:N\\#1{...\token_to_str:N\{...}'.
+ }
\__kernel_msg_new:nnnn { regex } { c-lparen-in-class }
{ Catcode~test~cannot~apply~to~group~in~character~class }
{
diff --git a/l3kernel/testfiles/m3regex005.luatex.tlg b/l3kernel/testfiles/m3regex005.luatex.tlg
index e658839c8..4a73235d6 100644
--- a/l3kernel/testfiles/m3regex005.luatex.tlg
+++ b/l3kernel/testfiles/m3regex005.luatex.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
============================================================
TEST 11: Braces
============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
The token list \l_tmpa_tl contains the tokens:
-> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> \csname\endcsname(control sequence=undefined)
> } (the letter }).
<recently read> }
l. ... }
@@ -422,7 +428,90 @@ l. ... }
l. ... }
============================================================
============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Left braces must be escaped in '\u{...}'.
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+ control sequence \
+ Match
+ categories D, class
+ char code 125 (})
+ Match
+ categories U, class
+ char code 125 (}).
+<recently read> }
+l. ... }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+> \csname\endcsname(control sequence=undefined)
+> } (subscript character })
+> } (superscript character }).
+<recently read> }
+l. ... }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+ control sequence \{
+ char 102 (f), catcode 11
+ char 111 (o), catcode 11
+ char 111 (o), catcode 11.
+<recently read> }
+l. ... }
+The token list \l_tmpa_tl contains the tokens:
+> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> f (the letter f)
+> o (the letter o)
+> o (the letter o).
+<recently read> }
+l. ... }
+============================================================
+============================================================
+TEST 17: Catcode used by default
============================================================
\g__cctab_next_cctab=\catcodetable...
The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.lvt b/l3kernel/testfiles/m3regex005.lvt
index b276fea3a..2de62efc1 100644
--- a/l3kernel/testfiles/m3regex005.lvt
+++ b/l3kernel/testfiles/m3regex005.lvt
@@ -215,6 +215,21 @@
\tl_show:N \l_tmpa_tl
}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\TEST { c~and~u~escapes~with~braces }
+ {
+ \regex_show:n { \c{{} \u{{} \cD} \cU} } % unescaped left braces in a regex: errors expected
+ \tl_clear:N \l_tmpa_tl
+ \regex_replace_all:nnN { } { \c{{} \u{{} \cD} \cU} } \l_tmpa_tl % same input as a replacement text
+ \tl_analysis_show:N \l_tmpa_tl
+ \SEPARATOR
+ \cs_new_nopar:cpn { x \iow_char:N \{ } { foo } % control sequence read by the \u escapes below
+ \regex_show:n { \c{ \{ } \u{ x \{ } } % escaped left braces: accepted without error
+ \tl_clear:N \l_tmpa_tl
+ \regex_replace_all:nnN { } { \c{\{} \u{x\{} } \l_tmpa_tl
+ \tl_analysis_show:N \l_tmpa_tl
+ }
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\TEST { Catcode~used~by~default }
{
diff --git a/l3kernel/testfiles/m3regex005.tlg b/l3kernel/testfiles/m3regex005.tlg
index 9969f12a5..c4e03280e 100644
--- a/l3kernel/testfiles/m3regex005.tlg
+++ b/l3kernel/testfiles/m3regex005.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
============================================================
TEST 11: Braces
============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
The token list \l_tmpa_tl contains the tokens:
-> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> \csname\endcsname(control sequence=undefined)
> } (the letter }).
<recently read> }
l. ... }
@@ -422,7 +428,90 @@ l. ... }
l. ... }
============================================================
============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Left braces must be escaped in '\u{...}'.
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+ control sequence \
+ Match
+ categories D, class
+ char code 125 (})
+ Match
+ categories U, class
+ char code 125 (}).
+<recently read> }
+l. ... }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+> \csname\endcsname(control sequence=undefined)
+> } (subscript character })
+> } (superscript character }).
+<recently read> }
+l. ... }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+ control sequence \{
+ char 102 (f), catcode 11
+ char 111 (o), catcode 11
+ char 111 (o), catcode 11.
+<recently read> }
+l. ... }
+The token list \l_tmpa_tl contains the tokens:
+> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> f (the letter f)
+> o (the letter o)
+> o (the letter o).
+<recently read> }
+l. ... }
+============================================================
+============================================================
+TEST 17: Catcode used by default
============================================================
Defining \g__cctab_1_cctab on line ...
The token list \l_tmpa_tl contains the tokens:
diff --git a/l3kernel/testfiles/m3regex005.xetex.tlg b/l3kernel/testfiles/m3regex005.xetex.tlg
index 71842e129..28989f3aa 100644
--- a/l3kernel/testfiles/m3regex005.xetex.tlg
+++ b/l3kernel/testfiles/m3regex005.xetex.tlg
@@ -280,9 +280,15 @@ TEST 10: Caseless matching and cs
============================================================
TEST 11: Braces
============================================================
-|\{}|
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+|\csname\endcsname}|
The token list \l_tmpa_tl contains the tokens:
-> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> \csname\endcsname(control sequence=undefined)
> } (the letter }).
<recently read> }
l. ... }
@@ -422,7 +428,90 @@ l. ... }
l. ... }
============================================================
============================================================
-TEST 16: Catcode used by default
+TEST 16: c and u escapes with braces
+============================================================
+! LaTeX3 Error: Left braces must be escaped in '\c{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\c{...{...}' are not allowed and should be replaced by
+'\c{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Left braces must be escaped in '\u{...}'.
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+> Compiled regex {\c {{}\u {{}\cD }\cU }}:
++-branch
+ control sequence \
+ Match
+ categories D, class
+ char code 125 (})
+ Match
+ categories U, class
+ char code 125 (}).
+<recently read> }
+l. ... }
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! LaTeX3 Error: Left braces must be escaped in '\u{...}'.
+For immediate help type H <return>.
+ ...
+l. ... }
+Constructions such as '\u{...{...}' are not allowed and should be replaced by
+'\u{...\{...}'.
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+ Erroneous variable \csname\endcsnameused!
+l. ... }
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
+The token list \l_tmpa_tl contains the tokens:
+> \csname\endcsname(control sequence=undefined)
+> } (subscript character })
+> } (superscript character }).
+<recently read> }
+l. ... }
+============================================================
+Defining \x{ on line ...
+> Compiled regex {\c {\{}\u {x\{}}:
++-branch
+ control sequence \{
+ char 102 (f), catcode 11
+ char 111 (o), catcode 11
+ char 111 (o), catcode 11.
+<recently read> }
+l. ... }
+The token list \l_tmpa_tl contains the tokens:
+> \{ (control sequence=\protected macro:->\ifmmode \lbrace \else \textb\ETC.)
+> f (the letter f)
+> o (the letter o)
+> o (the letter o).
+<recently read> }
+l. ... }
+============================================================
+============================================================
+TEST 17: Catcode used by default
============================================================
Defining \g__cctab_1_cctab on line ...
The token list \l_tmpa_tl contains the tokens:
More information about the latex3-commits
mailing list.