[latex3-commits] [git/LaTeX3-latex3-latex3] main: Correct the validity check performed by \regex_show:N (fixes #1093) (c32e80231)
Bruno Le Floch
blflatex at gmail.com
Fri Jun 24 14:55:34 CEST 2022
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/c32e80231fdbfeb06a7f769260a1c4dcbc62e288
>---------------------------------------------------------------
commit c32e80231fdbfeb06a7f769260a1c4dcbc62e288
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Fri Jun 24 14:55:34 2022 +0200
Correct the validity check performed by \regex_show:N (fixes #1093)
>---------------------------------------------------------------
c32e80231fdbfeb06a7f769260a1c4dcbc62e288
l3kernel/CHANGELOG.md | 3 +++
l3kernel/l3regex.dtx | 40 ++++++++++++++++++++++++++++++++-------
l3kernel/testfiles/m3regex007.lvt | 7 ++++++-
l3kernel/testfiles/m3regex007.tlg | 32 +++++++++++++++++++++++++++++++
4 files changed, 74 insertions(+), 8 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 9595ed47f..7141b0226 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,9 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
+### Fixed
+- Correct validity check performed by `\regex_show:N` (gh/1093)
+
## [2022-06-16]
### Fixed
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 7429b63cc..cb3d5fab8 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -2883,7 +2883,7 @@
\cs_new_protected:cpx { @@_compile_/#2: }
{
\@@_compile_one:n
- { \@@_item_reverse:n \exp_not:c { @@_prop_#1: } }
+ { \@@_item_reverse:n { \exp_not:c { @@_prop_#1: } } }
}
}
\@@_tmp:w d D
@@ -3080,7 +3080,8 @@
% raw characters until hopefully reaching |:]|. If that's missing, or
% the \textsc{posix} class is unknown, abort. If all is right, add the
% test to the current class, with an extra \cs{@@_item_reverse:n}
-% for negative classes.
+% for negative classes (we make sure to wrap its argument in braces,
+% since otherwise \cs{regex_show:N} would not recognize the regex as valid).
% \begin{macrocode}
\cs_new_protected:Npn \@@_compile_class_posix_test:w #1#2
{
@@ -3131,8 +3132,8 @@
{
\@@_compile_one:n
{
- \bool_if:NF \l_@@_internal_bool \@@_item_reverse:n
- \exp_not:c { @@_posix_ \l_@@_internal_a_tl : }
+ \bool_if:NTF \l_@@_internal_bool \use:n \@@_item_reverse:n
+ { \exp_not:c { @@_posix_ \l_@@_internal_a_tl : } }
}
}
{
@@ -3898,6 +3899,13 @@
}
\cs_new:Npn \@@_clean_class:n #1
{ \@@_clean_class_loop:nnn #1 ????? \prg_break_point: }
+% \end{macrocode}
+% When cleaning a class there are many cases, including a dozen or so
+% like \cs{@@_prop_d:} or \cs{@@_posix_alpha:}. To avoid listing all of
+% them we allow any command whose name starts with the $13$ characters
+% |__regex_prop_| or |__regex_posix| (handily these two prefixes have
+% the same length once the underscore following |posix| is left out).
+% \begin{macrocode}
\cs_new:Npn \@@_clean_class_loop:nnn #1#2#3
{
\tl_if_single:nF {#1} { \prg_break: }
@@ -3917,7 +3925,10 @@
\@@_item_caseless_range:nn { }
\@@_item_exact:nn { }
}
- { #1 { \@@_clean_int:n {#2} } { \@@_clean_int:n {#3} } }
+ {
+ #1 { \@@_clean_int:n {#2} } { \@@_clean_int:n {#3} }
+ \@@_clean_class_loop:nnn
+ }
{
\token_case_meaning:NnTF #1
{
@@ -3928,7 +3939,22 @@
#1 { \@@_clean_int:n {#2} } { \@@_clean_class:n {#3} }
\@@_clean_class_loop:nnn
}
- { \prg_break: }
+ {
+ \exp_args:Nf \str_case:nnTF
+ {
+ \exp_args:Nf \str_range:nnn
+ { \cs_to_str:N #1 } { 1 } { 13 }
+ }
+ {
+ { @@_prop_ } { }
+ { @@_posix } { }
+ }
+ {
+ #1
+ \@@_clean_class_loop:nnn {#2} {#3}
+ }
+ { \prg_break: }
+ }
}
}
}
@@ -4922,7 +4948,7 @@
\int_set_eq:NN \l_@@_curr_char_int \l_@@_last_char_int
\@@_prop_w:
\@@_break_point:TF
- { \group_end: \@@_item_reverse:n \@@_prop_w: }
+ { \group_end: \@@_item_reverse:n { \@@_prop_w: } }
{ \group_end: \@@_prop_w: }
}
\cs_new_protected:Npn \@@_Z_test:
diff --git a/l3kernel/testfiles/m3regex007.lvt b/l3kernel/testfiles/m3regex007.lvt
index 1cde917a7..5604407c8 100644
--- a/l3kernel/testfiles/m3regex007.lvt
+++ b/l3kernel/testfiles/m3regex007.lvt
@@ -1,5 +1,5 @@
%
-% Copyright (C) 2012,2016-2018,2021 The LaTeX Project
+% Copyright (C) 2012,2016-2018,2021,2022 The LaTeX Project
%
\documentclass{minimal}
@@ -32,6 +32,11 @@
\regex_new:N \l_bar_regex
\regex_show:N \l_bar_regex
\regex_show:N \g_undefined_regex
+ \regex_set:Nn \l_tmpa_regex { \d | [a-z\W] . [[:alpha:][:^ascii:] \%] }
+ \regex_show:N \l_tmpa_regex
+ \regex_set:Nn \l_tmpa_regex { A }
+ \tl_put_right:Nn \l_tmpa_regex { X } % invalid on purpose
+ \regex_show:N \l_tmpa_regex
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/l3kernel/testfiles/m3regex007.tlg b/l3kernel/testfiles/m3regex007.tlg
index dd7eda821..cd11a75c1 100644
--- a/l3kernel/testfiles/m3regex007.tlg
+++ b/l3kernel/testfiles/m3regex007.tlg
@@ -128,6 +128,38 @@ l. ... }
This is a coding error.
LaTeX has been asked to show a variable \g_undefined_regex, but this has not
been defined yet.
+> Compiled regex variable \l_tmpa_regex:
++-branch
+ range [48 (0),57 (9)]
++-branch
+ Match
+ range [97 (a),122 (z)]
+ Reversed match
+ range [97 (a),122 (z)]
+ range [65 (A),90 (Z)]
+ range [48 (0),57 (9)]
+ char code 95 (_)
+ any token
+ Match
+ range [97 (a),122 (z)]
+ range [65 (A),90 (Z)]
+ Reversed match
+ range [0,127]
+ char code 37 (%).
+<recently read> }
+l. ... }
+! LaTeX3 Error: Variable '\l_tmpa_regex' is not a valid regex.
+For immediate help type H <return>.
+ ...
+l. ... }
+This is a coding error.
+The variable '\l_tmpa_regex' with value
+ \__regex_branch:n {\__regex_class:NnnnN \c_true_bool
+ {\__regex_item_caseful_equal:n {65}}{1}{0}\c_false_bool }X
+should be a regex variable, but it does not have the correct internal
+structure:
+ \__regex_branch:n {\__regex_class:NnnnN \c_true_bool
+ {\__regex_item_caseful_equal:n {65}}{1}{0}\c_false_bool }
============================================================
============================================================
TEST 2: regex_show again
More information about the latex3-commits
mailing list.