[latex3-commits] [git/LaTeX3-latex3-latex3] main: Fix how peek analysis deals with normal tokens (fix #1109) (fix #1110) (24f718890)
Bruno Le Floch
blflatex at gmail.com
Mon Oct 3 22:14:00 CEST 2022
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/24f7188904d6a3695019b03c036221cc4abf3dac
>---------------------------------------------------------------
commit 24f7188904d6a3695019b03c036221cc4abf3dac
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Mon Oct 3 21:58:30 2022 +0200
Fix how peek analysis deals with normal tokens (fix #1109) (fix #1110)
Some contortions meant to support outer macros led to a very obvious
bug: the code was setting scanned tokens willy-nilly to \scan_stop:,
which broke when these tokens were things like \exp_after:wN.
I had also used arbitrary characters as delimiters, which broke for
macro parameter characters. Now fixed by revamping the logic to
better separate these two difficulties.
>---------------------------------------------------------------
24f7188904d6a3695019b03c036221cc4abf3dac
l3kernel/CHANGELOG.md | 9 ++
l3kernel/l3tl-analysis.dtx | 213 +++++++++++++++++++++++++++++----------
l3kernel/l3token.dtx | 3 +-
l3kernel/testfiles/m3peek003.tlg | 2 +-
4 files changed, 169 insertions(+), 58 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 33b9759ed..a76cafb94 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,15 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
+### Changed
+- In `\peek_analysis_map_inline:n`, omit unnecessary `\exp_not:n` when the token
+ seen is a character that is neither active nor a macro parameter character
+
+### Fixed
+- `\peek_analysis_map_inline:n` support for macro parameter characters (issue
+ [\#1109](https://github.com/latex3/latex3/issues/1109)) and for many
+ expandable tokens (issue [\#1110](https://github.com/latex3/latex3/issues/1110))
+
## [2022-09-28]
### Added
diff --git a/l3kernel/l3tl-analysis.dtx b/l3kernel/l3tl-analysis.dtx
index 612ab5e72..8228bff64 100644
--- a/l3kernel/l3tl-analysis.dtx
+++ b/l3kernel/l3tl-analysis.dtx
@@ -219,9 +219,10 @@
% A token list containing the character number~$32$ (space) with all
% possible category codes except $1$ and $2$ (begin-group and
% end-group). Why $32$? Because some \LuaTeX{} versions only allow
-% creation of catcode~$10$ (space) tokens with this character code,
-% and because even in other engines it is much easier to produce since
-% \cs{char_generate:nn} refuses to produce spaces.
+% creation of catcode~$10$ (space) tokens with this character code, so
+% we decided to make \cs{char_generate:nn} refuse to create such
+% weird spaces as well. We do not include the macro parameter case
+% (catcode~$6$) because it cannot be used as a macro delimiter.
% \begin{macrocode}
\group_begin:
\char_set_active_eq:NN \ \scan_stop:
@@ -229,7 +230,6 @@
{
\char_generate:nn { 32 } { 3 } 3
\char_generate:nn { 32 } { 4 } 4
- # \char_generate:nn { 32 } { 6 } 6
\char_generate:nn { 32 } { 7 } 7
\char_generate:nn { 32 } { 8 } 8
\c_space_tl \token_to_str:N A
@@ -809,7 +809,8 @@
\scan_stop:
\exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
\scan_stop:
- \exp_after:wN \@@_analysis_b_char:Nww
+ \exp_after:wN \@@_analysis_b_char:Nn
+ \exp_after:wN \@@_analysis_b_char_aux:nww
\else:
\exp_after:wN \@@_analysis_b_cs:Nww
\fi:
@@ -819,35 +820,43 @@
% \end{macro}
% \end{macro}
%
-% \begin{macro}[EXP]{\@@_analysis_b_char:Nww}
+% \begin{macro}[EXP]{\@@_analysis_b_char:Nn, \@@_analysis_b_char_aux:nww}
+% This function is called here with arguments
+% \cs{@@_analysis_b_char_aux:nww} and a normal character, while in the
+% peek analysis code it is called with \cs{use_none:n} and possibly a
+% space character, which is why the function has signature |Nn|.
% If the normal token we grab is a character, leave
% \meta{catcode} \meta{charcode} followed by \cs{s_@@}
% in the input stream, and call \cs{@@_analysis_b_normals:ww}
% with its first argument decremented.
% \begin{macrocode}
-\cs_new:Npx \@@_analysis_b_char:Nww #1
+\cs_new:Npx \@@_analysis_b_char:Nn #1#2
{
- \exp_not:N \if_meaning:w #1 \exp_not:N \tex_undefined:D
+ \exp_not:N \if_meaning:w #2 \exp_not:N \tex_undefined:D
\token_to_str:N D \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_other_token
+ \exp_not:N \if_catcode:w #2 \c_catcode_other_token
\token_to_str:N C \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_catcode_letter_token
+ \exp_not:N \if_catcode:w #2 \c_catcode_letter_token
\token_to_str:N B \exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_toggle_token 3
+ \exp_not:N \if_catcode:w #2 \c_math_toggle_token 3
\exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_alignment_token 4
+ \exp_not:N \if_catcode:w #2 \c_alignment_token 4
\exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_superscript_token 7
+ \exp_not:N \if_catcode:w #2 \c_math_superscript_token 7
\exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_math_subscript_token 8
+ \exp_not:N \if_catcode:w #2 \c_math_subscript_token 8
\exp_not:N \else:
- \exp_not:N \if_catcode:w #1 \c_space_token
+ \exp_not:N \if_catcode:w #2 \c_space_token
\token_to_str:N A \exp_not:N \else:
6
\exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: }
- \exp_not:N \int_value:w `#1 \s_@@
- \exp_not:N \exp_after:wN \exp_not:N \@@_analysis_b_normals:ww
- \exp_not:N \int_value:w \exp_not:N \int_eval:w - 1 +
+ #1 {#2}
+ }
+\cs_new:Npn \@@_analysis_b_char_aux:nww #1
+ {
+ \int_value:w `#1 \s_@@
+ \exp_after:wN \@@_analysis_b_normals:ww
+ \int_value:w \int_eval:w - 1 +
}
% \end{macrocode}
% \end{macro}
@@ -1170,8 +1179,9 @@
% {
% \peek_analysis_map_inline:n,
% \@@_peek_analysis_loop:NNn, \@@_peek_analysis_test:,
-% \@@_peek_analysis_normal:N, \@@_peek_analysis_cs:,
-% \@@_peek_analysis_char:N, \@@_peek_analysis_char:nN,
+% \@@_peek_analysis_exp:N, \@@_peek_analysis_exp_active:N,
+% \@@_peek_analysis_nonexp:N, \@@_peek_analysis_cs:N,
+% \@@_peek_analysis_char:N, \@@_peek_analysis_char:w,
% \@@_peek_analysis_special:, \@@_peek_analysis_retest:,
% \@@_peek_analysis_next:, \@@_peek_analysis_str:,
% \@@_peek_analysis_str:w, \@@_peek_analysis_str:n,
@@ -1184,7 +1194,8 @@
% nested maps. We may wish to pass to this function an \tn{outer}
% control sequence or active character; for this we will undefine
% potentially-\tn{outer} tokens within a group, closed after the
-% function receives its arguments. This user's code function also
+% function reads its arguments (for an \tn{outer} active character
+% there is no good alternative). This user's code function also
% calls the loop auxiliary, and includes the trailing
% \cs{prg_break_point:Nn} for when the user wants to stop the loop.
% The loop auxiliary must remove that break point because it must look
@@ -1209,7 +1220,8 @@
% \end{macrocode}
% The loop starts a group (closed by the user-code function defined
% above) with a normalized escape character, and checks if the next
-% token is special or \texttt{N}-type.
+% token is special or \texttt{N}-type (distinguishing expandable from
+% non-expandable tokens).
% \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_loop:NNn #1#2#3
{
@@ -1224,60 +1236,149 @@
}
\cs_new_protected:Npn \@@_peek_analysis_test:
{
- \if_int_odd:w
- \if_catcode:w \exp_not:N \l_peek_token { \c_zero_int \fi:
- \if_catcode:w \exp_not:N \l_peek_token } \c_zero_int \fi:
- \if_meaning:w \l_peek_token \c_space_token \c_zero_int \fi:
- \c_one_int
+ \if_case:w
+ \if_catcode:w \exp_not:N \l_peek_token { \c_max_int \fi:
+ \if_catcode:w \exp_not:N \l_peek_token } \c_max_int \fi:
+ \if_meaning:w \l_peek_token \c_space_token \c_max_int \fi:
+ \exp_after:wN \if_meaning:w \exp_not:N \l_peek_token \l_peek_token
+ \c_one_int
+ \fi:
+ \c_zero_int
\exp_after:wN \exp_after:wN
- \exp_after:wN \@@_peek_analysis_normal:N
+ \exp_after:wN \@@_peek_analysis_exp:N
\exp_after:wN \exp_not:N
+ \or:
+ \exp_after:wN \@@_peek_analysis_nonexp:N
\else:
\exp_after:wN \@@_peek_analysis_special:
\fi:
}
% \end{macrocode}
-% Normal tokens are not too hard, but can be \tn{outer}, hence the
-% \cs{exp_not:N} in the code above. If the token is expandable then
-% it might be an \tn{outer} or a \TeX{} conditional, so to be safe we
-% set it to \cs{scan_stop:} (the assignment is local and stopped by
-% the \cs{group_end:} upon calling the user's code). Then distinguish
-% characters (including active ones and macro parameter characters)
-% from control sequences (whose string representation is more than one
-% character because the escape character is printable). For a control
-% sequence call the user code with suitable arguments.
+% Expandable tokens (which are automatically |N|-type) can be
+% \tn{outer} macros, hence the need for \cs{exp_after:wN} and
+% \cs{exp_not:N} in the code above, which allows the next function to
+% safely grab the token as an argument. We run some code that is
+% expanded using the primitive \cs{cs_set_nopar:Npx} rather than
+% \cs{tl_set:Nx} to avoid grabbing |#1| as an argument, as it may be
+% \tn{outer}. To allow~|#1| as an argument of the user's function
+% (stored in \cs{l_@@_peek_code_tl}), we set it equal to
+% \cs{scan_stop:} first, immediately before running the code as |#1|
+% may be some pretty important function such as \cs{exp_after:wN}.
+% Then we put the user's function and the first argument
+% \cs{exp_not:N} |#1|. Then we must add |{-1}0| if the token is a
+% control sequence and \Arg{charcode}|D| otherwise. Distinguishing
+% the two cases is easy: since we have made the escape character
+% printable, \cs{token_to_str:N} gives at least two characters for a
+% control sequence versus a single one for an active character
+% (possibly being a space). Producing the right outcome is trickier,
+% as |#1| cannot appear in either branch of the conditional (it could
+% be \tn{outer}, or simply a \TeX{} conditional), and can only be
+% safely discarded by \cs{use_none:n} if it is first hit with
+% \cs{exp_not:N}.
% \begin{macrocode}
-\cs_new_protected:Npn \@@_peek_analysis_normal:N #1
+\cs_new_protected:Npn \@@_peek_analysis_exp:N #1
+ {
+ \cs_set_nopar:Npx \l_@@_peek_code_tl
+ {
+ \tex_let:D \exp_not:N #1 \scan_stop:
+ \exp_not:o \l_@@_peek_code_tl
+ { \exp_not:N \exp_not:N \exp_not:N #1 }
+ \if:w \scan_stop:
+ \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
+ \scan_stop:
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \@@_peek_analysis_exp_active:N
+ \else:
+ { -1 } 0
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \use_none:n
+ \fi:
+ \exp_not:N #1
+ }
+ \l_@@_peek_code_tl
+ }
+\cs_new:Npx \@@_peek_analysis_exp_active:N #1
+ { { \exp_not:N \int_value:w `#1 } \token_to_str:N D }
+% \end{macrocode}
+% For normal non-expandable tokens we must distinguish characters
+% (including active ones and macro parameter characters) from control
+% sequences (whose string representation is more than one character
+% because we made the escape character printable). For a control
+% sequence call the user code with suitable arguments, wrapping |#1|
+% within \cs{exp_not:n} just in case it happens to be equal to a macro
+% parameter character. We do not skip \cs{exp_not:n} when
+% unnecessary, because there might be situations where the argument
+% could be used by the user after further redefinitions of the token,
+% and it seems more convenient to know that \cs{exp_not:n} is always
+% used.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_peek_analysis_nonexp:N #1
{
- \exp_after:wN \reverse_if:N \exp_after:wN \if_meaning:w
- \exp_not:N #1 #1
- \tex_let:D #1 \scan_stop:
- \tl_put_right:Nn \l_@@_peek_code_tl { { \exp_not:N #1 } }
- \else:
- \tl_put_right:Nn \l_@@_peek_code_tl { { \exp_not:n {#1} } }
- \fi:
\if_charcode:w
\scan_stop:
\exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
\scan_stop:
\exp_after:wN \@@_peek_analysis_char:N
- \exp_after:wN #1
\else:
- \exp_after:wN \@@_peek_analysis_cs:
+ \exp_after:wN \@@_peek_analysis_cs:N
\fi:
+ #1
}
-\cs_new_protected:Npn \@@_peek_analysis_cs:
- { \l_@@_peek_code_tl { -1 } 0 }
-\cs_new_protected:Npn \@@_peek_analysis_char:N #1
+\cs_new_protected:Npn \@@_peek_analysis_cs:N #1
+ { \l_@@_peek_code_tl { \exp_not:n {#1} } { -1 } 0 }
+% \end{macrocode}
+% For normal characters we must determine their catcode. The main
+% difficulty is that the character may be an active character
+% masquerading as (i.e., set equal to) itself with a different
+% catcode. Two approaches based on \tn{lowercase} can detect this.
+% One could make an active character with the same character code
+% as~|#1| and change its definition before testing the catcode
+% of~|#1|, but in some Unicode engines this fills up the hash table
+% uselessly. Instead, we lowercase~|#1| itself, changing its character
+% code to~$32$, namely space (because \LuaTeX{} cannot turn catcode~$10$
+% characters into anything other than character code~$32$), then we apply
+% \cs{@@_analysis_b_char:Nn}, which detects active characters by
+% comparing them to \cs{tex_undefined:D}, and we must have undefined
+% the active space for this test to work ---we use an |x|-expanding
+% assignment to get the active space in the right place. Finally
+% \cs{@@_peek_analysis_char:w} puts the arguments in the correct
+% order, including \cs{exp_not:n} for macro parameter characters and
+% active characters (the latter could be macro parameter characters,
+% and it seems more uniform to always put \cs{exp_not:n}).
+% \begin{macrocode}
+\group_begin:
+\char_set_active_eq:NN \ \scan_stop:
+\cs_new_protected:Npx \@@_peek_analysis_char:N #1
{
- \char_set_lccode:nn { `#1 } { 32 }
- \tex_lowercase:D { \@@_peek_analysis_char:nN {#1} } #1
+ \cs_set_eq:NN
+ \char_generate:nn { 32 } { 13 }
+ \exp_not:N \tex_undefined:D
+ \tex_lccode:D `#1 = 32 \exp_stop_f:
+ \tex_lowercase:D
+ {
+ \tl_put_right:Nx \exp_not:N \l_@@_peek_code_tl
+ { \exp_not:n { \@@_analysis_b_char:Nn \use_none:n } {#1} }
+ }
+ \exp_not:n
+ {
+ \exp_after:wN \@@_peek_analysis_char:w
+ \int_value:w
+ }
+ `#1
+ \exp_not:n { \exp_after:wN \s_@@ \l_@@_peek_code_tl }
+ #1
}
-\cs_new_protected:Npn \@@_peek_analysis_char:nN #1#2
+\group_end:
+\cs_new_protected:Npn \@@_peek_analysis_char:w #1 \s_@@ #2#3#4
{
- \cs_set_protected:Npn \@@_tmp:w ##1 #1 ##2 ##3 \scan_stop:
- { \exp_args:No \l_@@_peek_code_tl { \int_value:w `#2 } ##2 }
- \exp_after:wN \@@_tmp:w \c_@@_peek_catcodes_tl \scan_stop:
+ \if_charcode:w 6 #3
+ \else:
+ \if_charcode:w D #3
+ \else:
+ \exp_args:NNNo
+ \fi:
+ \fi:
+ #2 { \exp_not:n {#4} } {#1} #3
}
% \end{macrocode}
% For special characters the idea is to eventually act with
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 0312bf987..1aa442299 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -960,7 +960,8 @@
% (as appropriate to the result of the test).
% \end{function}
%
-% \begin{function}[added = 2020-12-03]{\peek_analysis_map_inline:n}
+% \begin{function}[added = 2020-12-03, updated = 2022-10-03]
+% {\peek_analysis_map_inline:n}
% \begin{syntax}
% \cs{peek_analysis_map_inline:n} \Arg{inline function}
% \end{syntax}
diff --git a/l3kernel/testfiles/m3peek003.tlg b/l3kernel/testfiles/m3peek003.tlg
index c51252dc9..d87929e4f 100644
--- a/l3kernel/testfiles/m3peek003.tlg
+++ b/l3kernel/testfiles/m3peek003.tlg
@@ -4,7 +4,7 @@ Author: Bruno Le Floch
============================================================
TEST 1: Peek analysis map inline
============================================================
-\exp_not:n {a},97,B
+a,97,B
\exp_after:wN {\if_false: }\fi: ,123,1
,32,A
\exp_after:wN {\if_false: }\fi: ,123,1
More information about the latex3-commits
mailing list.