[latex3-commits] [git/LaTeX3-latex3-latex3] regex-callout: First version of callouts (user code) in l3regex (b85b55fc1)
Bruno Le Floch
blflatex at gmail.com
Sat May 15 18:36:06 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : regex-callout
Link : https://github.com/latex3/latex3/commit/b85b55fc1a4a642952408f8f3f9c2ba7b2556175
>---------------------------------------------------------------
commit b85b55fc1a4a642952408f8f3f9c2ba7b2556175
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Sun Jul 19 02:04:49 2020 +0200
First version of callouts (user code) in l3regex
At the moment they are only implemented for assertions, but I plan
to also give a way to apply callouts to groups. Two issues, even
just for assertions: need to implement/forbid nested regex
operations, and need to get rid of toks abuses
>---------------------------------------------------------------
b85b55fc1a4a642952408f8f3f9c2ba7b2556175
l3kernel/l3debug.dtx | 4 ++
l3kernel/l3regex.dtx | 142 ++++++++++++++++++++++++++++++++++----
l3kernel/testfiles/m3regex011.lvt | 29 +++++++-
l3kernel/testfiles/m3regex011.tlg | 8 ++-
4 files changed, 167 insertions(+), 16 deletions(-)
diff --git a/l3kernel/l3debug.dtx b/l3kernel/l3debug.dtx
index 319fa7aa2..2653ba436 100644
--- a/l3kernel/l3debug.dtx
+++ b/l3kernel/l3debug.dtx
@@ -1124,6 +1124,10 @@
{ \@@_trace_push:nnN { regex } { 1 } \@@_replacement:n }
{ \@@_trace_pop:nnN { regex } { 1 } \@@_replacement:n }
{ \@@_replacement:n }
+ \__kernel_patch:nnn
+ { \__kernel_chk_cs_exist:c { \l_@@_internal_a_tl } }
+ { }
+ { \@@_compile_C_end:NN }
% \end{macrocode}
%
% \begin{macrocode}
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 599a2979b..34b812899 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -407,6 +407,17 @@
% |\regex_count:nnN { \G a } { aaba } \l_tmpa_int| yields $2$, but
% replacing |\G| by |^| would result in \cs{l_tmpa_int} holding the
% value $1$.
+% \item[(?C\{\meta{csname}\})] Callout to the user-defined
+% \char`\\\meta{csname}, which should be a conditional with
+% signature~|TF|. It may consult the following public variables:
+% position (in numbers of tokens) \cs{l_regex_curr_pos_int} of the
+% next token and position \cs{l_regex_start_pos_int} at which the
+% current match attempt started, and information about the next
+% token as \cs{l_regex_curr_token_tl} (\texttt{o}-expanding and
+% \texttt{x}-expanding to the token), \cs{l_regex_curr_char_int}
+% (character code) and \cs{l_regex_curr_catcode_int} (category
+% code). The latter two integers are $-1$ and $0$ for control
+% sequences, and $-2$ and $15$ for the end of the input.
% \end{l3regex-syntax}
%
% The option |(?i)| makes the match case insensitive (identifying
@@ -1886,6 +1897,7 @@
% \item \cs{@@_assertion:Nn} \meta{boolean} \Arg{assertion test},
% where the \meta{assertion test} is \cs{@@_b_test:} or
% \cs{@@_Z_test:} or \cs{@@_A_test:} or \cs{@@_G_test:}
+% or \cs{@@_C_group:c} \Arg{csname}
% \end{itemize}
% Tests can be the following:
% \begin{itemize}
@@ -2321,7 +2333,8 @@
{ \__kernel_msg_error:nn { regex } { c-trailing } }
\int_compare:nNnT \l_@@_mode_int < \c_@@_outer_mode_int
{
- \__kernel_msg_error:nn { regex } { c-missing-rbrace }
+ \__kernel_msg_error:nnx { regex } { missing-rbrace }
+ { \iow_char:N \\c }
\@@_compile_end_cs:
\prg_do_nothing: \prg_do_nothing:
\prg_do_nothing: \prg_do_nothing:
@@ -3121,6 +3134,65 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}{\@@_compile_special_group_C:w, \@@_compile_C_end:NN}
+% \begin{macro}[rEXP]{\@@_compile_C_loop:NN}
+% The syntax |(?C{mypkg_do:TF})| is an assertion which calls the user
+% function \cs[no-index]{mypkg_do:TF}. Compilation collects the name
+% from the mandatory braces into \cs{l_@@_internal_a_tl}, builds the
+% assertion around \cs{@@_C_group:c}, and checks for the closing~|)|.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_compile_special_group_C:w #1#2
+ {
+ \@@_two_if_eq:NNNNTF #1 #2 \@@_compile_special:N \c_left_brace_str
+ {
+ \tl_set:Nx \l_@@_internal_a_tl { \if_false: } \fi:
+ \@@_compile_C_loop:NN
+ }
+ {
+ \__kernel_msg_error:nnn { regex } { missing-lbrace } { (?C }
+ \@@_compile_raw:N (
+ \@@_compile_raw:N ?
+ \@@_compile_raw:N C
+ #1 #2
+ }
+ }
+\cs_new:Npn \@@_compile_C_loop:NN #1#2
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_raw:N
+ { #2 \@@_compile_C_loop:NN }
+ {
+ \token_if_eq_meaning:NNTF #1 \@@_compile_special:N
+ {
+ \exp_after:wN \token_if_eq_charcode:NNTF \c_right_brace_str #2
+ { \if_false: { \fi: } \@@_compile_C_end:NN }
+ { #2 \@@_compile_C_loop:NN }
+ }
+ {
+ \if_false: { \fi: }
+ \__kernel_msg_error:nnx { regex } { missing-rbrace } { (?C }
+ \@@_compile_C_end:NN
+ #1 #2
+ }
+ }
+ }
+\cs_new_protected:Npn \@@_compile_C_end:NN #1#2
+ {
+ \tl_build_put_right:Nx \l_@@_build_tl
+ {
+ \@@_assertion:Nn \c_true_bool
+ { \@@_C_group:c { \l_@@_internal_a_tl } }
+ }
+ \@@_two_if_eq:NNNNTF #1 #2 \@@_compile_special:N )
+ { }
+ {
+ \__kernel_msg_error:nnx { regex } { missing-rparen } { 1 }
+ #1 #2
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
% \begin{macro}
% {\@@_compile_special_group_i:w, \@@_compile_special_group_-:w}
% The match can be made case-insensitive by setting the option with
@@ -3442,7 +3514,8 @@
\@@_compile_u_loop:NN
}
{
- \__kernel_msg_error:nn { regex } { u-missing-lbrace }
+ \__kernel_msg_error:nnx { regex } { missing-lbrace }
+ { \iow_char:N \\u }
\token_if_eq_meaning:NNTF #1 \@@_compile_ur_end:
{ \@@_compile_raw:N u \@@_compile_raw:N r }
{ \@@_compile_raw:N u }
@@ -3826,6 +3899,8 @@
\@@_show_one:n
{ \bool_if:NF ##1 { negative~ } assertion:~##2 }
}
+ \cs_set:Npn \@@_C_group:c ##1
+ { callout~to~\iow_char:N\\##1 }
\cs_set:Npn \@@_b_test: { word~boundary }
\cs_set:Npn \@@_Z_test: { anchor~at~end~(\iow_char:N\\Z) }
\cs_set:Npn \@@_A_test: { anchor~at~start~(\iow_char:N\\A) }
@@ -3842,7 +3917,7 @@
\cs_set_protected:Npn \@@_item_caseless_range:nn ##1##2
{
\@@_show_one:n
- { Range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}]~(caseless) }
+ { range~[\@@_show_char:n{##1}, \@@_show_char:n{##2}]~(caseless) }
}
\cs_set_protected:Npn \@@_item_catcode:nT
{ \@@_show_item_catcode:NnT \c_true_bool }
@@ -4716,6 +4791,24 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}{\@@_C_group:c}
+% Copy the matcher state to public variables, with positions shifted so
+% that \cs{l_@@_min_pos_int} is~$1$, then call the user |TF| function.
+% \begin{macrocode}
+\cs_new_protected:Npn \@@_C_group:c #1
+ {
+ \int_set:Nn \l_regex_curr_pos_int
+ { \l_@@_curr_pos_int - \l_@@_min_pos_int + 1 }
+ \int_set:Nn \l_regex_start_pos_int
+ { \l_@@_start_pos_int - \l_@@_min_pos_int + 1 }
+ \int_set_eq:NN \l_regex_curr_char_int \l_@@_curr_char_int
+ \int_set_eq:NN \l_regex_curr_catcode_int \l_@@_curr_catcode_int
+ \tl_set_eq:NN \l_regex_curr_token_tl \l_@@_curr_token_tl
+ \use:c {#1} { \@@_break_true:w } { }
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}{\@@_command_K:}
% Change the starting point of the $0$-th submatch (full match), and
% transition to a new state, pretending that this is a fresh thread.
@@ -4784,6 +4877,22 @@
%
% \begin{variable}
% {
+% \l_regex_curr_pos_int, \l_regex_start_pos_int,
+% \l_regex_curr_char_int, \l_regex_curr_catcode_int,
+% \l_regex_curr_token_tl
+% }
+% Public copies of internal variables, set before each callout is run.
+% \begin{macrocode}
+\int_new:N \l_regex_curr_pos_int
+\int_new:N \l_regex_start_pos_int
+\int_new:N \l_regex_curr_char_int
+\int_new:N \l_regex_curr_catcode_int
+\tl_new:N \l_regex_curr_token_tl
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}
+% {
% \l_@@_min_pos_int,
% \l_@@_max_pos_int,
% \l_@@_curr_pos_int,
@@ -7383,7 +7492,7 @@
}
% \end{macrocode}
%
-% Errors related to |\c| and |\u|.
+% Errors related to |\c|, |\u| and |(?C{...})|.
% \begin{macrocode}
\__kernel_msg_new:nnnn { regex } { c-bad-mode }
{ Invalid~nested~'\iow_char:N\\c'~escape~in~regular~expression. }
@@ -7413,11 +7522,11 @@
Construction~such~as~'\iow_char:N\\cL(abc)'~are~not~allowed~inside~a~
class~'[...]'~because~classes~do~not~match~multiple~characters~at~once.
}
-\__kernel_msg_new:nnnn { regex } { c-missing-rbrace }
- { Missing~right~brace~inserted~for~'\iow_char:N\\c'~escape. }
+\__kernel_msg_new:nnnn { regex } { missing-rbrace }
+ { Missing~right~brace~inserted~for~'#1'~escape. }
{
LaTeX~was~given~a~regular~expression~where~a~
- '\iow_char:N\\c\iow_char:N\{...'~construction~was~not~ended~
+ '#1\{...'~construction~was~not~ended~
with~a~closing~brace~'\iow_char:N\}'.
}
\__kernel_msg_new:nnnn { regex } { c-missing-rbrack }
@@ -7440,11 +7549,17 @@
A~regular~expression~ends~with~'\iow_char:N\\c'~followed~
by~a~letter.~It~will~be~ignored.
}
-\__kernel_msg_new:nnnn { regex } { u-missing-lbrace }
- { Missing~left~brace~following~'\iow_char:N\\u'~escape. }
+\__kernel_msg_new:nnnn { regex } { missing-lbrace }
+ { Missing~left~brace~following~'#1'~escape. }
{
- The~'\iow_char:N\\u'~escape~sequence~must~be~followed~by~
- a~brace~group~with~the~name~of~the~variable~to~use.
+ The~'#1'~escape~sequence~must~be~followed~by~
+ a~brace~group~with~the~name~of~the~
+ \str_case_e:nn {#1}
+ {
+ { \iow_char:N \\u } { variable~ }
+ { (?C } { function~ }
+ }
+ to~use.
}
\__kernel_msg_new:nnnn { regex } { u-missing-rbrace }
{ Missing~right~brace~inserted~for~'\iow_char:N\\u'~escape. }
@@ -7507,7 +7622,8 @@
{ Unknown~special~group~'#1~...'~in~a~regular~expression. }
{
The~only~valid~constructions~starting~with~'(?'~are~
- '(?:~...~)',~'(?|~...~)',~'(?i)',~and~'(?-i)'.
+ '(?:~...~)',~'(?|~...~)',~'(?i)',~'(?-i)',~and~
+ '(?C{...})'.
}
% \end{macrocode}
%
@@ -7524,7 +7640,7 @@
{ Misused~'\iow_char:N\\u'~command~in~a~replacement~text. }
{
In~a~replacement~text,~the~'\iow_char:N\\u'~escape~sequence~
- must~be~~followed~by~a~brace~group~holding~the~name~of~the~
+ must~be~followed~by~a~brace~group~holding~the~name~of~the~
variable~to~use.
}
\__kernel_msg_new:nnnn { regex } { replacement-g }
diff --git a/l3kernel/testfiles/m3regex011.lvt b/l3kernel/testfiles/m3regex011.lvt
index 7bc07f297..53f8d874a 100644
--- a/l3kernel/testfiles/m3regex011.lvt
+++ b/l3kernel/testfiles/m3regex011.lvt
@@ -27,7 +27,34 @@
{
\tl_set:Nn \l_tmpa_tl { \aa \bb }
\regex_replace_all:nnN { \c{(.*)}(.) } { \0,\1,\2,\3 } \l_tmpa_tl
- \tl_show:N \l_tmpa_tl
+ \tl_log:N \l_tmpa_tl
+ }
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\OMIT
+\prg_new_protected_conditional:Npnn \test_callout: { TF }
+ {
+ \int_compare:nTF { 4 <= \l_regex_curr_pos_int <= 5 }
+ { \prg_return_true: }
+ { \prg_return_false: }
+ }
+\TIMO
+
+\TEST { Callouts~with~(?C...) }
+ {
+ \tl_set:Nn \l_tmpa_tl { \aa \bb }
+ \regex_replace_all:nnN
+ {
+ (?C{use_i:nn}) \c{aa} |
+ (?C{use_ii:nn}) .
+ }
+ { [\0{]} } \l_tmpa_tl
+ \tl_log:N \l_tmpa_tl
+ \regex_replace_all:nnN
+ { (?C{test_callout:TF}) }
+ { / }
+ \l_tmpa_tl
+ \tl_log:N \l_tmpa_tl
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/l3kernel/testfiles/m3regex011.tlg b/l3kernel/testfiles/m3regex011.tlg
index bd7120b17..5306c5d7d 100644
--- a/l3kernel/testfiles/m3regex011.tlg
+++ b/l3kernel/testfiles/m3regex011.tlg
@@ -28,8 +28,12 @@ misspelled it (e.g., `\hobx'), type `I' and the correct
spelling (e.g., `I\hbox'). Otherwise just continue,
and I'll forget about whatever was undefined.
> \l_tmpa_tl=\aa \bb ,\bb ,,.
-<recently read> }
-l. ... }
+============================================================
+============================================================
+TEST 3: Callouts with (?C...)
+============================================================
+> \l_tmpa_tl=[\aa {]}\bb .
+> \l_tmpa_tl=[\aa {/]/}\bb .
============================================================
============================================================
TEST 3: Peek regex
More information about the latex3-commits
mailing list.