[latex3-commits] [git/LaTeX3-latex3-latex3] peek-regex: Internal change to how begin/end anchors are implemented (bf8e352e0)
Bruno Le Floch
blflatex at gmail.com
Mon Jul 20 02:19:19 CEST 2020
Repository : https://github.com/latex3/latex3
On branch : peek-regex
Link : https://github.com/latex3/latex3/commit/bf8e352e008c24976bf58ea1a76718066ca4b9ea
>---------------------------------------------------------------
commit bf8e352e008c24976bf58ea1a76718066ca4b9ea
Author: Bruno Le Floch <blflatex at gmail.com>
Date: Sun Jul 19 17:32:15 2020 +0200
Internal change to how begin/end anchors are implemented
The aim is to remove uses of max_pos, because that will not be available
when peeking for a regex
>---------------------------------------------------------------
bf8e352e008c24976bf58ea1a76718066ca4b9ea
l3kernel/l3regex.dtx | 140 ++++++++++++++++++++++-----------------------------
1 file changed, 59 insertions(+), 81 deletions(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index cfb002475..0878f68e2 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -1825,7 +1825,7 @@
% \item \cs{@@_command_K:}
% \item \cs{@@_assertion:Nn} \meta{boolean} \Arg{assertion test},
% where the \meta{assertion test} is \cs{@@_b_test:} or
-% |{|\cs{@@_anchor:N} \meta{integer}|}|
+% \cs{@@_Z_test:} or \cs{@@_A_test:} or \cs{@@_G_test:}
% \end{itemize}
% Tests can be the following:
% \begin{itemize}
@@ -2666,73 +2666,56 @@
%
% \subsubsection{Anchoring and simple assertions}
%
-% \begin{macro}{\@@_compile_anchor:NF}
+% \begin{macro}{\@@_compile_anchor_letter:NNN}
+% \begin{macro}{\@@_compile_/A:, \@@_compile_/G:, \@@_compile_/Z:, \@@_compile_/z:, \@@_compile_/b:, \@@_compile_/B:}
% \begin{macro}+\@@_compile_^:+
-% \begin{macro}{\@@_compile_/A:, \@@_compile_/G:}
% \begin{macro}+\@@_compile_$:+
-% \begin{macro}{\@@_compile_/Z:, \@@_compile_/z:}
-% In modes where assertions are allowed, anchor to the start of the
-% query, the start of the match, or the end of the query, depending on
-% the integer |#1|. In other modes, |#2| treats the character as raw,
-% with an error for escaped letters (|$| is valid in a class, but |\A|
-% is definitely a mistake on the user's part).
+% In modes where assertions are forbidden, anchors such as |\A|
+% produce an error (|\A|~is invalid in classes); otherwise they add an
+% \cs{@@_assertion:Nn} test as appropriate (the only negative
+% assertion is~|\B|). The test functions are defined later. The
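+% implementation for |$| and |^| differs from that of |\A| etc.\ only
+% because these two characters are also valid inside a class, where
+% they are compiled as raw characters instead of producing an error.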
% \begin{macrocode}
-\cs_new_protected:Npn \@@_compile_anchor:NF #1#2
+\cs_new_protected:Npn \@@_compile_anchor_letter:NNN #1#2#3
{
- \@@_if_in_class_or_catcode:TF {#2}
+ \@@_if_in_class_or_catcode:TF { \@@_compile_raw_error:N #1 }
{
\tl_build_put_right:Nn \l_@@_build_tl
- { \@@_assertion:Nn \c_true_bool { \@@_anchor:N #1 } }
+ { \@@_assertion:Nn #2 {#3} }
}
}
-\cs_set_protected:Npn \@@_tmp:w #1#2
- {
- \cs_new_protected:cpn { @@_compile_/#1: }
- { \@@_compile_anchor:NF #2 { \@@_compile_raw_error:N #1 } }
- }
-\@@_tmp:w A \l_@@_min_pos_int
-\@@_tmp:w G \l_@@_start_pos_int
-\@@_tmp:w Z \l_@@_max_pos_int
-\@@_tmp:w z \l_@@_max_pos_int
+\cs_new_protected:cpn { @@_compile_/A: }
+ { \@@_compile_anchor_letter:NNN A \c_true_bool \@@_A_test: }
+\cs_new_protected:cpn { @@_compile_/G: }
+ { \@@_compile_anchor_letter:NNN G \c_true_bool \@@_G_test: }
+\cs_new_protected:cpn { @@_compile_/Z: }
+ { \@@_compile_anchor_letter:NNN Z \c_true_bool \@@_Z_test: }
+\cs_new_protected:cpn { @@_compile_/z: }
+ { \@@_compile_anchor_letter:NNN z \c_true_bool \@@_Z_test: }
+\cs_new_protected:cpn { @@_compile_/b: }
+ { \@@_compile_anchor_letter:NNN b \c_true_bool \@@_b_test: }
+\cs_new_protected:cpn { @@_compile_/B: }
+ { \@@_compile_anchor_letter:NNN B \c_false_bool \@@_b_test: }
\cs_set_protected:Npn \@@_tmp:w #1#2
{
\cs_new_protected:cpn { @@_compile_#1: }
- { \@@_compile_anchor:NF #2 { \@@_compile_raw:N #1 } }
+ {
+ \@@_if_in_class_or_catcode:TF { \@@_compile_raw:N #1 }
+ {
+ \tl_build_put_right:Nn \l_@@_build_tl
+ { \@@_assertion:Nn \c_true_bool {#2} }
+ }
+ }
}
-\exp_args:Nx \@@_tmp:w { \iow_char:N \^ } \l_@@_min_pos_int
-\exp_args:Nx \@@_tmp:w { \iow_char:N \$ } \l_@@_max_pos_int
+\exp_args:Nx \@@_tmp:w { \iow_char:N \^ } { \@@_A_test: }
+\exp_args:Nx \@@_tmp:w { \iow_char:N \$ } { \@@_Z_test: }
% \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
-% \end{macro}
-%
-% \begin{macro}{\@@_compile_/b:, \@@_compile_/B:}
-% Contrarily to |^| and |$|, which could be implemented without really
-% knowing what precedes in the token list, this requires more
-% information, namely, the knowledge of the last character code.
-% \begin{macrocode}
-\cs_new_protected:cpn { @@_compile_/b: }
- {
- \@@_if_in_class_or_catcode:TF
- { \@@_compile_raw_error:N b }
- {
- \tl_build_put_right:Nn \l_@@_build_tl
- { \@@_assertion:Nn \c_true_bool { \@@_b_test: } }
- }
- }
-\cs_new_protected:cpn { @@_compile_/B: }
- {
- \@@_if_in_class_or_catcode:TF
- { \@@_compile_raw_error:N B }
- {
- \tl_build_put_right:Nn \l_@@_build_tl
- { \@@_assertion:Nn \c_false_bool { \@@_b_test: } }
- }
- }
-% \end{macrocode}
-% \end{macro}
%
% \subsubsection{Character classes}
%
@@ -3515,7 +3498,9 @@
{ \bool_if:NF ##1 { negative~ } assertion:~##2 }
}
\cs_set:Npn \@@_b_test: { word~boundary }
- \cs_set_eq:NN \@@_anchor:N \@@_show_anchor_to_str:N
+ \cs_set:Npn \@@_Z_test: { anchor~at~end~(\iow_char:N\\Z) }
+ \cs_set:Npn \@@_A_test: { anchor~at~start~(\iow_char:N\\A) }
+ \cs_set:Npn \@@_G_test: { anchor~at~start~of~match~(\iow_char:N\\G) }
\cs_set_protected:Npn \@@_item_caseful_equal:n ##1
{ \@@_show_one:n { char~code~\int_eval:n{##1} } }
\cs_set_protected:Npn \@@_item_caseful_range:nn ##1##2
@@ -3658,24 +3643,6 @@
% \end{macrocode}
% \end{macro}
%
-% \begin{macro}[rEXP]{\@@_show_anchor_to_str:N}
-% The argument is an integer telling us where the anchor is. We
-% convert that to the relevant info.
-% \begin{macrocode}
-\cs_new:Npn \@@_show_anchor_to_str:N #1
- {
- anchor~at~
- \str_case:nnF { #1 }
- {
- { \l_@@_min_pos_int } { start~(\iow_char:N\\A) }
- { \l_@@_start_pos_int } { start~of~match~(\iow_char:N\\G) }
- { \l_@@_max_pos_int } { end~(\iow_char:N\\Z) }
- }
- { <error:~'#1'~not~recognized> }
- }
-% \end{macrocode}
-% \end{macro}
-%
% \begin{macro}{\@@_show_item_catcode:NnT}
% Produce a sequence of categories which the catcode bitmap |#2|
% contains, and show it, indenting the tests on which this catcode
@@ -3855,7 +3822,7 @@
\@@_pop_lr_states:
\@@_toks_put_right:Nn \l_@@_right_state_int
{
- \if_int_compare:w \l_@@_curr_pos_int = \l_@@_max_pos_int
+ \if_int_compare:w -2 = \l_@@_curr_char_int
\exp_after:wN \@@_action_success:
\fi:
}
@@ -4343,16 +4310,14 @@
%
% \subsubsection{Others}
%
-% \begin{macro}{\@@_assertion:Nn, \@@_b_test:, \@@_anchor:N}
+% \begin{macro}{\@@_assertion:Nn, \@@_b_test:, \@@_A_test:, \@@_G_test:, \@@_Z_test:}
% Usage: \cs{@@_assertion:Nn} \meta{boolean} \Arg{test}, where the
% \meta{test} is one of the other functions listed. Add a free
% transition to a new state, conditional on the assertion test. The
% \cs{@@_b_test:} test is used by the |\b| and |\B| escapes: check
% if the last character was a word character or not, and do the same
% to the current character. The boundary-markers of the string are
-% non-word characters for this purpose. Anchors at the start or end
-% of match use \cs{@@_anchor:N}, with a position controlled by the
-% integer |#1|.
+% non-word characters for this purpose.
% \begin{macrocode}
\cs_new_protected:Npn \@@_assertion:Nn #1#2
{
@@ -4372,12 +4337,6 @@
\bool_if:NT #1 { { } }
}
}
-\cs_new_protected:Npn \@@_anchor:N #1
- {
- \if_int_compare:w #1 = \l_@@_curr_pos_int
- \exp_after:wN \@@_break_true:w
- \fi:
- }
\cs_new_protected:Npn \@@_b_test:
{
\group_begin:
@@ -4387,6 +4346,24 @@
{ \group_end: \@@_item_reverse:n \@@_prop_w: }
{ \group_end: \@@_prop_w: }
}
+\cs_new_protected:Npn \@@_Z_test:
+ {
+ \if_int_compare:w -2 = \l_@@_curr_char_int
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \@@_A_test:
+ {
+ \if_int_compare:w -2 = \l_@@_last_char_int
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
+\cs_new_protected:Npn \@@_G_test:
+ {
+ \if_int_compare:w \l_@@_curr_pos_int = \l_@@_start_pos_int
+ \exp_after:wN \@@_break_true:w
+ \fi:
+ }
% \end{macrocode}
% \end{macro}
%
@@ -4701,6 +4678,7 @@
}
\int_zero:N \l_@@_step_int
\int_set_eq:NN \l_@@_success_pos_int \l_@@_min_pos_int
+ \int_set:Nn \l_@@_last_char_success_int { -2 }
\tl_build_begin:N \l_@@_matched_analysis_tl
\tl_clear:N \l_@@_curr_analysis_tl
\int_set:Nn \l_@@_min_submatch_int { 1 }
More information about the latex3-commits
mailing list.