[latex3-commits] [git/LaTeX3-latex3-latex3] peek-regex: Documentation and internal names (b660b8838)
Joseph Wright
joseph.wright at morningstar2.co.uk
Thu Dec 3 16:03:05 CET 2020
Repository : https://github.com/latex3/latex3
On branch : peek-regex
Link : https://github.com/latex3/latex3/commit/b660b88380c16770c89f5be6206edc1a74875533
>---------------------------------------------------------------
commit b660b88380c16770c89f5be6206edc1a74875533
Author: Bruno Le Floch <bruno at le-floch.fr>
Date: Sat Jul 11 22:36:44 2020 +0200
Documentation and internal names
>---------------------------------------------------------------
b660b88380c16770c89f5be6206edc1a74875533
l3kernel/l3regex.dtx | 82 +++++++++++++++++++++++++---------------------------
1 file changed, 40 insertions(+), 42 deletions(-)
diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 2086b5c3e..df4b1a50b 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -713,7 +713,7 @@
% |{n,}| quantifier? (I think not.)
% \item Quantifiers for |\u| and assertions.
% \item When matching, keep track of an explicit stack of
-% \texttt{current_state} and \texttt{current_submatches}.
+% \texttt{curr_state} and \texttt{curr_submatches}.
% \item If possible, when a state is reused by the same thread, kill
% other subthreads.
% \item Use an array rather than \cs[no-index]{l__regex_balance_tl}
@@ -845,12 +845,13 @@
% names).
% \begin{itemize}
% \item \emph{Group}: index of the capturing group, $-1$ for
-% non-capturing groups.
+% non-capturing groups. ^^A start/end index?
% \item \emph{Position}: each token in the query is labelled by an
% integer \meta{position}, with $\texttt{min_pos} - 1 \leq
% \meta{position} \leq \texttt{max_pos}$. The lowest and highest
-% positions correspond to imaginary begin and end markers (with
-% inaccessible category code and character code).
+% positions $\texttt{min_pos} - 1$ and $\texttt{max_pos}$
+% correspond to imaginary begin and end markers (with
+% non-existent category code and character code).
% \item \emph{Query}: the token list to which we apply the regular
% expression.
% \item \emph{State}: each state of the \textsc{nfa} is labelled by an
@@ -865,8 +866,8 @@
% unique id for all the steps of the matching algorithm.
% \end{itemize}
%
-% We use \pkg{l3intarray} to manipulate arrays of integers (stored into
-% some dimension registers in scaled points). We also abuse \TeX{}'s
+% We use \pkg{l3intarray} to manipulate arrays of integers.
+% We also abuse \TeX{}'s
% \tn{toks} registers, by accessing them directly by number rather than
% tying them to a control sequence using the \tn{newtoks} allocation
% functions. Specifically, these arrays and \tn{toks} are used as
@@ -877,7 +878,7 @@
% \item \cs{g_@@_state_active_intarray} holds the last \meta{step} in
% which each \meta{state} was active.
% \item \cs{g_@@_thread_state_intarray} maps each \meta{thread} (with
-% $\texttt{min_active} \leq \meta{thread} < \texttt{max_active}$) to
+% $\texttt{min_thread} \leq \meta{thread} < \texttt{max_thread}$) to
% the \meta{state} in which the \meta{thread} currently is. The
% \meta{threads} are ordered starting from the best to the least
% preferred.
@@ -944,7 +945,7 @@
{ \@@_toks_set:Nn #1 { } }
\cs_new_eq:NN \@@_toks_set:Nn \tex_toks:D
\cs_new_protected:Npn \@@_toks_set:No #1
- { \@@_toks_set:Nn #1 \exp_after:wN }
+ { \tex_toks:D #1 \exp_after:wN }
% \end{macrocode}
% \end{macro}
%
@@ -975,13 +976,13 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_toks_put_left:Nx #1#2
{
- \cs_set:Npx \@@_tmp:w { #2 }
+ \cs_set_nopar:Npx \@@_tmp:w { #2 }
\tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN
{ \exp_after:wN \@@_tmp:w \tex_the:D \tex_toks:D #1 }
}
\cs_new_protected:Npn \@@_toks_put_right:Nx #1#2
{
- \cs_set:Npx \@@_tmp:w {#2}
+ \cs_set_nopar:Npx \@@_tmp:w {#2}
\tex_toks:D #1 \exp_after:wN
{ \tex_the:D \tex_toks:D \exp_after:wN #1 \@@_tmp:w }
}
@@ -1201,7 +1202,7 @@
% \begin{macro}
% {\@@_item_caseless_equal:n, \@@_item_caseless_range:nn}
% For caseless matching, we perform the test both on the
-% \texttt{current_char} and on the \texttt{case_changed_char}. Before
+% \texttt{curr_char} and on the \texttt{case_changed_char}. Before
% doing the second set of tests, we make sure that
% \texttt{case_changed_char} has been computed.
% \begin{macrocode}
@@ -1308,7 +1309,8 @@
%
% \begin{macro}{\@@_item_exact:nn, \@@_item_exact_cs:n}
% This matches an exact \meta{category}-\meta{character code} pair, or
-% an exact control sequence, more precisely one of several possible control sequences.
+% an exact control sequence, more precisely one of several possible
+% control sequences, separated by \cs{scan_stop:}.
% \begin{macrocode}
\cs_new_protected:Npn \@@_item_exact:nn #1#2
{
@@ -1337,9 +1339,7 @@
% Match a control sequence (the argument is a compiled regex).
% First test the catcode of the current token to be zero.
% Then perform the matching test, and break if the csname
-% indeed matches. The three \cs{exp_after:wN} expand the contents
-% of the \tn{toks}\meta{current position} (of the form \cs{exp_not:n}
-% \Arg{control sequence}) to \meta{control sequence}.
+% indeed matches.
% We store the cs name before building states for the cs, as those
% states may overlap with toks registers storing the user's input.
% \begin{macrocode}
@@ -3828,17 +3828,17 @@
% \begin{itemize}
% \item \cs{g_@@_state_active_intarray} from \cs{l_@@_min_state_int}
% to $\cs{l_@@_max_state_int}-1$;
-% \item \cs{g_@@_thread_state_intarray} from \cs{l_@@_min_active_int}
-% to $\cs{l_@@_max_active_int}-1$.
+% \item \cs{g_@@_thread_state_intarray} from \cs{l_@@_min_thread_int}
+% to $\cs{l_@@_max_thread_int}-1$.
% \end{itemize}
% In fact, some data is stored in \tn{toks} registers (local) in the
% same ranges so these ranges mustn't overlap. This is done by
-% setting \cs{l_@@_min_active_int} to \cs{l_@@_max_state_int} after
+% setting \cs{l_@@_min_thread_int} to \cs{l_@@_max_state_int} after
% building the \textsc{nfa}. Here, in this nested call to the
% matching code, we need the new versions of these ranges to involve
% completely new entries of the intarray variables, so we begin by
% setting (the new) \cs{l_@@_min_state_int} to (the old)
-% \cs{l_@@_max_active_int} to use higher entries.
+% \cs{l_@@_max_thread_int} to use higher entries.
%
% When using a regex to match a cs, we don't insert a wildcard, we
% anchor at the end, and since we ignore submatches, there is no need
@@ -3848,7 +3848,7 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_build_for_cs:n #1
{
- \int_set_eq:NN \l_@@_min_state_int \l_@@_max_active_int
+ \int_set_eq:NN \l_@@_min_state_int \l_@@_max_thread_int
\int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
\@@_build_new_state:
\@@_build_new_state:
@@ -4471,7 +4471,7 @@
% don't start from $0$ because the \tn{toks} registers with low
% numbers are used to hold the states of the \textsc{nfa}. We match
% without backtracking, keeping all threads in lockstep at the
-% \texttt{current_pos} in the query. The starting point of the current
+% \texttt{curr_pos} in the query. The starting point of the current
% match attempt is \texttt{start_pos}, and \texttt{success_pos},
% updated whenever a thread succeeds, is used as the next starting
% position.
@@ -4496,7 +4496,7 @@
% and the character code of the result of changing the case of the
% current token (|A-Z|$\leftrightarrow$|a-z|). This last integer is
% only computed when necessary, and is otherwise \cs{c_max_int}. The
-% \texttt{current_char} variable is also used in various other phases
+% \texttt{curr_char} variable is also used in various other phases
% to hold a character code.
% \begin{macrocode}
\int_new:N \l_@@_curr_char_int
@@ -4519,7 +4519,7 @@
% \begin{variable}
% {\l_@@_curr_submatches_prop, \l_@@_success_submatches_prop}
% The submatches for the thread which is currently active are stored
-% in the \texttt{current_submatches} property list variable. This
+% in the \texttt{curr_submatches} property list variable. This
% property list is stored by \cs{@@_action_cost:n} into the
% \tn{toks} register for the target state of the transition, to be
% retrieved when matching at the next position. When a thread
@@ -4551,17 +4551,17 @@
% \end{macrocode}
% \end{variable}
%
-% \begin{variable}{\l_@@_min_active_int, \l_@@_max_active_int}
+% \begin{variable}{\l_@@_min_thread_int, \l_@@_max_thread_int}
% All the currently active threads are kept in order of precedence in
% \cs{g_@@_thread_state_intarray}, and the corresponding submatches in the
% \tn{toks}. For our purposes, those serve as an array, indexed from
-% \texttt{min_active} (inclusive) to \texttt{max_active} (excluded).
+% \texttt{min_thread} (inclusive) to \texttt{max_thread} (excluded).
% At the start of every step, the whole array is unpacked, so that the
-% space can immediately be reused, and \texttt{max_active} is reset to
-% \texttt{min_active}, effectively clearing the array.
+% space can immediately be reused, and \texttt{max_thread} is reset to
+% \texttt{min_thread}, effectively clearing the array.
% \begin{macrocode}
-\int_new:N \l_@@_min_active_int
-\int_new:N \l_@@_max_active_int
+\int_new:N \l_@@_min_thread_int
+\int_new:N \l_@@_max_thread_int
% \end{macrocode}
% \end{variable}
%
@@ -4689,7 +4689,7 @@
\__kernel_intarray_gset:Nnn
\g_@@_state_active_intarray {##1} { 1 }
}
- \int_set_eq:NN \l_@@_min_active_int \l_@@_max_state_int
+ \int_set_eq:NN \l_@@_min_thread_int \l_@@_max_state_int
\int_zero:N \l_@@_step_int
\int_set_eq:NN \l_@@_success_pos_int \l_@@_min_pos_int
\int_set:Nn \l_@@_min_submatch_int
@@ -4729,7 +4729,7 @@
\int_set_eq:NN \l_@@_start_pos_int \l_@@_success_pos_int
\bool_set_false:N \l_@@_match_success_bool
\prop_clear:N \l_@@_curr_submatches_prop
- \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
+ \int_set_eq:NN \l_@@_max_thread_int \l_@@_min_thread_int
\@@_store_state:n { \l_@@_min_state_int }
\int_set:Nn \l_@@_curr_pos_int
{ \l_@@_start_pos_int - 1 }
@@ -4774,7 +4774,7 @@
% At each new position, set some variables and get the new character
% and category from the query. Then unpack the array of active
% threads, and clear it by resetting its length
-% (\texttt{max_active}). This results in a sequence of
+% (\texttt{max_thread}). This results in a sequence of
% \cs{@@_use_state_and_submatches:nn} \Arg{state} \Arg{prop}, and
% we consider those states one by one in order. As soon as a thread
% succeeds, exit the step, and, if there are threads to consider at the
@@ -4792,15 +4792,15 @@
\@@_query_get:
\use:x
{
- \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
+ \int_set_eq:NN \l_@@_max_thread_int \l_@@_min_thread_int
\int_step_function:nnN
- { \l_@@_min_active_int }
- { \l_@@_max_active_int - 1 }
+ { \l_@@_min_thread_int }
+ { \l_@@_max_thread_int - 1 }
\@@_match_one_active:n
}
\prg_break_point:
\bool_set_false:N \l_@@_fresh_thread_bool
- \if_int_compare:w \l_@@_max_active_int > \l_@@_min_active_int
+ \if_int_compare:w \l_@@_max_thread_int > \l_@@_min_thread_int
\if_int_compare:w \l_@@_curr_pos_int < \l_@@_max_pos_int
\exp_after:wN \exp_after:wN \exp_after:wN \@@_match_loop:
\fi:
@@ -4882,7 +4882,7 @@
% \begin{macro}{\@@_use_state_and_submatches:nn}
% This function is called as one item in the array of active threads
% after that array has been unpacked for a new step. Update the
-% \texttt{current_state} and \texttt{current_submatches} and use the
+% \texttt{curr_state} and \texttt{curr_submatches} and use the
% state if it has not yet been encountered at this step.
% \begin{macrocode}
\cs_new_protected:Npn \@@_use_state_and_submatches:nn #1 #2
@@ -4986,12 +4986,12 @@
{
\@@_store_submatches:
\__kernel_intarray_gset:Nnn \g_@@_thread_state_intarray
- { \l_@@_max_active_int } {#1}
- \int_incr:N \l_@@_max_active_int
+ { \l_@@_max_thread_int } {#1}
+ \int_incr:N \l_@@_max_thread_int
}
\cs_new_protected:Npn \@@_store_submatches:
{
- \@@_toks_set:No \l_@@_max_active_int
+ \@@_toks_set:No \l_@@_max_thread_int
{ \l_@@_curr_submatches_prop }
}
% \end{macrocode}
@@ -6726,8 +6726,6 @@
%^^A NOT IMPLEMENTED
%^^A \p{xx} a character with the xx property
%^^A \P{xx} a character without the xx property
-%^^A [[:xxx:]] positive POSIX named set
-%^^A [[:^xxx:]] negative POSIX named set
%^^A (?=...) positive look ahead
%^^A (?!...) negative look ahead
%^^A (?<=...) positive look behind
More information about the latex3-commits
mailing list.