[latex3-commits] [git/LaTeX3-latex3-latex3] peek-regex: Documentation and internal names (02170b41d)

Mon Jul 20 02:19:19 CEST 2020

Repository : https://github.com/latex3/latex3
On branch  : peek-regex
Link       : https://github.com/latex3/latex3/commit/02170b41daf36fc3956fa6be29c4f7f80368dbd3

>---------------------------------------------------------------

commit 02170b41daf36fc3956fa6be29c4f7f80368dbd3
Author: Bruno Le Floch <bruno at le-floch.fr>
Date:   Sat Jul 11 22:36:44 2020 +0200

    Documentation and internal names


>---------------------------------------------------------------

02170b41daf36fc3956fa6be29c4f7f80368dbd3
 l3kernel/l3regex.dtx | 82 +++++++++++++++++++++++++---------------------------
 1 file changed, 40 insertions(+), 42 deletions(-)

diff --git a/l3kernel/l3regex.dtx b/l3kernel/l3regex.dtx
index 3820ecdb6..be81a8c66 100644
--- a/l3kernel/l3regex.dtx
+++ b/l3kernel/l3regex.dtx
@@ -713,7 +713,7 @@
 %     |{n,}| quantifier? (I think not.)
 %   \item Quantifiers for |\u| and assertions.
 %   \item When matching, keep track of an explicit stack of
-%     \texttt{current_state} and \texttt{current_submatches}.
+%     \texttt{curr_state} and \texttt{curr_submatches}.
 %   \item If possible, when a state is reused by the same thread, kill
 %     other subthreads.
 %   \item Use an array rather than \cs[no-index]{l__regex_balance_tl}
@@ -845,12 +845,13 @@
 % names).
 % \begin{itemize}
 %   \item \emph{Group}: index of the capturing group, $-1$ for
-%     non-capturing groups.
+%     non-capturing groups. ^^A start/end index?
 %   \item \emph{Position}: each token in the query is labelled by an
 %     integer \meta{position}, with $\texttt{min_pos} - 1 \leq
 %     \meta{position} \leq \texttt{max_pos}$. The lowest and highest
-%     positions correspond to imaginary begin and end markers (with
-%     inaccessible category code and character code).
+%     positions $\texttt{min_pos} - 1$ and $\texttt{max_pos}$
+%     correspond to imaginary begin and end markers (with
+%     non-existent category code and character code).
 %   \item \emph{Query}: the token list to which we apply the regular
 %     expression.
 %   \item \emph{State}: each state of the \textsc{nfa} is labelled by an
@@ -865,8 +866,8 @@
 %     unique id for all the steps of the matching algorithm.
 % \end{itemize}
 %
-% We use \pkg{l3intarray} to manipulate arrays of integers (stored into
-% some dimension registers in scaled points).  We also abuse \TeX{}'s
+% We use \pkg{l3intarray} to manipulate arrays of integers.
+% We also abuse \TeX{}'s
 % \tn{toks} registers, by accessing them directly by number rather than
 % tying them to control sequence using the \tn{newtoks} allocation
 % functions. Specifically, these arrays and \tn{toks} are used as
@@ -877,7 +878,7 @@
 %   \item \cs{g_@@_state_active_intarray} holds the last \meta{step} in
 %     which each \meta{state} was active.
 %   \item \cs{g_@@_thread_state_intarray} maps each \meta{thread} (with
-%     $\texttt{min_active} \leq \meta{thread} < \texttt{max_active}$) to
+%     $\texttt{min_thread} \leq \meta{thread} < \texttt{max_thread}$) to
 %     the \meta{state} in which the \meta{thread} currently is. The
 %     \meta{threads} are ordered starting from the best to the least
 %     preferred.
@@ -944,7 +945,7 @@
   { \@@_toks_set:Nn #1 { } }
 \cs_new_eq:NN \@@_toks_set:Nn \tex_toks:D
 \cs_new_protected:Npn \@@_toks_set:No #1
-  { \@@_toks_set:Nn #1 \exp_after:wN }
+  { \tex_toks:D #1 \exp_after:wN }
 %    \end{macrocode}
 % \end{macro}
 %
@@ -975,13 +976,13 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_toks_put_left:Nx #1#2
   {
-    \cs_set:Npx \@@_tmp:w { #2 }
+    \cs_set_nopar:Npx \@@_tmp:w { #2 }
     \tex_toks:D #1 \exp_after:wN \exp_after:wN \exp_after:wN
       { \exp_after:wN \@@_tmp:w \tex_the:D \tex_toks:D #1 }
   }
 \cs_new_protected:Npn \@@_toks_put_right:Nx #1#2
   {
-    \cs_set:Npx \@@_tmp:w {#2}
+    \cs_set_nopar:Npx \@@_tmp:w {#2}
     \tex_toks:D #1 \exp_after:wN
       { \tex_the:D \tex_toks:D \exp_after:wN #1 \@@_tmp:w }
   }
@@ -1201,7 +1202,7 @@
 % \begin{macro}
 %   {\@@_item_caseless_equal:n, \@@_item_caseless_range:nn}
 %   For caseless matching, we perform the test both on the
-%   \texttt{current_char} and on the \texttt{case_changed_char}. Before
+%   \texttt{curr_char} and on the \texttt{case_changed_char}. Before
 %   doing the second set of tests, we make sure that
 %   \texttt{case_changed_char} has been computed.
 %    \begin{macrocode}
@@ -1308,7 +1309,8 @@
 %
 % \begin{macro}{\@@_item_exact:nn, \@@_item_exact_cs:n}
 %   This matches an exact \meta{category}-\meta{character code} pair, or
-%   an exact control sequence, more precisely one of several possible control sequences.
+%   an exact control sequence, more precisely one of several possible
+%   control sequences, separated by \cs{scan_stop:}.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_item_exact:nn #1#2
   {
@@ -1337,9 +1339,7 @@
 %   Match a control sequence (the argument is a compiled regex).
 %   First test the catcode of the current token to be zero.
 %   Then perform the matching test, and break if the csname
-%   indeed matches. The three \cs{exp_after:wN} expand the contents
-%   of the \tn{toks}\meta{current position} (of the form \cs{exp_not:n}
-%   \Arg{control sequence}) to \meta{control sequence}.
+%   indeed matches.
 %   We store the cs name before building states for the cs, as those
 %   states may overlap with toks registers storing the user's input.
 %    \begin{macrocode}
@@ -3828,17 +3828,17 @@
 %   \begin{itemize}
 %     \item \cs{g_@@_state_active_intarray} from \cs{l_@@_min_state_int}
 %       to $\cs{l_@@_max_state_int}-1$;
-%     \item \cs{g_@@_thread_state_intarray} from \cs{l_@@_min_active_int}
-%       to $\cs{l_@@_max_active_int}-1$.
+%     \item \cs{g_@@_thread_state_intarray} from \cs{l_@@_min_thread_int}
+%       to $\cs{l_@@_max_thread_int}-1$.
 %   \end{itemize}
 %   In fact, some data is stored in \tn{toks} registers (local) in the
 %   same ranges so these ranges mustn't overlap.  This is done by
-%   setting \cs{l_@@_min_active_int} to \cs{l_@@_max_state_int} after
+%   setting \cs{l_@@_min_thread_int} to \cs{l_@@_max_state_int} after
 %   building the \textsc{nfa}.  Here, in this nested call to the
 %   matching code, we need the new versions of these ranges to involve
 %   completely new entries of the intarray variables, so we begin by
 %   setting (the new) \cs{l_@@_min_state_int} to (the old)
-%   \cs{l_@@_max_active_int} to use higher entries.
+%   \cs{l_@@_max_thread_int} to use higher entries.
 %
 %   When using a regex to match a cs, we don't insert a wildcard, we
 %   anchor at the end, and since we ignore submatches, there is no need
@@ -3848,7 +3848,7 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_build_for_cs:n #1
   {
-    \int_set_eq:NN \l_@@_min_state_int \l_@@_max_active_int
+    \int_set_eq:NN \l_@@_min_state_int \l_@@_max_thread_int
     \int_set_eq:NN \l_@@_max_state_int \l_@@_min_state_int
     \@@_build_new_state:
     \@@_build_new_state:
@@ -4471,7 +4471,7 @@
 %   don't start from $0$ because the \tn{toks} registers with low
 %   numbers are used to hold the states of the \textsc{nfa}. We match
 %   without backtracking, keeping all threads in lockstep at the
-%   \texttt{current_pos} in the query. The starting point of the current
+%   \texttt{curr_pos} in the query. The starting point of the current
 %   match attempt is \texttt{start_pos}, and \texttt{success_pos},
 %   updated whenever a thread succeeds, is used as the next starting
 %   position.
@@ -4496,7 +4496,7 @@
 %   and the character code of the result of changing the case of the
 %   current token (|A-Z|$\leftrightarrow$|a-z|). This last integer is
 %   only computed when necessary, and is otherwise \cs{c_max_int}.  The
-%   \texttt{current_char} variable is also used in various other phases
+%   \texttt{curr_char} variable is also used in various other phases
 %   to hold a character code.
 %    \begin{macrocode}
 \int_new:N \l_@@_curr_char_int
@@ -4519,7 +4519,7 @@
 % \begin{variable}
 %   {\l_@@_curr_submatches_prop, \l_@@_success_submatches_prop}
 %   The submatches for the thread which is currently active are stored
-%   in the \texttt{current_submatches} property list variable. This
+%   in the \texttt{curr_submatches} property list variable. This
 %   property list is stored by \cs{@@_action_cost:n} into the
 %   \tn{toks} register for the target state of the transition, to be
 %   retrieved when matching at the next position. When a thread
@@ -4551,17 +4551,17 @@
 %    \end{macrocode}
 % \end{variable}
 %
-% \begin{variable}{\l_@@_min_active_int, \l_@@_max_active_int}
+% \begin{variable}{\l_@@_min_thread_int, \l_@@_max_thread_int}
 %   All the currently active threads are kept in order of precedence in
 %   \cs{g_@@_thread_state_intarray}, and the corresponding submatches in the
 %   \tn{toks}. For our purposes, those serve as an array, indexed from
-%   \texttt{min_active} (inclusive) to \texttt{max_active} (excluded).
+%   \texttt{min_thread} (inclusive) to \texttt{max_thread} (excluded).
 %   At the start of every step, the whole array is unpacked, so that the
-%   space can immediately be reused, and \texttt{max_active} is reset to
-%   \texttt{min_active}, effectively clearing the array.
+%   space can immediately be reused, and \texttt{max_thread} is reset to
+%   \texttt{min_thread}, effectively clearing the array.
 %    \begin{macrocode}
-\int_new:N \l_@@_min_active_int
-\int_new:N \l_@@_max_active_int
+\int_new:N \l_@@_min_thread_int
+\int_new:N \l_@@_max_thread_int
 %    \end{macrocode}
 % \end{variable}
 %
@@ -4689,7 +4689,7 @@
         \__kernel_intarray_gset:Nnn
           \g_@@_state_active_intarray {##1} { 1 }
       }
-    \int_set_eq:NN \l_@@_min_active_int \l_@@_max_state_int
+    \int_set_eq:NN \l_@@_min_thread_int \l_@@_max_state_int
     \int_zero:N \l_@@_step_int
     \int_set_eq:NN \l_@@_success_pos_int \l_@@_min_pos_int
     \int_set:Nn \l_@@_min_submatch_int
@@ -4729,7 +4729,7 @@
     \int_set_eq:NN \l_@@_start_pos_int \l_@@_success_pos_int
     \bool_set_false:N \l_@@_match_success_bool
     \prop_clear:N \l_@@_curr_submatches_prop
-    \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
+    \int_set_eq:NN \l_@@_max_thread_int \l_@@_min_thread_int
     \@@_store_state:n { \l_@@_min_state_int }
     \int_set:Nn \l_@@_curr_pos_int
       { \l_@@_start_pos_int - 1 }
@@ -4774,7 +4774,7 @@
 %   At each new position, set some variables and get the new character
 %   and category from the query. Then unpack the array of active
 %   threads, and clear it by resetting its length
-%   (\texttt{max_active}). This results in a sequence of
+%   (\texttt{max_thread}). This results in a sequence of
 %   \cs{@@_use_state_and_submatches:nn} \Arg{state} \Arg{prop}, and
 %   we consider those states one by one in order. As soon as a thread
 %   succeeds, exit the step, and, if there are threads to consider at the
@@ -4792,15 +4792,15 @@
     \@@_query_get:
     \use:x
       {
-        \int_set_eq:NN \l_@@_max_active_int \l_@@_min_active_int
+        \int_set_eq:NN \l_@@_max_thread_int \l_@@_min_thread_int
         \int_step_function:nnN
-          { \l_@@_min_active_int }
-          { \l_@@_max_active_int - 1 }
+          { \l_@@_min_thread_int }
+          { \l_@@_max_thread_int - 1 }
           \@@_match_one_active:n
       }
     \prg_break_point:
     \bool_set_false:N \l_@@_fresh_thread_bool
-    \if_int_compare:w \l_@@_max_active_int > \l_@@_min_active_int
+    \if_int_compare:w \l_@@_max_thread_int > \l_@@_min_thread_int
       \if_int_compare:w \l_@@_curr_pos_int < \l_@@_max_pos_int
         \exp_after:wN \exp_after:wN \exp_after:wN \@@_match_loop:
       \fi:
@@ -4882,7 +4882,7 @@
 % \begin{macro}{\@@_use_state_and_submatches:nn}
 %   This function is called as one item in the array of active threads
 %   after that array has been unpacked for a new step. Update the
-%   \texttt{current_state} and \texttt{current_submatches} and use the
+%   \texttt{curr_state} and \texttt{curr_submatches} and use the
 %   state if it has not yet been encountered at this step.
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_use_state_and_submatches:nn #1 #2
@@ -4986,12 +4986,12 @@
   {
     \@@_store_submatches:
     \__kernel_intarray_gset:Nnn \g_@@_thread_state_intarray
-      { \l_@@_max_active_int } {#1}
-    \int_incr:N \l_@@_max_active_int
+      { \l_@@_max_thread_int } {#1}
+    \int_incr:N \l_@@_max_thread_int
   }
 \cs_new_protected:Npn \@@_store_submatches:
   {
-    \@@_toks_set:No \l_@@_max_active_int
+    \@@_toks_set:No \l_@@_max_thread_int
       { \l_@@_curr_submatches_prop }
   }
 %    \end{macrocode}
@@ -6726,8 +6726,6 @@
 %^^A NOT IMPLEMENTED
 %^^A    \p{xx}     a character with the xx property
 %^^A    \P{xx}     a character without the xx property
-%^^A    [[:xxx:]]  positive POSIX named set
-%^^A    [[:^xxx:]] negative POSIX named set
 %^^A    (?=...)    positive look ahead
 %^^A    (?!...)    negative look ahead
 %^^A    (?<=...)   positive look behind