[latex3-commits] [l3svn] r6971 - Optimize matching of control sequences in l3regex (see #261)

noreply at latex-project.org noreply at latex-project.org
Mon Feb 20 21:59:37 CET 2017


Author: bruno
Date: 2017-02-20 21:59:37 +0100 (Mon, 20 Feb 2017)
New Revision: 6971

Modified:
   trunk/l3experimental/l3str/l3regex.dtx
   trunk/l3experimental/l3str/testfiles/m3regex005.tlg
   trunk/l3experimental/l3str/testfiles/m3regex007.tlg
Log:
Optimize matching of control sequences in l3regex (see #261)


Modified: trunk/l3experimental/l3str/l3regex.dtx
===================================================================
--- trunk/l3experimental/l3str/l3regex.dtx	2017-02-20 19:22:46 UTC (rev 6970)
+++ trunk/l3experimental/l3str/l3regex.dtx	2017-02-20 20:59:37 UTC (rev 6971)
@@ -1086,9 +1086,9 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}[int]{\@@_item_exact:nn, \@@_item_exact_cs:c}
+% \begin{macro}[int]{\@@_item_exact:nn, \@@_item_exact_cs:n}
 %   This matches an exact \meta{category}-\meta{character code} pair, or
-%   an exact control sequence.
+%   an exact control sequence (more precisely, any one of several given ones).
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_item_exact:nn #1#2
   {
@@ -1098,16 +1098,18 @@
       \fi:
     \fi:
   }
-\cs_new_protected:Npn \@@_item_exact_cs:c #1
+\cs_new_protected:Npn \@@_item_exact_cs:n #1
   {
     \int_compare:nNnTF \l_@@_current_catcode_int = 0
       {
-        \str_if_eq_x:nnTF
+        \tl_set:Nx \l_@@_internal_a_tl
           {
+            \scan_stop:
             \exp_after:wN \exp_after:wN \exp_after:wN \cs_to_str:N
             \tex_the:D \tex_toks:D \l_@@_current_pos_int
+            \scan_stop:
           }
-          { #1 }
+        \tl_if_in:noTF { \scan_stop: #1 \scan_stop: } \l_@@_internal_a_tl
           { \@@_break_true:w } { }
       }
       { }
@@ -1620,7 +1622,9 @@
 %   \item \cs{@@_item_catcode_reverse:nT} \Arg{catcode bitmap} \Arg{tests}
 %   \item \cs{@@_item_reverse:n} \Arg{tests}
 %   \item \cs{@@_item_exact:nn} \Arg{catcode} \Arg{char code}
-%   \item \cs{@@_item_exact_cs:c} \Arg{csname}
+%   \item \cs{@@_item_exact_cs:n} \Arg{csnames}, where \meta{csnames}
+%     is a list of the form \meta{csname} \cs{scan_stop:} \meta{csname}
+%     \cs{scan_stop:} \meta{csname} and so on, given in a brace group.
 %   \item \cs{@@_item_cs:n} \Arg{compiled regex}
 % \end{itemize}
 %
@@ -2011,9 +2015,7 @@
       \int_compare:nNnT \l_@@_mode_int < \c_@@_outer_mode_int
         {
           \__msg_kernel_error:nn { regex } { c-missing-rbrace }
-          \@@_compile_end:
-          \@@_compile_one:x
-            { \@@_item_cs:n { \exp_not:o \l_@@_internal_regex } }
+          \@@_compile_end_cs:
           \prg_do_nothing: \prg_do_nothing:
           \prg_do_nothing: \prg_do_nothing:
         }
@@ -2951,26 +2953,83 @@
 % \end{macro}
 %
 % \begin{macro}+\@@_compile_}:+
+% \begin{macro}{\@@_compile_end_cs:}
+% \begin{macro}[EXP,aux]{\@@_compile_cs_aux:Nn, \@@_compile_cs_aux:NNnnnN}
 %   Non-escaped right braces are only special if they appear when
 %   compiling the regular expression for a csname, but not within a
-%   class: |\c{[}{]}| matches the control sequences |\}| and
-%   |\{|\ldots{} Admittedly, that would be better done as
-%   |\c{[{}]}|. So, end compiling the inner regex (this closes any
-%   dangling class or group).  Then insert the corresponding test in the
-%   outer regex.
+%   class: |\c{[{}]}| matches the control sequences |\{| and |\}|.  So,
+%   end compiling the inner regex (this closes any dangling class or
+%   group).  Then insert the corresponding test in the outer regex.  As
+%   an optimization, if the control sequence test simply consists of
+%   several explicit possibilities (branches) then use
+%   \cs{@@_item_exact_cs:n} with an argument consisting of all
+%   possibilities separated by \cs{scan_stop:}.
 %    \begin{macrocode}
+\flag_new:n { @@_cs }
 \cs_new_protected:cpn { @@_compile_ \c_right_brace_str : }
   {
     \@@_if_in_cs:TF
+      { \@@_compile_end_cs: }
+      { \exp_after:wN \@@_compile_raw:N \c_right_brace_str }
+  }
+\cs_new_protected:Npn \@@_compile_end_cs:
+  {
+    \@@_compile_end:
+    \flag_clear:n { @@_cs }
+    \tl_set:Nx \l_@@_internal_a_tl
       {
-        \@@_compile_end:
-        \@@_compile_one:x
+        \exp_after:wN \@@_compile_cs_aux:Nn \l_@@_internal_regex
+        \q_nil \q_nil \q_recursion_stop
+      }
+    \exp_args:Nx \@@_compile_one:x
+      {
+        \flag_if_raised:nTF { @@_cs }
           { \@@_item_cs:n { \exp_not:o \l_@@_internal_regex } }
+          { \@@_item_exact_cs:n { \tl_tail:N \l_@@_internal_a_tl } }
       }
-      { \exp_after:wN \@@_compile_raw:N \c_right_brace_str }
   }
+\cs_new:Npn \@@_compile_cs_aux:Nn #1#2
+  {
+    \cs_if_eq:NNTF #1 \@@_branch:n
+      {
+        \scan_stop:
+        \@@_compile_cs_aux:NNnnnN #2
+        \q_nil \q_nil \q_nil \q_nil \q_nil \q_nil \q_recursion_stop
+        \@@_compile_cs_aux:Nn
+      }
+      {
+        \quark_if_nil:NF #1 { \flag_raise:n { @@_cs } }
+        \use_none_delimit_by_q_recursion_stop:w
+      }
+  }
+\cs_new:Npn \@@_compile_cs_aux:NNnnnN #1#2#3#4#5#6
+  {
+    \bool_lazy_all:nTF
+      {
+        { \cs_if_eq_p:NN #1 \@@_class:NnnnN }
+        {#2}
+        { \tl_if_head_eq_meaning_p:nN {#3} \@@_item_caseful_equal:n }
+        { \int_compare_p:nNn { \tl_count:n {#3} } = { 2 } }
+        { \int_compare_p:nNn {#5} = { 0 } }
+      }
+      {
+        \prg_replicate:nn {#4}
+          { \char_generate:nn { \use_ii:nn #3 } {12} }
+        \@@_compile_cs_aux:NNnnnN
+      }
+      {
+        \quark_if_nil:NF #1
+          {
+            \flag_raise:n { @@_cs }
+            \use_i_delimit_by_q_recursion_stop:nw
+          }
+        \use_none_delimit_by_q_recursion_stop:w
+      }
+  }
 %    \end{macrocode}
 % \end{macro}
+% \end{macro}
+% \end{macro}
 %
 % \subsubsection{Raw token lists with \cs{u}}
 %
@@ -3082,7 +3141,7 @@
             \@@_class:NnnnN \c_true_bool
               {
                 \if_int_compare:w "##2 = 0 \exp_stop_f:
-                  \@@_item_exact_cs:c { \exp_after:wN \cs_to_str:N ##1 }
+                  \@@_item_exact_cs:n { \exp_after:wN \cs_to_str:N ##1 }
                 \else:
                   \@@_item_exact:nn { \__int_value:w "##2 } { ##3 }
                 \fi:
@@ -3160,8 +3219,7 @@
         { \@@_show_scope:nn { Reversed~match } }
       \cs_set_protected:Npn \@@_item_exact:nn ##1##2
         { \@@_show_one:n { char~##2,~catcode~##1 } }
-      \cs_set_protected:Npn \@@_item_exact_cs:c ##1
-        { \@@_show_one:n { control~sequence~\iow_char:N\\##1 } }
+      \cs_set_eq:NN \@@_item_exact_cs:n \@@_show_item_exact_cs:n
       \cs_set_protected:Npn \@@_item_cs:n
         { \@@_show_scope:nn { control~sequence } }
       \cs_set:cpn { @@_prop_.: } { \@@_show_one:n { any~token } }
@@ -3322,6 +3380,19 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[aux]{\@@_show_item_exact_cs:n}
+%    \begin{macrocode}
+\cs_new_protected:Npn \@@_show_item_exact_cs:n #1
+  {
+    \seq_set_split:Nnn \l_@@_internal_seq { \scan_stop: } {#1}
+    \seq_set_map:NNn \l_@@_internal_seq
+      \l_@@_internal_seq { \iow_char:N\\##1 }
+    \@@_show_one:n
+      { control~sequence~ \seq_use:Nn \l_@@_internal_seq { ~or~ } }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \subsection{Building}
 %
 % \subsubsection{Variables used while building}

Modified: trunk/l3experimental/l3str/testfiles/m3regex005.tlg
===================================================================
--- trunk/l3experimental/l3str/testfiles/m3regex005.tlg	2017-02-20 19:22:46 UTC (rev 6970)
+++ trunk/l3experimental/l3str/testfiles/m3regex005.tlg	2017-02-20 20:59:37 UTC (rev 6971)
@@ -264,13 +264,7 @@
 |...............................................
 > Compiled regex {\c {a|b\K }}:
 +-branch
-  Match
-    control sequence
-    +-branch
-      char code 97
-    +-branch
-      char code 98
-      char code 75.
+  control sequence \a or \bK.
 <recently read> }
 l. ...  }
 {1} {a} 

Modified: trunk/l3experimental/l3str/testfiles/m3regex007.tlg
===================================================================
--- trunk/l3experimental/l3str/testfiles/m3regex007.tlg	2017-02-20 19:22:46 UTC (rev 6970)
+++ trunk/l3experimental/l3str/testfiles/m3regex007.tlg	2017-02-20 20:59:37 UTC (rev 6971)
@@ -46,17 +46,9 @@
 > Compiled regex {a\c {bc}\u {c_space_tl}\c {\u {c_space_tl}|}}:
 +-branch
   char code 97
-  Match
-    control sequence
-    +-branch
-      char code 98
-      char code 99
+  control sequence \bc
   char 32, catcode 10
-  Match
-    control sequence
-    +-branch
-      char code 32
-    +-branch.
+  control sequence \  or \.
 <recently read> }
 l. ...  }
 > Compiled regex {\u {l_tmpa_tl}*\c {\u {l_tmpa_tl}|x?}{3}|y*}:



More information about the latex3-commits mailing list