[latex3-commits] [l3svn] r7077 - Remove l3regex dependence on l3str-convert

Wed Apr 12 20:15:52 CEST 2017

Author: bruno
Date: 2017-04-12 20:15:50 +0200 (Wed, 12 Apr 2017)
New Revision: 7077

Modified:
   trunk/l3experimental/l3str/l3regex.dtx
   trunk/l3experimental/l3str/l3str-convert.dtx
   trunk/l3experimental/l3str/testfiles/m3regex001.lvt
Log:
Remove l3regex dependence on l3str-convert

That requires keeping a copy of a helper in both packages, but
decoupling the two packages is well worth it.


Modified: trunk/l3experimental/l3str/l3regex.dtx
===================================================================

--- trunk/l3experimental/l3str/l3regex.dtx	2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/l3regex.dtx	2017-04-12 18:15:50 UTC (rev 7077)
@@ -736,7 +736,7 @@
 %<*package>
 \ProvidesExplPackage{l3regex}{2017/04/01}{}
   {L3 Experimental regular expressions}
-\RequirePackage{l3tl-build, l3tl-analysis, l3str-convert}
+\RequirePackage{l3tl-build, l3tl-analysis}
 %</package>
 %    \end{macrocode}
 %
@@ -918,6 +918,20 @@
 %
 % \subsubsection{Testing characters}
 %
+% \begin{variable}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
+%    \begin{macrocode}
+\int_const:Nn \c_@@_ascii_min_int { 0 }
+\int_const:Nn \c_@@_ascii_max_control_int { 31 }
+\int_const:Nn \c_@@_ascii_max_int { 127 }
+%    \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\c_@@_ascii_lower_int}
+%    \begin{macrocode}
+\int_const:Nn \c_@@_ascii_lower_int { `a - `A }
+%    \end{macrocode}
+% \end{variable}
+%
 % \begin{macro}[int]{\@@_break_point:TF}
 % \begin{macro}[int]{\@@_break_true:w}
 %   When testing whether a character of the query token list matches
@@ -1027,12 +1041,12 @@
     \if_int_compare:w \l_@@_current_char_int > `Z \exp_stop_f:
       \if_int_compare:w \l_@@_current_char_int > `z \exp_stop_f: \else:
         \if_int_compare:w \l_@@_current_char_int < `a \exp_stop_f: \else:
-          \int_sub:Nn \l_@@_case_changed_char_int { \c__str_ascii_lower_int }
+          \int_sub:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int }
         \fi:
       \fi:
     \else:
       \if_int_compare:w \l_@@_current_char_int < `A \exp_stop_f: \else:
-        \int_add:Nn \l_@@_case_changed_char_int { \c__str_ascii_lower_int }
+        \int_add:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int }
       \fi:
     \fi:
   }
@@ -1214,16 +1228,16 @@
 \cs_new_protected:Npn \@@_posix_ascii:
   {
     \@@_item_caseful_range:nn
-      \c__str_ascii_min_int
-      \c__str_ascii_max_int
+      \c_@@_ascii_min_int
+      \c_@@_ascii_max_int
   }
 \cs_new_eq:NN \@@_posix_blank: \@@_prop_h:
 \cs_new_protected:Npn \@@_posix_cntrl:
   {
     \@@_item_caseful_range:nn
-      \c__str_ascii_min_int
-      \c__str_ascii_max_control_int
-    \@@_item_caseful_equal:n \c__str_ascii_max_int
+      \c_@@_ascii_min_int
+      \c_@@_ascii_max_control_int
+    \@@_item_caseful_equal:n \c_@@_ascii_max_int
   }
 \cs_new_eq:NN \@@_posix_digit: \@@_prop_d:
 \cs_new_protected:Npn \@@_posix_graph:
@@ -1302,7 +1316,7 @@
       \cs_set:Npn \@@_escape_escaped:N ##1 { #2 }
       \cs_set:Npn \@@_escape_raw:N ##1 { #3 }
       \int_set:Nn \tex_escapechar:D { `\\ }
-      \__str_gset_other:Nn \g_@@_internal_tl { #4 }
+      \tl_gset:Nx \g_@@_internal_tl { \__str_to_other_fast:n {#4} }
       \tl_set:Nx \l_@@_internal_b_tl
         {
           \exp_after:wN \@@_escape_loop:N \g_@@_internal_tl
@@ -1389,12 +1403,11 @@
 %
 % \begin{macro}[aux]{\@@_escape_/x:w}
 % \begin{macro}[aux]{\@@_escape_x_end:w, \@@_escape_x_large:n}
-%   When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible
-%   for grabbing some hexadecimal digits, and feeding the result to
-%   \cs{@@_escape_x_end:w}. If the number is $<256$, then it is
-%   turned into a byte and fed to \cs{@@_escape_raw:N}. Otherwise,
-%   interrupt the assignment, and either produce an error, or use a
-%   standard \tn{lowercase} trick depending on the precise value.
+%   When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible for
+%   grabbing some hexadecimal digits, and feeding the result to
+%   \cs{@@_escape_x_end:w}. If the number is too big, interrupt the
+%   assignment and produce an error; otherwise call \cs{@@_escape_raw:N}
+%   on the corresponding character token.
 %    \begin{macrocode}
 \cs_new:cpn { @@_escape_/x:w } \@@_escape_loop:N
   {
@@ -1403,40 +1416,23 @@
   }
 \cs_new:Npn \@@_escape_x_end:w #1 ;
   {
-    \int_compare:nNnTF {#1} > \c__str_max_byte_int
-      { \@@_escape_x_large:n {#1} }
+    \int_compare:nNnTF {#1} > \c_max_char_int
       {
+        \if_false: { \fi: }
+        \__tl_build_one:o \l_@@_internal_b_tl
+        \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
+        \tl_set:Nx \l_@@_internal_b_tl
+          { \if_false: } \fi:
+      }
+      {
         \exp_last_unbraced:Nf \@@_escape_raw:N
-          { \__str_output_byte:n {#1} }
+          { \char_generate:nn {#1} { 12 } }
       }
   }
-\group_begin:
-  \char_set_catcode_other:n { 0 }
-  \cs_new:Npn \@@_escape_x_large:n #1
-    {
-      \if_false: { \fi: }
-      \__tl_build_one:o \l_@@_internal_b_tl
-      \int_compare:nNnTF {#1} > \c_max_char_int
-        {
-          \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
-          \tl_set:Nx \l_@@_internal_b_tl
-            { \if_false: } \fi:
-        }
-        {
-          \char_set_lccode:nn { 0 } {#1}
-          \tex_lowercase:D
-            {
-              \tl_set:Nx \l_@@_internal_b_tl
-                { \if_false: } \fi:
-                \@@_escape_raw:N ^^@
-            }
-        }
-    }
-\group_end:
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
-% ^^A todo: use char_generate in above and rest of file
+% ^^A todo: use char_generate in the rest of the file
 % \begin{macro}[aux]{\@@_escape_x_test:N, \@@_escape_x_testii:N}
 %   Find out whether the first character is a left brace (allowing any
 %   number of hexadecimal digits), or not (allowing up to two
@@ -1461,7 +1457,7 @@
     \if_charcode:w \c_left_brace_str #1
       \exp_after:wN \@@_escape_x_loop:N
     \else:
-      \__str_hexadecimal_use:NTF #1
+      \@@_hexadecimal_use:NTF #1
         { \exp_after:wN \@@_escape_x:N }
         { ; \exp_after:wN \@@_escape_loop:N \exp_after:wN #1 }
     \fi:
@@ -1476,7 +1472,7 @@
   {
     \str_if_eq_x:nnTF {#1} { break } { ; }
       {
-        \__str_hexadecimal_use:NTF #1
+        \@@_hexadecimal_use:NTF #1
           { ; \@@_escape_loop:N }
           { ; \@@_escape_loop:N #1 }
       }
@@ -1494,7 +1490,7 @@
     \str_if_eq_x:nnTF {#1} { break }
       { ; \@@_escape_x_loop_error:n { } {#1} }
       {
-        \__str_hexadecimal_use:NTF #1
+        \@@_hexadecimal_use:NTF #1
           { \@@_escape_x_loop:N }
           {
             \token_if_eq_charcode:NNTF \c_space_token #1
@@ -1520,6 +1516,35 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[aux, rEXP]{\@@_hexadecimal_use:NTF}
+%   \TeX{} detects uppercase hexadecimal digits for us but not the
+%   lowercase letters, which we need to detect and replace by their
+%   uppercase counterparts.
+%    \begin{macrocode}
+\prg_new_conditional:Npnn \@@_hexadecimal_use:N #1 { TF }
+  {
+    \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f:
+      #1 \prg_return_true:
+    \else:
+      \if_case:w \__int_eval:w
+          \exp_after:wN ` \token_to_str:N #1 - `a
+        \__int_eval_end:
+           A
+      \or: B
+      \or: C
+      \or: D
+      \or: E
+      \or: F
+      \else:
+        \prg_return_false:
+        \exp_after:wN \use_none:n
+      \fi:
+      \prg_return_true:
+    \fi:
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}[EXP, aux]
 %   {\@@_char_if_alphanumeric:NTF, \@@_char_if_special:NTF}
 %   These two tests are used in the first pass when parsing a regular
@@ -1549,7 +1574,7 @@
   {
     \if_int_compare:w `#1 > `Z \exp_stop_f:
       \if_int_compare:w `#1 > `z \exp_stop_f:
-        \if_int_compare:w `#1 < \c__str_ascii_max_int
+        \if_int_compare:w `#1 < \c_@@_ascii_max_int
           \prg_return_true: \else: \prg_return_false: \fi:
       \else:
         \if_int_compare:w `#1 < `a \exp_stop_f:
@@ -3105,8 +3130,8 @@
 %    \begin{macrocode}
 \cs_new_protected:Npn \@@_compile_u_in_cs:
   {
-    \exp_args:NNo \__str_gset_other:Nn \g_@@_internal_tl
-      { \l_@@_internal_a_tl }
+    \tl_gset:Nx \g_@@_internal_tl
+      { \exp_args:No \__str_to_other_fast:n { \l_@@_internal_a_tl } }
     \__tl_build_one:x
       {
         \tl_map_function:NN \g_@@_internal_tl

Modified: trunk/l3experimental/l3str/l3str-convert.dtx
===================================================================
--- trunk/l3experimental/l3str/l3str-convert.dtx	2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/l3str-convert.dtx	2017-04-12 18:15:50 UTC (rev 7077)
@@ -192,15 +192,6 @@
 %
 % \section{Internal string functions}
 %
-% \begin{function}{\__str_gset_other:Nn}
-%   \begin{syntax}
-%     \cs{__str_gset_other:Nn} \meta{tl~var} \Arg{token list}
-%   \end{syntax}
-%   Converts the \meta{token list} to an \meta{other string}, where
-%   spaces have category code \enquote{other}, and assigns the result to
-%   the \meta{tl~var}, globally.
-% \end{function}
-%
 % \begin{function}{\__str_hexadecimal_use:NTF}
 %   \begin{syntax}
 %     \cs{__str_hexadecimal_use:NTF} \meta{token} \Arg{true code} \Arg{false code}
@@ -216,17 +207,6 @@
 %   \end{texnote}
 % \end{function}
 %
-% \begin{function}[EXP]{\__str_output_byte:n}
-%   \begin{syntax}
-%     \cs{__str_output_byte:n} \Arg{intexpr}
-%   \end{syntax}
-%   Expands to a character token with category other and character code
-%   equal to the value of \meta{intexpr}.  The value of \meta{intexpr}
-%   must be in the range $[-1, 255]$, and any value outside this range
-%   results in undefined behaviour.  The special value $-1$ is used to
-%   produce an empty result.
-% \end{function}
-%
 % \section{Possibilities, and things to do}
 %
 % Encoding/escaping-related tasks.
@@ -330,20 +310,6 @@
 %    \end{macrocode}
 % \end{variable}
 %
-% \begin{macro}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
-%    \begin{macrocode}
-\int_const:Nn \c_@@_ascii_min_int { 0 }
-\int_const:Nn \c_@@_ascii_max_control_int { 31 }
-\int_const:Nn \c_@@_ascii_max_int { 127 }
-%    \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\c_@@_ascii_lower_int}
-%    \begin{macrocode}
-\int_const:Nn \c_@@_ascii_lower_int { `a - `A }
-%    \end{macrocode}
-% \end{macro}
-%
 % \begin{variable}{\g_@@_alias_prop}
 %   To avoid needing one file per encoding/escaping alias, we keep track
 %   of those in a property list.
@@ -387,48 +353,6 @@
 %    \end{macrocode}
 % \end{variable}
 %
-% \subsubsection{Escaping spaces}
-% ^^A todo: use char_generate in this file
-% \begin{macro}[int]{\@@_gset_other:Nn}
-% \begin{macro}[aux,EXP]{\@@_gset_other_loop:w}
-% \begin{macro}[aux,EXP]{\@@_gset_other_end:w}
-%   This function could be done by using \cs{@@_to_other:n} within
-%   an \texttt{x}-expansion, but that would take a time quadratic in the
-%   size of the string. Instead, we can \enquote{leave the result behind
-%     us} in the input stream, to be captured into the expanding
-%   assignment. This gives us a linear time.
-%    \begin{macrocode}
-\group_begin:
-\char_set_lccode:nn { `\* } { `\  }
-\char_set_lccode:nn { `\A } { `\A }
-\tex_lowercase:D
-  {
-    \group_end:
-    \cs_new_protected:Npn \@@_gset_other:Nn #1#2
-      {
-        \tl_gset:Nx #1
-          {
-            \exp_after:wN \@@_gset_other_loop:w \tl_to_str:n {#2} ~ %
-            A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \q_stop
-          }
-      }
-    \cs_new:Npn \@@_gset_other_loop:w
-      #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 ~
-      {
-        \if_meaning:w A #9
-          \@@_gset_other_end:w
-        \fi:
-        #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * #9
-        \@@_gset_other_loop:w *
-      }
-    \cs_new:Npn \@@_gset_other_end:w \fi: #1 * A #2 \q_stop
-      { \fi: #1 }
-  }
-%    \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
 % \subsection{String conditionals}
 %
 % \begin{macro}[EXP]{\@@_if_contains_char:NNT, \@@_if_contains_char:NNTF}
@@ -544,12 +468,12 @@
 %   an empty result for the input $-1$.
 %    \begin{macrocode}
 \group_begin:
-  \tl_set:Nx \l__str_internal_tl { \tl_to_str:n { 0123456789ABCDEF } }
-   \tl_map_inline:Nn \l__str_internal_tl
+  \tl_set:Nx \l_@@_internal_tl { \tl_to_str:n { 0123456789ABCDEF } }
+   \tl_map_inline:Nn \l_@@_internal_tl
      {
-        \tl_map_inline:Nn \l__str_internal_tl
+        \tl_map_inline:Nn \l_@@_internal_tl
           {
-            \tl_const:cx { c__str_byte_ \int_eval:n {"#1##1} _tl }
+            \tl_const:cx { c_@@_byte_ \int_eval:n {"#1##1} _tl }
                { \char_generate:nn { "#1##1 } { 12 } #1 ##1 }
           }
      }
@@ -783,7 +707,7 @@
   {
     \group_begin:
       #1
-      \@@_gset_other:Nn \g_@@_result_tl {#4}
+      \tl_gset:Nx \g_@@_result_tl { \@@_to_other_fast:n {#4} }
       \exp_after:wN \@@_convert:wwwnn
         \tl_to_str:n {#5} /// \q_stop
         { decode } { unescape }
@@ -951,7 +875,7 @@
           #1
         \fi:
       \else:
-        \@@_output_byte:n { `#1 + \c_@@_ascii_lower_int }
+        \@@_output_byte:n { `#1 + `a - `A }
       \fi:
     \fi:
     \@@_convert_lowercase_alphanum_loop:N

Modified: trunk/l3experimental/l3str/testfiles/m3regex001.lvt
===================================================================
--- trunk/l3experimental/l3str/testfiles/m3regex001.lvt	2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/testfiles/m3regex001.lvt	2017-04-12 18:15:50 UTC (rev 7077)
@@ -40,7 +40,7 @@
     \int_compare:nNnTF { `#1 } < { `\  }
       {
         \int_compare:nNnTF { `#1 } = { 10 }
-          {#1} { ^ ^ \__str_output_byte:n { `#1 + `@ } }
+          {#1} { ^ ^ \char_generate:nn { `#1 + `@ } { 12 } }
       }
       { \int_compare:nNnTF { `#1 } = { 127 } { ^ ^ ? } {#1} }
   }
@@ -80,8 +80,9 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \OMIT
 \tl_new:N \g_ascii_chars_str
+\cs_set:Npn \test_tmp:n #1 { \char_generate:nn {#1} { 12 } }
 \tl_gset:Nx \g_ascii_chars_str
-  { \int_step_function:nnnN { 0 } { 1 } { 127 } \__str_output_byte:n }
+  { \int_step_function:nnnN { 0 } { 1 } { 127 } \test_tmp:n }
 \tl_greplace_once:Nnn \g_ascii_chars_str { ' } { ' ^^J }
 \tl_greplace_once:Nnn \g_ascii_chars_str { ` } { ^^J ` }
 % We add a couple line-breaks to avoid over-long lines