[latex3-commits] [l3svn] r7077 - Remove l3regex dependence on l3str-convert
noreply at latex-project.org
noreply at latex-project.org
Wed Apr 12 20:15:52 CEST 2017
Author: bruno
Date: 2017-04-12 20:15:50 +0200 (Wed, 12 Apr 2017)
New Revision: 7077
Modified:
trunk/l3experimental/l3str/l3regex.dtx
trunk/l3experimental/l3str/l3str-convert.dtx
trunk/l3experimental/l3str/testfiles/m3regex001.lvt
Log:
Remove l3regex dependence on l3str-convert
That requires keeping a copy of a helper in both packages, but
decoupling the two packages is well worth it.
Modified: trunk/l3experimental/l3str/l3regex.dtx
===================================================================
--- trunk/l3experimental/l3str/l3regex.dtx 2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/l3regex.dtx 2017-04-12 18:15:50 UTC (rev 7077)
@@ -736,7 +736,7 @@
%<*package>
\ProvidesExplPackage{l3regex}{2017/04/01}{}
{L3 Experimental regular expressions}
-\RequirePackage{l3tl-build, l3tl-analysis, l3str-convert}
+\RequirePackage{l3tl-build, l3tl-analysis}
%</package>
% \end{macrocode}
%
@@ -918,6 +918,20 @@
%
% \subsubsection{Testing characters}
%
+% \begin{variable}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
+% \begin{macrocode}
+\int_const:Nn \c_@@_ascii_min_int { 0 }
+\int_const:Nn \c_@@_ascii_max_control_int { 31 }
+\int_const:Nn \c_@@_ascii_max_int { 127 }
+% \end{macrocode}
+% \end{variable}
+%
+% \begin{variable}{\c_@@_ascii_lower_int}
+% \begin{macrocode}
+\int_const:Nn \c_@@_ascii_lower_int { `a - `A }
+% \end{macrocode}
+% \end{variable}
+%
% \begin{macro}[int]{\@@_break_point:TF}
% \begin{macro}[int]{\@@_break_true:w}
% When testing whether a character of the query token list matches
@@ -1027,12 +1041,12 @@
\if_int_compare:w \l_@@_current_char_int > `Z \exp_stop_f:
\if_int_compare:w \l_@@_current_char_int > `z \exp_stop_f: \else:
\if_int_compare:w \l_@@_current_char_int < `a \exp_stop_f: \else:
- \int_sub:Nn \l_@@_case_changed_char_int { \c__str_ascii_lower_int }
+ \int_sub:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int }
\fi:
\fi:
\else:
\if_int_compare:w \l_@@_current_char_int < `A \exp_stop_f: \else:
- \int_add:Nn \l_@@_case_changed_char_int { \c__str_ascii_lower_int }
+ \int_add:Nn \l_@@_case_changed_char_int { \c_@@_ascii_lower_int }
\fi:
\fi:
}
@@ -1214,16 +1228,16 @@
\cs_new_protected:Npn \@@_posix_ascii:
{
\@@_item_caseful_range:nn
- \c__str_ascii_min_int
- \c__str_ascii_max_int
+ \c_@@_ascii_min_int
+ \c_@@_ascii_max_int
}
\cs_new_eq:NN \@@_posix_blank: \@@_prop_h:
\cs_new_protected:Npn \@@_posix_cntrl:
{
\@@_item_caseful_range:nn
- \c__str_ascii_min_int
- \c__str_ascii_max_control_int
- \@@_item_caseful_equal:n \c__str_ascii_max_int
+ \c_@@_ascii_min_int
+ \c_@@_ascii_max_control_int
+ \@@_item_caseful_equal:n \c_@@_ascii_max_int
}
\cs_new_eq:NN \@@_posix_digit: \@@_prop_d:
\cs_new_protected:Npn \@@_posix_graph:
@@ -1302,7 +1316,7 @@
\cs_set:Npn \@@_escape_escaped:N ##1 { #2 }
\cs_set:Npn \@@_escape_raw:N ##1 { #3 }
\int_set:Nn \tex_escapechar:D { `\\ }
- \__str_gset_other:Nn \g_@@_internal_tl { #4 }
+ \tl_gset:Nx \g_@@_internal_tl { \__str_to_other_fast:n {#4} }
\tl_set:Nx \l_@@_internal_b_tl
{
\exp_after:wN \@@_escape_loop:N \g_@@_internal_tl
@@ -1389,12 +1403,11 @@
%
% \begin{macro}[aux]{\@@_escape_/x:w}
% \begin{macro}[aux]{\@@_escape_x_end:w, \@@_escape_x_large:n}
-% When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible
-% for grabbing some hexadecimal digits, and feeding the result to
-% \cs{@@_escape_x_end:w}. If the number is $<256$, then it is
-% turned into a byte and fed to \cs{@@_escape_raw:N}. Otherwise,
-% interrupt the assignment, and either produce an error, or use a
-% standard \tn{lowercase} trick depending on the precise value.
+% When |\x| is encountered, \cs{@@_escape_x_test:N} is responsible for
+% grabbing some hexadecimal digits, and feeding the result to
+% \cs{@@_escape_x_end:w}. If the number is too big, interrupt the
+% assignment and produce an error; otherwise call \cs{@@_escape_raw:N}
+% on the corresponding character token.
% \begin{macrocode}
\cs_new:cpn { @@_escape_/x:w } \@@_escape_loop:N
{
@@ -1403,40 +1416,23 @@
}
\cs_new:Npn \@@_escape_x_end:w #1 ;
{
- \int_compare:nNnTF {#1} > \c__str_max_byte_int
- { \@@_escape_x_large:n {#1} }
+ \int_compare:nNnTF {#1} > \c_max_char_int
{
+ \if_false: { \fi: }
+ \__tl_build_one:o \l_@@_internal_b_tl
+ \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
+ \tl_set:Nx \l_@@_internal_b_tl
+ { \if_false: } \fi:
+ }
+ {
\exp_last_unbraced:Nf \@@_escape_raw:N
- { \__str_output_byte:n {#1} }
+ { \char_generate:nn {#1} { 12 } }
}
}
-\group_begin:
- \char_set_catcode_other:n { 0 }
- \cs_new:Npn \@@_escape_x_large:n #1
- {
- \if_false: { \fi: }
- \__tl_build_one:o \l_@@_internal_b_tl
- \int_compare:nNnTF {#1} > \c_max_char_int
- {
- \__msg_kernel_error:nnx { regex } { x-overflow } {#1}
- \tl_set:Nx \l_@@_internal_b_tl
- { \if_false: } \fi:
- }
- {
- \char_set_lccode:nn { 0 } {#1}
- \tex_lowercase:D
- {
- \tl_set:Nx \l_@@_internal_b_tl
- { \if_false: } \fi:
- \@@_escape_raw:N ^^@
- }
- }
- }
-\group_end:
% \end{macrocode}
% \end{macro}
% \end{macro}
-% ^^A todo: use char_generate in above and rest of file
+% ^^A todo: use char_generate in the rest of the file
% \begin{macro}[aux]{\@@_escape_x_test:N, \@@_escape_x_testii:N}
% Find out whether the first character is a left brace (allowing any
% number of hexadecimal digits), or not (allowing up to two
@@ -1461,7 +1457,7 @@
\if_charcode:w \c_left_brace_str #1
\exp_after:wN \@@_escape_x_loop:N
\else:
- \__str_hexadecimal_use:NTF #1
+ \@@_hexadecimal_use:NTF #1
{ \exp_after:wN \@@_escape_x:N }
{ ; \exp_after:wN \@@_escape_loop:N \exp_after:wN #1 }
\fi:
@@ -1476,7 +1472,7 @@
{
\str_if_eq_x:nnTF {#1} { break } { ; }
{
- \__str_hexadecimal_use:NTF #1
+ \@@_hexadecimal_use:NTF #1
{ ; \@@_escape_loop:N }
{ ; \@@_escape_loop:N #1 }
}
@@ -1494,7 +1490,7 @@
\str_if_eq_x:nnTF {#1} { break }
{ ; \@@_escape_x_loop_error:n { } {#1} }
{
- \__str_hexadecimal_use:NTF #1
+ \@@_hexadecimal_use:NTF #1
{ \@@_escape_x_loop:N }
{
\token_if_eq_charcode:NNTF \c_space_token #1
@@ -1520,6 +1516,35 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}[aux, rEXP]{\@@_hexadecimal_use:NTF}
+% \TeX{} detects uppercase hexadecimal digits for us but not the
+% lowercase letters, which we need to detect and replace by their
+% uppercase counterpart.
+% \begin{macrocode}
+\prg_new_conditional:Npnn \@@_hexadecimal_use:N #1 { TF }
+ {
+ \if_int_compare:w 1 < "1 \token_to_str:N #1 \exp_stop_f: % "1#1 exceeds 1 only if TeX reads #1 as a hex digit (0-9, A-F)
+ #1 \prg_return_true: % leave the digit unchanged in the input stream
+ \else:
+ \if_case:w \__int_eval:w % select on the offset of #1 from `a
+ \exp_after:wN ` \token_to_str:N #1 - `a
+ \__int_eval_end:
+ A % offset 0, i.e. a
+ \or: B
+ \or: C
+ \or: D
+ \or: E
+ \or: F % offset 5, i.e. f
+ \else: % any other offset: #1 is not a hexadecimal digit
+ \prg_return_false:
+ \exp_after:wN \use_none:n % expand \fi: first, then drop the \prg_return_true: that follows it
+ \fi:
+ \prg_return_true: % kept only when one of the cases a-f left its uppercase letter
+ \fi:
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}[EXP, aux]
% {\@@_char_if_alphanumeric:NTF, \@@_char_if_special:NTF}
% These two tests are used in the first pass when parsing a regular
@@ -1549,7 +1574,7 @@
{
\if_int_compare:w `#1 > `Z \exp_stop_f:
\if_int_compare:w `#1 > `z \exp_stop_f:
- \if_int_compare:w `#1 < \c__str_ascii_max_int
+ \if_int_compare:w `#1 < \c_@@_ascii_max_int
\prg_return_true: \else: \prg_return_false: \fi:
\else:
\if_int_compare:w `#1 < `a \exp_stop_f:
@@ -3105,8 +3130,8 @@
% \begin{macrocode}
\cs_new_protected:Npn \@@_compile_u_in_cs:
{
- \exp_args:NNo \__str_gset_other:Nn \g_@@_internal_tl
- { \l_@@_internal_a_tl }
+ \tl_gset:Nx \g_@@_internal_tl
+ { \exp_args:No \__str_to_other_fast:n { \l_@@_internal_a_tl } }
\__tl_build_one:x
{
\tl_map_function:NN \g_@@_internal_tl
Modified: trunk/l3experimental/l3str/l3str-convert.dtx
===================================================================
--- trunk/l3experimental/l3str/l3str-convert.dtx 2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/l3str-convert.dtx 2017-04-12 18:15:50 UTC (rev 7077)
@@ -192,15 +192,6 @@
%
% \section{Internal string functions}
%
-% \begin{function}{\__str_gset_other:Nn}
-% \begin{syntax}
-% \cs{__str_gset_other:Nn} \meta{tl~var} \Arg{token list}
-% \end{syntax}
-% Converts the \meta{token list} to an \meta{other string}, where
-% spaces have category code \enquote{other}, and assigns the result to
-% the \meta{tl~var}, globally.
-% \end{function}
-%
% \begin{function}{\__str_hexadecimal_use:NTF}
% \begin{syntax}
% \cs{__str_hexadecimal_use:NTF} \meta{token} \Arg{true code} \Arg{false code}
@@ -216,17 +207,6 @@
% \end{texnote}
% \end{function}
%
-% \begin{function}[EXP]{\__str_output_byte:n}
-% \begin{syntax}
-% \cs{__str_output_byte:n} \Arg{intexpr}
-% \end{syntax}
-% Expands to a character token with category other and character code
-% equal to the value of \meta{intexpr}. The value of \meta{intexpr}
-% must be in the range $[-1, 255]$, and any value outside this range
-% results in undefined behaviour. The special value $-1$ is used to
-% produce an empty result.
-% \end{function}
-%
% \section{Possibilities, and things to do}
%
% Encoding/escaping-related tasks.
@@ -330,20 +310,6 @@
% \end{macrocode}
% \end{variable}
%
-% \begin{macro}{\c_@@_ascii_min_int, \c_@@_ascii_max_control_int, \c_@@_ascii_max_int}
-% \begin{macrocode}
-\int_const:Nn \c_@@_ascii_min_int { 0 }
-\int_const:Nn \c_@@_ascii_max_control_int { 31 }
-\int_const:Nn \c_@@_ascii_max_int { 127 }
-% \end{macrocode}
-% \end{macro}
-%
-% \begin{macro}[aux]{\c_@@_ascii_lower_int}
-% \begin{macrocode}
-\int_const:Nn \c_@@_ascii_lower_int { `a - `A }
-% \end{macrocode}
-% \end{macro}
-%
% \begin{variable}{\g_@@_alias_prop}
% To avoid needing one file per encoding/escaping alias, we keep track
% of those in a property list.
@@ -387,48 +353,6 @@
% \end{macrocode}
% \end{variable}
%
-% \subsubsection{Escaping spaces}
-% ^^A todo: use char_generate in this file
-% \begin{macro}[int]{\@@_gset_other:Nn}
-% \begin{macro}[aux,EXP]{\@@_gset_other_loop:w}
-% \begin{macro}[aux,EXP]{\@@_gset_other_end:w}
-% This function could be done by using \cs{@@_to_other:n} within
-% an \texttt{x}-expansion, but that would take a time quadratic in the
-% size of the string. Instead, we can \enquote{leave the result behind
-% us} in the input stream, to be captured into the expanding
-% assignment. This gives us a linear time.
-% \begin{macrocode}
-\group_begin:
-\char_set_lccode:nn { `\* } { `\ }
-\char_set_lccode:nn { `\A } { `\A }
-\tex_lowercase:D
- {
- \group_end:
- \cs_new_protected:Npn \@@_gset_other:Nn #1#2
- {
- \tl_gset:Nx #1
- {
- \exp_after:wN \@@_gset_other_loop:w \tl_to_str:n {#2} ~ %
- A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ A ~ \q_stop
- }
- }
- \cs_new:Npn \@@_gset_other_loop:w
- #1 ~ #2 ~ #3 ~ #4 ~ #5 ~ #6 ~ #7 ~ #8 ~ #9 ~
- {
- \if_meaning:w A #9
- \@@_gset_other_end:w
- \fi:
- #1 * #2 * #3 * #4 * #5 * #6 * #7 * #8 * #9
- \@@_gset_other_loop:w *
- }
- \cs_new:Npn \@@_gset_other_end:w \fi: #1 * A #2 \q_stop
- { \fi: #1 }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
% \subsection{String conditionals}
%
% \begin{macro}[EXP]{\@@_if_contains_char:NNT, \@@_if_contains_char:NNTF}
@@ -544,12 +468,12 @@
% an empty result for the input $-1$.
% \begin{macrocode}
\group_begin:
- \tl_set:Nx \l__str_internal_tl { \tl_to_str:n { 0123456789ABCDEF } }
- \tl_map_inline:Nn \l__str_internal_tl
+ \tl_set:Nx \l_@@_internal_tl { \tl_to_str:n { 0123456789ABCDEF } }
+ \tl_map_inline:Nn \l_@@_internal_tl
{
- \tl_map_inline:Nn \l__str_internal_tl
+ \tl_map_inline:Nn \l_@@_internal_tl
{
- \tl_const:cx { c__str_byte_ \int_eval:n {"#1##1} _tl }
+ \tl_const:cx { c_@@_byte_ \int_eval:n {"#1##1} _tl }
{ \char_generate:nn { "#1##1 } { 12 } #1 ##1 }
}
}
@@ -783,7 +707,7 @@
{
\group_begin:
#1
- \@@_gset_other:Nn \g_@@_result_tl {#4}
+ \tl_gset:Nx \g_@@_result_tl { \@@_to_other_fast:n {#4} }
\exp_after:wN \@@_convert:wwwnn
\tl_to_str:n {#5} /// \q_stop
{ decode } { unescape }
@@ -951,7 +875,7 @@
#1
\fi:
\else:
- \@@_output_byte:n { `#1 + \c_@@_ascii_lower_int }
+ \@@_output_byte:n { `#1 + `a - `A }
\fi:
\fi:
\@@_convert_lowercase_alphanum_loop:N
Modified: trunk/l3experimental/l3str/testfiles/m3regex001.lvt
===================================================================
--- trunk/l3experimental/l3str/testfiles/m3regex001.lvt 2017-04-12 17:46:46 UTC (rev 7076)
+++ trunk/l3experimental/l3str/testfiles/m3regex001.lvt 2017-04-12 18:15:50 UTC (rev 7077)
@@ -40,7 +40,7 @@
\int_compare:nNnTF { `#1 } < { `\ }
{
\int_compare:nNnTF { `#1 } = { 10 }
- {#1} { ^ ^ \__str_output_byte:n { `#1 + `@ } }
+ {#1} { ^ ^ \char_generate:nn { `#1 + `@ } { 12 } }
}
{ \int_compare:nNnTF { `#1 } = { 127 } { ^ ^ ? } {#1} }
}
@@ -80,8 +80,9 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\OMIT
\tl_new:N \g_ascii_chars_str
+\cs_set:Npn \test_tmp:n #1 { \char_generate:nn {#1} { 12 } }
\tl_gset:Nx \g_ascii_chars_str
- { \int_step_function:nnnN { 0 } { 1 } { 127 } \__str_output_byte:n }
+ { \int_step_function:nnnN { 0 } { 1 } { 127 } \test_tmp:n }
\tl_greplace_once:Nnn \g_ascii_chars_str { ' } { ' ^^J }
\tl_greplace_once:Nnn \g_ascii_chars_str { ` } { ^^J ` }
% We add a couple line-breaks to avoid over-long lines
More information about the latex3-commits
mailing list