[latex3-commits] [git/LaTeX3-latex3-latex3] master: Pre-load Unicode encodings and escape approaches for str convert (28262bc)

Joseph Wright joseph.wright at morningstar2.co.uk
Sun Jun 2 17:11:38 CEST 2019


Repository : https://github.com/latex3/latex3
On branch  : master
Link       : https://github.com/latex3/latex3/commit/28262bccb4524b611bd93a1b597c4a513ba2a8ac

>---------------------------------------------------------------

commit 28262bccb4524b611bd93a1b597c4a513ba2a8ac
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sun Jun 2 16:09:09 2019 +0100

    Pre-load Unicode encodings and escape approaches for str convert
    
    In advance of slipstreaming into the kernel.


>---------------------------------------------------------------

28262bccb4524b611bd93a1b597c4a513ba2a8ac
 l3experimental/l3str/l3str-convert.dtx             |   42 ++-------
 l3experimental/l3str/l3str.ins                     |   15 ----
 .../l3str/testfiles/m3str-convert002.luatex.tlg    |    7 +-
 .../l3str/testfiles/m3str-convert002.tlg           |    7 +-
 .../l3str/testfiles/m3str-convert002.xetex.tlg     |    7 +-
 .../l3str/testfiles/m3str-convert004.lvt           |    7 --
 .../l3str/testfiles/m3str-convert004.tlg           |   90 --------------------
 7 files changed, 9 insertions(+), 166 deletions(-)

diff --git a/l3experimental/l3str/l3str-convert.dtx b/l3experimental/l3str/l3str-convert.dtx
index fb21877..8a7b937 100644
--- a/l3experimental/l3str/l3str-convert.dtx
+++ b/l3experimental/l3str/l3str-convert.dtx
@@ -1299,11 +1299,7 @@
   }
 %    \end{macrocode}
 %
-%    \begin{macrocode}
-%</initex|package>
-%    \end{macrocode}
-%
-% \subsection{Escaping definition files}
+% \subsection{Escaping definitions}
 %
 % Several of those encodings are defined by the pdf file format.  The
 % following byte storage methods are defined:
@@ -1330,7 +1326,6 @@
 %   hexadecimal digits gets |0| appended to it: this is equivalent to
 %   appending a |0| in all cases, and dropping it if it is alone.
 %    \begin{macrocode}
-%<*hex>
 \cs_new_protected:Npn \@@_convert_unescape_hex:
   {
     \group_begin:
@@ -1377,7 +1372,6 @@
     Some~characters~in~the~string~you~asked~to~convert~are~not~
     hexadecimal~digits~(0-9,~A-F,~a-f)~nor~spaces.
   }
-%</hex>
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
@@ -1408,7 +1402,6 @@
 %   flag, and call the looping function followed by the two characters
 %   (remove \cs{use_i:nnn}).
 %    \begin{macrocode}
-%<*name|url>
 \cs_set_protected:Npn \@@_tmp:w #1#2#3
   {
     \cs_new_protected:cpn { @@_convert_unescape_#2: }
@@ -1455,15 +1448,10 @@
         two~hexadecimal~digits.~This~is~invalid~in~the~escaping~'#2'.
       }
   }
-%</name|url>
-%<*name>
 \exp_after:wN \@@_tmp:w \c_hash_str { name }
   \@@_unescape_name_loop:wNN
-%</name>
-%<*url>
 \exp_after:wN \@@_tmp:w \c_percent_str { url }
   \@@_unescape_url_loop:wNN
-%</url>
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
@@ -1497,7 +1485,6 @@
 %   end-of-line are ignored. If followed by anything else, the backslash
 %   is ignored, raising the error flag.
 %    \begin{macrocode}
-%<*string>
 \group_begin:
   \char_set_catcode_other:N \^^J
   \char_set_catcode_other:N \^^M
@@ -1586,7 +1573,6 @@
       of~a~line.
     }
 \group_end:
-%</string>
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
@@ -1602,12 +1588,10 @@
 % \begin{macro}[rEXP]{\@@_escape_hex_char:N}
 %   Loop and convert each byte to hexadecimal.
 %    \begin{macrocode}
-%<*hex>
 \cs_new_protected:Npn \@@_convert_escape_hex:
   { \@@_convert_gmap:N \@@_escape_hex_char:N }
 \cs_new:Npn \@@_escape_hex_char:N #1
   { \@@_output_hexadecimal:n { `#1 } }
-%</hex>
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
@@ -1624,7 +1608,6 @@
 %   hash-encoded, and characters in the \cs{c_@@_escape_name_str} are
 %   encoded.
 %    \begin{macrocode}
-%<*name>
 \str_const:Nn \c_@@_escape_name_not_str { ! " $ & ' } %$
 \str_const:Nn \c_@@_escape_name_str { {}/<>[] }
 \cs_new_protected:Npn \@@_convert_escape_name:
@@ -1648,7 +1631,6 @@
       \fi:
     \fi:
   }
-%</name>
 %    \end{macrocode}
 % \end{variable}
 % \end{variable}
@@ -1664,7 +1646,6 @@
 %   (and including) \texttt{del}, are converted to octal.  One backslash
 %   is added before each parenthesis and backslash.
 %    \begin{macrocode}
-%<*string>
 \str_const:Nx \c_@@_escape_string_str
   { \c_backslash_str ( ) }
 \cs_new_protected:Npn \@@_convert_escape_string:
@@ -1697,7 +1678,6 @@
       \fi:
     \fi:
   }
-%</string>
 %    \end{macrocode}
 % \end{variable}
 % \end{macro}
@@ -1710,7 +1690,6 @@
 %   This function is similar to \cs{@@_convert_escape_name:}, escaping
 %   different characters.
 %    \begin{macrocode}
-%<*url>
 \cs_new_protected:Npn \@@_convert_escape_url:
   { \@@_convert_gmap:N \@@_escape_url_char:N }
 \cs_new:Npn \@@_escape_url_char:N #1
@@ -1732,13 +1711,12 @@
       \fi:
     \fi:
   }
-%</url>
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
 % \end{macro}
 %
-% \subsection{Encoding definition files}
+% \subsection{Encoding definitions}
 %
 % The \texttt{native} encoding is automatically defined. Other encodings
 % are loaded as needed. The following encodings are supported:
@@ -1752,10 +1730,6 @@
 %
 % \subsubsection{\textsc{utf-8} support}
 %
-%    \begin{macrocode}
-%<*utf8>
-%    \end{macrocode}
-%
 % \begin{macro}{\@@_convert_encode_utf8:}
 % \begin{macro}[rEXP]{\@@_encode_utf_viii_char:n}
 % \begin{macro}[rEXP]{\@@_encode_utf_viii_loop:wwnnw}
@@ -2070,16 +2044,11 @@
 % \end{macro}
 % \end{macro}
 %
-%    \begin{macrocode}
-%</utf8>
-%    \end{macrocode}
-%
 % \subsubsection{\textsc{utf-16} support}
 %
 % The definitions are done in a category code regime where the bytes
 % $254$ and $255$ used by the byte order mark have catcode~$12$.
 %    \begin{macrocode}
-%<*utf16>
 \group_begin:
   \char_set_catcode_other:N \^^fe
   \char_set_catcode_other:N \^^ff
@@ -2398,7 +2367,6 @@
 % Restore the original catcodes of bytes $254$ and $255$.
 %    \begin{macrocode}
 \group_end:
-%</utf16>
 %    \end{macrocode}
 %
 % \subsubsection{\textsc{utf-32} support}
@@ -2407,7 +2375,6 @@
 % $0$, $254$ and $255$ used by the byte order mark have catcode
 % \enquote{other}.
 %    \begin{macrocode}
-%<*utf32>
 \group_begin:
   \char_set_catcode_other:N \^^00
   \char_set_catcode_other:N \^^fe
@@ -2614,7 +2581,10 @@
 % Restore the original catcodes of bytes $0$, $254$ and $255$.
 %    \begin{macrocode}
 \group_end:
-%</utf32>
+%    \end{macrocode}
+%
+%    \begin{macrocode}
+%</initex|package>
 %    \end{macrocode}
 %
 % \subsubsection{\textsc{iso 8859} support}
diff --git a/l3experimental/l3str/l3str.ins b/l3experimental/l3str/l3str.ins
index 959deb3..1ce1399 100644
--- a/l3experimental/l3str/l3str.ins
+++ b/l3experimental/l3str/l3str.ins
@@ -56,21 +56,6 @@ and all files in that bundle must be distributed together.
 \generate{\file{l3str-convert.sty}  {\from{l3str-convert.dtx}  {package}}}
 \generate{\file{l3str-format.sty}   {\from{l3str-format.dtx}   {package}}}
 
-% Escapings.
-\generate{%
-  \file{l3str-esc-hex.def}           {\from{l3str-convert.dtx}{hex}}%
-  \file{l3str-esc-name.def}          {\from{l3str-convert.dtx}{name}}%
-  \file{l3str-esc-string.def}        {\from{l3str-convert.dtx}{string}}%
-  \file{l3str-esc-url.def}           {\from{l3str-convert.dtx}{url}}%
-}
-
-% UTF encodings.
-\generate{%
-  \file{l3str-enc-utf8.def}   {\from{l3str-convert.dtx}{utf8}}%
-  \file{l3str-enc-utf16.def}  {\from{l3str-convert.dtx}{utf16}}%
-  \file{l3str-enc-utf32.def}  {\from{l3str-convert.dtx}{utf32}}%
-}
-
 % ISO-8859 encodings.
 \generate{%
   \file{l3str-enc-iso88591.def}  {\from{l3str-convert.dtx}{iso88591}}%
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
index 5fa1760..42c66ae 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
@@ -4,7 +4,7 @@ Author: Bruno Le Floch
 ============================================================
 TEST 1: Escaping hex
 ============================================================
-(l3str-enc-iso88591.def) (l3str-esc-hex.def)
+(l3str-enc-iso88591.def)
 000102030405060708090A0B0C0D0E0F
 TRUE
 101112131415161718191A1B1C1D1E1F
@@ -41,7 +41,6 @@ TRUE
 ============================================================
 TEST 2: Escaping name
 ============================================================
-(l3str-esc-name.def)
 #00#01#02#03#04#05#06#07#08#09#0A#0B#0C#0D#0E#0F
 TRUE
 #10#11#12#13#14#15#16#17#18#19#1A#1B#1C#1D#1E#1F
@@ -78,7 +77,6 @@ TRUE
 ============================================================
 TEST 3: Escaping string
 ============================================================
-(l3str-esc-string.def)
 \000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017
 TRUE
 \020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037
@@ -115,7 +113,6 @@ TRUE
 ============================================================
 TEST 4: Escaping url
 ============================================================
-(l3str-esc-url.def)
 %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F
 TRUE
 %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F
@@ -152,7 +149,6 @@ TRUE
 ============================================================
 TEST 5: From utf8 to native
 ============================================================
-(l3str-enc-utf8.def)
 TRUE
 TRUE
 ============================================================
@@ -164,7 +160,6 @@ TRUE
 ============================================================
 TEST 7: misc
 ============================================================
-(l3str-enc-utf16.def)
 TRUE
 ============================================================
 ============================================================
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.tlg
index 7c1b2d7..9bb0ba6 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.tlg
@@ -4,7 +4,7 @@ Author: Bruno Le Floch
 ============================================================
 TEST 1: Escaping hex
 ============================================================
-(l3str-enc-iso88591.def) (l3str-esc-hex.def)
+(l3str-enc-iso88591.def)
 000102030405060708090A0B0C0D0E0F
 TRUE
 101112131415161718191A1B1C1D1E1F
@@ -41,7 +41,6 @@ TRUE
 ============================================================
 TEST 2: Escaping name
 ============================================================
-(l3str-esc-name.def)
 #00#01#02#03#04#05#06#07#08#09#0A#0B#0C#0D#0E#0F
 TRUE
 #10#11#12#13#14#15#16#17#18#19#1A#1B#1C#1D#1E#1F
@@ -78,7 +77,6 @@ TRUE
 ============================================================
 TEST 3: Escaping string
 ============================================================
-(l3str-esc-string.def)
 \000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017
 TRUE
 \020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037
@@ -115,7 +113,6 @@ TRUE
 ============================================================
 TEST 4: Escaping url
 ============================================================
-(l3str-esc-url.def)
 %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F
 TRUE
 %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F
@@ -152,7 +149,6 @@ TRUE
 ============================================================
 TEST 5: From utf8 to native
 ============================================================
-(l3str-enc-utf8.def)
 TRUE
 ! LaTeX3 Error: Character code too large for this engine.
 For immediate help type H <return>.
@@ -170,7 +166,6 @@ TRUE
 ============================================================
 TEST 7: misc
 ============================================================
-(l3str-enc-utf16.def)
 TRUE
 ============================================================
 ============================================================
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
index 5fa1760..42c66ae 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
@@ -4,7 +4,7 @@ Author: Bruno Le Floch
 ============================================================
 TEST 1: Escaping hex
 ============================================================
-(l3str-enc-iso88591.def) (l3str-esc-hex.def)
+(l3str-enc-iso88591.def)
 000102030405060708090A0B0C0D0E0F
 TRUE
 101112131415161718191A1B1C1D1E1F
@@ -41,7 +41,6 @@ TRUE
 ============================================================
 TEST 2: Escaping name
 ============================================================
-(l3str-esc-name.def)
 #00#01#02#03#04#05#06#07#08#09#0A#0B#0C#0D#0E#0F
 TRUE
 #10#11#12#13#14#15#16#17#18#19#1A#1B#1C#1D#1E#1F
@@ -78,7 +77,6 @@ TRUE
 ============================================================
 TEST 3: Escaping string
 ============================================================
-(l3str-esc-string.def)
 \000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017
 TRUE
 \020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037
@@ -115,7 +113,6 @@ TRUE
 ============================================================
 TEST 4: Escaping url
 ============================================================
-(l3str-esc-url.def)
 %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F
 TRUE
 %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F
@@ -152,7 +149,6 @@ TRUE
 ============================================================
 TEST 5: From utf8 to native
 ============================================================
-(l3str-enc-utf8.def)
 TRUE
 TRUE
 ============================================================
@@ -164,7 +160,6 @@ TRUE
 ============================================================
 TEST 7: misc
 ============================================================
-(l3str-enc-utf16.def)
 TRUE
 ============================================================
 ============================================================
diff --git a/l3experimental/l3str/testfiles/m3str-convert004.lvt b/l3experimental/l3str/testfiles/m3str-convert004.lvt
index 4088205..e4da9cd 100644
--- a/l3experimental/l3str/testfiles/m3str-convert004.lvt
+++ b/l3experimental/l3str/testfiles/m3str-convert004.lvt
@@ -25,14 +25,7 @@
         { \char_set_catcode_invalid:n {#1} }
       \__str_load_catcodes:
       \clist_map_inline:nn
-        { hex , name , string , url }
         {
-          \tex_message:D { \iow_newline: }
-          \file_input:n { l3str-esc-#1.def }
-        }
-      \clist_map_inline:nn
-        {
-          utf8 , utf16 , utf32 ,
           iso 8859 1 , iso 8859 2 , iso 8859 3 , iso 8859 4 , iso 8859 5 ,
           iso 8859 6 , iso 8859 7 , iso 8859 8 , iso 8859 9 , iso 8859 10 ,
           iso 8859 11 , iso 8859 13 , iso 8859 14 , iso 8859 15 , iso 8859 16 ,
diff --git a/l3experimental/l3str/testfiles/m3str-convert004.tlg b/l3experimental/l3str/testfiles/m3str-convert004.tlg
index 512bf21..03cf524 100644
--- a/l3experimental/l3str/testfiles/m3str-convert004.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert004.tlg
@@ -4,96 +4,6 @@ Author: Bruno Le Floch
 ============================================================
 TEST 1: Load all definition files
 ============================================================
-(l3str-esc-hex.def
-Defining \__str_convert_unescape_hex: on line ...
-Defining \__str_unescape_hex_auxi:N on line ...
-Defining \__str_unescape_hex_auxii:N on line ...
-Defining message LaTeX/str/unescape-hex on line ...
-Defining \__str_convert_escape_hex: on line ...
-Defining \__str_escape_hex_char:N on line ...
-) 
-(l3str-esc-name.def
-Defining \__str_convert_unescape_name: on line ...
-Defining \__str_unescape_name_loop:wNN on line ...
-Defining message LaTeX/str/unescape-name on line ...
-Defining \c__str_escape_name_not_str on line ...
-Defining \c__str_escape_name_str on line ...
-Defining \__str_convert_escape_name: on line ...
-Defining \__str_escape_name_char:N on line ...
-Defining \__str_if_escape_name:NTF on line ...
-) 
-(l3str-esc-string.def
-Defining \__str_convert_unescape_string: on line ...
-Defining \__str_unescape_string_loop:wNNN on line ...
-Defining \__str_unescape_string_repeat:NNNNNN on line ...
-Defining \__str_unescape_string_newlines:wN on line ...
-Defining message LaTeX/str/unescape-string on line ...
-Defining \c__str_escape_string_str on line ...
-Defining \__str_convert_escape_string: on line ...
-Defining \__str_escape_string_char:N on line ...
-Defining \__str_if_escape_string:NTF on line ...
-) 
-(l3str-esc-url.def
-Defining \__str_convert_unescape_url: on line ...
-Defining \__str_unescape_url_loop:wNN on line ...
-Defining message LaTeX/str/unescape-url on line ...
-Defining \__str_convert_escape_url: on line ...
-Defining \__str_escape_url_char:N on line ...
-Defining \__str_if_escape_url:NTF on line ...
-) 
-(l3str-enc-utf8.def
-Defining \__str_convert_encode_utf8: on line ...
-Defining \__str_encode_utf_viii_char:n on line ...
-Defining \__str_encode_utf_viii_loop:wwnnw on line ...
-Defining \flag str_missing on line ...
-Defining \flag str_extra on line ...
-Defining \flag str_overlong on line ...
-Defining \flag str_overflow on line ...
-Defining message LaTeX/str/utf8-decode on line ...
-Defining \__str_convert_decode_utf8: on line ...
-Defining \__str_decode_utf_viii_start:N on line ...
-Defining \__str_decode_utf_viii_continuation:wwN on line ...
-Defining \__str_decode_utf_viii_aux:wNnnwN on line ...
-Defining \__str_decode_utf_viii_overflow:w on line ...
-Defining \__str_decode_utf_viii_end: on line ...
-) 
-(l3str-enc-utf16.def
-Defining \__str_convert_encode_utf16: on line ...
-Defining \__str_convert_encode_utf16be: on line ...
-Defining \__str_convert_encode_utf16le: on line ...
-Defining \__str_encode_utf_xvi_aux:N on line ...
-Defining \__str_encode_utf_xvi_char:n on line ...
-Defining \flag str_end on line ...
-Defining message LaTeX/str/utf16-encode on line ...
-Defining message LaTeX/str/utf16-decode on line ...
-Defining \__str_convert_decode_utf16be: on line ...
-Defining \__str_convert_decode_utf16le: on line ...
-Defining \__str_convert_decode_utf16: on line ...
-Defining \__str_decode_utf_xvi_bom:NN on line ...
-Defining \__str_decode_utf_xvi:Nw on line ...
-Defining \__str_decode_utf_xvi_pair:NN on line ...
-Defining \__str_decode_utf_xvi_quad:NNwNN on line ...
-Defining \__str_decode_utf_xvi_pair_end:Nw on line ...
-Defining \__str_decode_utf_xvi_extra:NNw on line ...
-Defining \__str_decode_utf_xvi_error:nNN on line ...
-) 
-(l3str-enc-utf32.def
-Defining \__str_convert_encode_utf32: on line ...
-Defining \__str_convert_encode_utf32be: on line ...
-Defining \__str_convert_encode_utf32le: on line ...
-Defining \__str_encode_utf_xxxii_be:n on line ...
-Defining \__str_encode_utf_xxxii_be_aux:nn on line ...
-Defining \__str_encode_utf_xxxii_le:n on line ...
-Defining \__str_encode_utf_xxxii_le_aux:nn on line ...
-Defining message LaTeX/str/utf32-decode on line ...
-Defining \__str_convert_decode_utf32be: on line ...
-Defining \__str_convert_decode_utf32le: on line ...
-Defining \__str_convert_decode_utf32: on line ...
-Defining \__str_decode_utf_xxxii_bom:NNNN on line ...
-Defining \__str_decode_utf_xxxii:Nw on line ...
-Defining \__str_decode_utf_xxxii_loop:NNNN on line ...
-Defining \__str_decode_utf_xxxii_end:w on line ...
-) 
 (l3str-enc-iso88591.def
 Defining \__str_convert_decode_iso88591: on line ...
 Defining \__str_convert_encode_iso88591: on line ...





More information about the latex3-commits mailing list