[latex3-commits] [git/LaTeX3-latex3-latex3] master: Avoid tl_build in l3str-convert by simplifying the code (66ae261)

Bruno Le Floch bruno at le-floch.fr
Sun Apr 1 20:58:56 CEST 2018


Repository : https://github.com/latex3/latex3
On branch  : master
Link       : https://github.com/latex3/latex3/commit/66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96

>---------------------------------------------------------------

commit 66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96
Author: Bruno Le Floch <bruno at le-floch.fr>
Date:   Sun Apr 1 11:36:29 2018 -0400

    Avoid tl_build in l3str-convert by simplifying the code
    
    Also actually test an error message


>---------------------------------------------------------------

66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96
 l3experimental/l3str/l3str-convert.dtx             |  126 +++++++-------------
 ...-convert002.tlg => m3str-convert002.luatex.tlg} |    1 +
 .../l3str/testfiles/m3str-convert002.lvt           |    6 +-
 .../l3str/testfiles/m3str-convert002.tlg           |   16 +++
 ...r-convert002.tlg => m3str-convert002.xetex.tlg} |    1 +
 5 files changed, 65 insertions(+), 85 deletions(-)

diff --git a/l3experimental/l3str/l3str-convert.dtx b/l3experimental/l3str/l3str-convert.dtx
index 2cdd1b3..729cd7e 100644
--- a/l3experimental/l3str/l3str-convert.dtx
+++ b/l3experimental/l3str/l3str-convert.dtx
@@ -1000,95 +1000,53 @@
 % \end{macro}
 %
 % \begin{macro}{\@@_convert_encode_:}
-%   The conversion from an internal string to native character tokens is
-%   very different in pdf\TeX{} and in other engines. For Unicode-aware
-%   engines, we need the definitions to be read when the null byte has
-%   category code $12$, so we set that inside a group.
-%    \begin{macrocode}
-\group_begin:
-  \char_set_catcode_other:n { 0 }
-  \bool_lazy_any:nTF
-    {
-      \sys_if_engine_luatex_p:
-      \sys_if_engine_xetex_p:
-    }
-%    \end{macrocode}
-% \begin{macro}{\@@_encode_native_loop:w}
-% \begin{macro}{\@@_encode_native_flush:}
-% \begin{macro}[rEXP]{\@@_encode_native_filter:N}
-%   In Unicode-aware engines, since building particular characters
-%   cannot be done expandably in \TeX{}, we cannot hope to get a
-%   linear-time function. However, we get quite close using the
-%   \pkg{l3tl-build} module, which abuses \tn{toks} to reach an almost
-%   linear time. We produce an arbitrary character of category code 12
-%   and add that character
-%   to the end of the token list being built. At the end of the loop,
-%   put the token list together with \cs{tl_build_end:}. Note that we
-%   use an \texttt{x}-expanding assignment because it is slightly
-%   faster. Unicode-aware engines will never incur an overflow because
-%   the internal string is guaranteed to only contain code points in
-%   $[0,\hexnum{10FFFF}]$.
-%    \begin{macrocode}
-    {
-      \cs_new_protected:Npn \@@_convert_encode_:
-        {
-          \tl_gbuild_x:Nw \g_@@_result_tl
-            \exp_after:wN \@@_encode_native_loop:w
-              \g_@@_result_tl \s__tl { \q_stop \prg_break: } \s__tl
-            \prg_break_point:
-          \tl_build_end:
-        }
-      \cs_new_protected:Npn \@@_encode_native_loop:w #1 \s__tl #2 \s__tl
-        {
-          \use_none_delimit_by_q_stop:w #2 \q_stop
-          \tl_build_add:x
-            { \char_generate:nn {#2} {12} }
-          \@@_encode_native_loop:w
-        }
-    }
-%    \end{macrocode}
 % \begin{macro}[EXP]{\@@_encode_native_char:n}
-%   Since pdf\TeX{} only supports 8-bit characters, and we have a table
-%   of all bytes, the conversion can be done in linear time within an
-%   \texttt{x}-expanding assignment. Look out for character codes larger
-%   than $255$, those characters are replaced by |?|, and raise a flag,
-%   which then triggers a pdf\TeX{}-specific error.
+%   The conversion from an internal string to native character tokens
+%   essentially maps \cs{char_generate:nn} over the code points.  In
+%   engines that are not Unicode-aware, a character code outside
+%   $[0,255]$ yields the fall-back character |?| rather than nothing.
+%   Such bad characters raise a flag, so that a single error is
+%   produced after the \texttt{x}-expanding assignment.
 %    \begin{macrocode}
-    {
-      \cs_new_protected:Npn \@@_convert_encode_:
-        {
-          \flag_clear:n { str_error }
-          \@@_convert_gmap_internal:N \@@_encode_native_char:n
-          \@@_if_flag_error:nnx { str_error }
-            { pdfTeX-native-overflow } { }
-        }
-      \cs_new:Npn \@@_encode_native_char:n #1
-        {
-          \if_int_compare:w #1 > \c_@@_max_byte_int
-            \flag_raise:n { str_error }
-            ?
-          \else:
-            \@@_output_byte:n {#1}
-          \fi:
-        }
-      \__kernel_msg_new:nnnn { str } { pdfTeX-native-overflow }
-        { Character~code~too~large~for~pdfTeX. }
-        {
-          The~pdfTeX~engine~only~supports~8-bit~characters:~
-          valid~character~codes~are~in~the~range~[0,255].~
-          To~manipulate~arbitrary~Unicode,~use~LuaTeX~or~XeTeX.
-        }
-    }
+\bool_lazy_any:nTF
+  {
+    \sys_if_engine_luatex_p:
+    \sys_if_engine_xetex_p:
+  }
+  {
+    \cs_new_protected:Npn \@@_convert_encode_:
+      { \@@_convert_gmap_internal:N \@@_encode_native_char:n }
+    \cs_new:Npn \@@_encode_native_char:n #1
+      { \char_generate:nn {#1} {12} }
+  }
+  {
+    \cs_new_protected:Npn \@@_convert_encode_:
+      {
+        \flag_clear:n { str_error }
+        \@@_convert_gmap_internal:N \@@_encode_native_char:n
+        \@@_if_flag_error:nnx { str_error }
+          { native-overflow } { }
+      }
+    \cs_new:Npn \@@_encode_native_char:n #1
+      {
+        \if_int_compare:w #1 > \c_@@_max_byte_int
+          \flag_raise:n { str_error }
+          ?
+        \else:
+          \char_generate:nn {#1} {12}
+        \fi:
+      }
+    \__kernel_msg_new:nnnn { str } { native-overflow }
+      { Character~code~too~large~for~this~engine. }
+      {
+        This~engine~only~support~8-bit~characters:~
+        valid~character~codes~are~in~the~range~[0,255].~
+        To~manipulate~arbitrary~Unicode,~use~LuaTeX~or~XeTeX.
+      }
+  }
 %    \end{macrocode}
 % \end{macro}
 % \end{macro}
-% \end{macro}
-% \end{macro}
-%   End the group to restore the catcode of the null byte.
-%    \begin{macrocode}
-\group_end:
-%    \end{macrocode}
-% \end{macro}
 %
 % \subsubsection{\texttt{clist}}
 %
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
similarity index 99%
copy from l3experimental/l3str/testfiles/m3str-convert002.tlg
copy to l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
index bcdaab0..eca3b69 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
@@ -155,6 +155,7 @@ TEST 5: From utf8 to native
 ============================================================
 (l3str-enc-utf8.def)
 TRUE
+TRUE
 ============================================================
 ============================================================
 TEST 6: From native to utf8
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.lvt b/l3experimental/l3str/testfiles/m3str-convert002.lvt
index 9998807..e3c7e5d 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.lvt
+++ b/l3experimental/l3str/testfiles/m3str-convert002.lvt
@@ -1,5 +1,5 @@
 %
-% Copyright (C) 2011, 2013, 2014 LaTeX3 Project
+% Copyright (C) 2011, 2013, 2014, 2018 LaTeX3 Project
 %
 
 \documentclass{minimal}
@@ -68,6 +68,10 @@
   {
     \str_set_convert:Nnnn \l_tmpa_str { c2 83 } { utf8/hex } { }
     \str_if_eq:onTF \l_tmpa_str { ^^83 } { \TRUE } { \ERROR }
+    \str_set_convert:Nnnn \l_tmpa_str { ce b1 } { utf8/hex } { } % alpha
+    \bool_lazy_or:nnTF \sys_if_engine_luatex_p: \sys_if_engine_xetex_p:
+      { \str_if_eq_x:nnTF \l_tmpa_str { \cs_to_str:N \^^^^03b1 } { \TRUE } { \ERROR } }
+      { \str_if_eq:onTF \l_tmpa_str { ? } { \TRUE } { \ERROR } }
   }
 
 \TEST { From~native~to~utf8 }
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.tlg
index bcdaab0..4bbc32a 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.tlg
@@ -155,6 +155,22 @@ TEST 5: From utf8 to native
 ============================================================
 (l3str-enc-utf8.def)
 TRUE
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+! LaTeX error: "str/native-overflow"
+! 
+! Character code too large for this engine.
+! 
+! See the LaTeX3 documentation for further information.
+! 
+! For immediate help type H <return>.
+!...............................................  
+l. ...  }
+|'''''''''''''''''''''''''''''''''''''''''''''''
+| This engine only support 8-bit characters: valid character codes are in the
+| range [0,255]. To manipulate arbitrary Unicode, use LuaTeX or XeTeX.
+|...............................................
+TRUE
 ============================================================
 ============================================================
 TEST 6: From native to utf8
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
similarity index 99%
copy from l3experimental/l3str/testfiles/m3str-convert002.tlg
copy to l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
index bcdaab0..eca3b69 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
@@ -155,6 +155,7 @@ TEST 5: From utf8 to native
 ============================================================
 (l3str-enc-utf8.def)
 TRUE
+TRUE
 ============================================================
 ============================================================
 TEST 6: From native to utf8





More information about the latex3-commits mailing list