[latex3-commits] [git/LaTeX3-latex3-latex3] master: Avoid tl_build in l3str-convert by simplifying the code (66ae261)
Bruno Le Floch
bruno at le-floch.fr
Sun Apr 1 20:58:56 CEST 2018
Repository : https://github.com/latex3/latex3
On branch : master
Link : https://github.com/latex3/latex3/commit/66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96
>---------------------------------------------------------------
commit 66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96
Author: Bruno Le Floch <bruno at le-floch.fr>
Date: Sun Apr 1 11:36:29 2018 -0400
Avoid tl_build in l3str-convert by simplifying the code
Also actually test an error message
>---------------------------------------------------------------
66ae261bb6ac27bad6a16f96982ebc4fcf2f3b96
l3experimental/l3str/l3str-convert.dtx | 126 +++++++-------------
...-convert002.tlg => m3str-convert002.luatex.tlg} | 1 +
.../l3str/testfiles/m3str-convert002.lvt | 6 +-
.../l3str/testfiles/m3str-convert002.tlg | 16 +++
...r-convert002.tlg => m3str-convert002.xetex.tlg} | 1 +
5 files changed, 65 insertions(+), 85 deletions(-)
diff --git a/l3experimental/l3str/l3str-convert.dtx b/l3experimental/l3str/l3str-convert.dtx
index 2cdd1b3..729cd7e 100644
--- a/l3experimental/l3str/l3str-convert.dtx
+++ b/l3experimental/l3str/l3str-convert.dtx
@@ -1000,95 +1000,53 @@
% \end{macro}
%
% \begin{macro}{\@@_convert_encode_:}
-% The conversion from an internal string to native character tokens is
-% very different in pdf\TeX{} and in other engines. For Unicode-aware
-% engines, we need the definitions to be read when the null byte has
-% category code $12$, so we set that inside a group.
-% \begin{macrocode}
-\group_begin:
- \char_set_catcode_other:n { 0 }
- \bool_lazy_any:nTF
- {
- \sys_if_engine_luatex_p:
- \sys_if_engine_xetex_p:
- }
-% \end{macrocode}
-% \begin{macro}{\@@_encode_native_loop:w}
-% \begin{macro}{\@@_encode_native_flush:}
-% \begin{macro}[rEXP]{\@@_encode_native_filter:N}
-% In Unicode-aware engines, since building particular characters
-% cannot be done expandably in \TeX{}, we cannot hope to get a
-% linear-time function. However, we get quite close using the
-% \pkg{l3tl-build} module, which abuses \tn{toks} to reach an almost
-% linear time. We produce an arbitrary character of category code 12
-% and add that character
-% to the end of the token list being built. At the end of the loop,
-% put the token list together with \cs{tl_build_end:}. Note that we
-% use an \texttt{x}-expanding assignment because it is slightly
-% faster. Unicode-aware engines will never incur an overflow because
-% the internal string is guaranteed to only contain code points in
-% $[0,\hexnum{10FFFF}]$.
-% \begin{macrocode}
- {
- \cs_new_protected:Npn \@@_convert_encode_:
- {
- \tl_gbuild_x:Nw \g_@@_result_tl
- \exp_after:wN \@@_encode_native_loop:w
- \g_@@_result_tl \s__tl { \q_stop \prg_break: } \s__tl
- \prg_break_point:
- \tl_build_end:
- }
- \cs_new_protected:Npn \@@_encode_native_loop:w #1 \s__tl #2 \s__tl
- {
- \use_none_delimit_by_q_stop:w #2 \q_stop
- \tl_build_add:x
- { \char_generate:nn {#2} {12} }
- \@@_encode_native_loop:w
- }
- }
-% \end{macrocode}
% \begin{macro}[EXP]{\@@_encode_native_char:n}
-% Since pdf\TeX{} only supports 8-bit characters, and we have a table
-% of all bytes, the conversion can be done in linear time within an
-% \texttt{x}-expanding assignment. Look out for character codes larger
-% than $255$, those characters are replaced by |?|, and raise a flag,
-% which then triggers a pdf\TeX{}-specific error.
+% The conversion from an internal string to native character tokens
+% basically maps \cs{char_generate:nn} through the code-points, but in
+% non-Unicode-aware engines we use a fall-back character |?| rather
+% than nothing when given a character code outside $[0,255]$. We
+% detect the presence of bad characters using a flag and only produce
+% a single error after the \texttt{x}-expanding assignment.
% \begin{macrocode}
- {
- \cs_new_protected:Npn \@@_convert_encode_:
- {
- \flag_clear:n { str_error }
- \@@_convert_gmap_internal:N \@@_encode_native_char:n
- \@@_if_flag_error:nnx { str_error }
- { pdfTeX-native-overflow } { }
- }
- \cs_new:Npn \@@_encode_native_char:n #1
- {
- \if_int_compare:w #1 > \c_@@_max_byte_int
- \flag_raise:n { str_error }
- ?
- \else:
- \@@_output_byte:n {#1}
- \fi:
- }
- \__kernel_msg_new:nnnn { str } { pdfTeX-native-overflow }
- { Character~code~too~large~for~pdfTeX. }
- {
- The~pdfTeX~engine~only~supports~8-bit~characters:~
- valid~character~codes~are~in~the~range~[0,255].~
- To~manipulate~arbitrary~Unicode,~use~LuaTeX~or~XeTeX.
- }
- }
+\bool_lazy_any:nTF
+ {
+ \sys_if_engine_luatex_p:
+ \sys_if_engine_xetex_p:
+ }
+ {
+ \cs_new_protected:Npn \@@_convert_encode_:
+ { \@@_convert_gmap_internal:N \@@_encode_native_char:n }
+ \cs_new:Npn \@@_encode_native_char:n #1
+ { \char_generate:nn {#1} {12} }
+ }
+ {
+ \cs_new_protected:Npn \@@_convert_encode_:
+ {
+ \flag_clear:n { str_error }
+ \@@_convert_gmap_internal:N \@@_encode_native_char:n
+ \@@_if_flag_error:nnx { str_error }
+ { native-overflow } { }
+ }
+ \cs_new:Npn \@@_encode_native_char:n #1
+ {
+ \if_int_compare:w #1 > \c_@@_max_byte_int
+ \flag_raise:n { str_error }
+ ?
+ \else:
+ \char_generate:nn {#1} {12}
+ \fi:
+ }
+ \__kernel_msg_new:nnnn { str } { native-overflow }
+ { Character~code~too~large~for~this~engine. }
+ {
+ This~engine~only~support~8-bit~characters:~
+ valid~character~codes~are~in~the~range~[0,255].~
+ To~manipulate~arbitrary~Unicode,~use~LuaTeX~or~XeTeX.
+ }
+ }
% \end{macrocode}
% \end{macro}
% \end{macro}
-% \end{macro}
-% \end{macro}
-% End the group to restore the catcode of the null byte.
-% \begin{macrocode}
-\group_end:
-% \end{macrocode}
-% \end{macro}
%
% \subsubsection{\texttt{clist}}
%
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
similarity index 99%
copy from l3experimental/l3str/testfiles/m3str-convert002.tlg
copy to l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
index bcdaab0..eca3b69 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.luatex.tlg
@@ -155,6 +155,7 @@ TEST 5: From utf8 to native
============================================================
(l3str-enc-utf8.def)
TRUE
+TRUE
============================================================
============================================================
TEST 6: From native to utf8
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.lvt b/l3experimental/l3str/testfiles/m3str-convert002.lvt
index 9998807..e3c7e5d 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.lvt
+++ b/l3experimental/l3str/testfiles/m3str-convert002.lvt
@@ -1,5 +1,5 @@
%
-% Copyright (C) 2011, 2013, 2014 LaTeX3 Project
+% Copyright (C) 2011, 2013, 2014, 2018 LaTeX3 Project
%
\documentclass{minimal}
@@ -68,6 +68,10 @@
{
\str_set_convert:Nnnn \l_tmpa_str { c2 83 } { utf8/hex } { }
\str_if_eq:onTF \l_tmpa_str { ^^83 } { \TRUE } { \ERROR }
+ \str_set_convert:Nnnn \l_tmpa_str { ce b1 } { utf8/hex } { } % alpha
+ \bool_lazy_or:nnTF \sys_if_engine_luatex_p: \sys_if_engine_xetex_p:
+ { \str_if_eq_x:nnTF \l_tmpa_str { \cs_to_str:N \^^^^03b1 } { \TRUE } { \ERROR } }
+ { \str_if_eq:onTF \l_tmpa_str { ? } { \TRUE } { \ERROR } }
}
\TEST { From~native~to~utf8 }
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.tlg
index bcdaab0..4bbc32a 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.tlg
@@ -155,6 +155,22 @@ TEST 5: From utf8 to native
============================================================
(l3str-enc-utf8.def)
TRUE
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!
+! LaTeX error: "str/native-overflow"
+!
+! Character code too large for this engine.
+!
+! See the LaTeX3 documentation for further information.
+!
+! For immediate help type H <return>.
+!...............................................
+l. ... }
+|'''''''''''''''''''''''''''''''''''''''''''''''
+| This engine only support 8-bit characters: valid character codes are in the
+| range [0,255]. To manipulate arbitrary Unicode, use LuaTeX or XeTeX.
+|...............................................
+TRUE
============================================================
============================================================
TEST 6: From native to utf8
diff --git a/l3experimental/l3str/testfiles/m3str-convert002.tlg b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
similarity index 99%
copy from l3experimental/l3str/testfiles/m3str-convert002.tlg
copy to l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
index bcdaab0..eca3b69 100644
--- a/l3experimental/l3str/testfiles/m3str-convert002.tlg
+++ b/l3experimental/l3str/testfiles/m3str-convert002.xetex.tlg
@@ -155,6 +155,7 @@ TEST 5: From utf8 to native
============================================================
(l3str-enc-utf8.def)
TRUE
+TRUE
============================================================
============================================================
TEST 6: From native to utf8
More information about the latex3-commits
mailing list