[latex3-commits] [l3svn] 03/03: Update Unicode data loading in format mode

noreply at latex-project.org noreply at latex-project.org
Mon Dec 14 23:49:58 CET 2015


This is an automated email from the git hooks/post-receive script.

joseph pushed a commit to branch master
in repository l3svn.

commit 6eecdc9e228222b443b443b0d8faad2c2ac30c89
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Mon Dec 14 22:48:27 2015 +0000

    Update Unicode data loading in format mode
    
    The 8-bit setup should probably also be moved to the 'very early'
    stage, so that there is nothing left to worry about at the 'final'
    stage (which is really the wrong place for it). Look out for another
    commit doing this, perhaps tomorrow!
---
 l3kernel/l3bootstrap.dtx |    5 +--
 l3kernel/l3final.dtx     |   83 +++++-----------------------------------------
 2 files changed, 9 insertions(+), 79 deletions(-)

diff --git a/l3kernel/l3bootstrap.dtx b/l3kernel/l3bootstrap.dtx
index c171b58..07883c6 100644
--- a/l3kernel/l3bootstrap.dtx
+++ b/l3kernel/l3bootstrap.dtx
@@ -518,10 +518,7 @@
 %    \being{macrocode}
 %<*initex>
 \ifdefined\Umathcode
-  \input load-unicode-casing %
-  \input load-unicode-punctuation %
-  \input load-unicode-east-asian-classes %
-  \input load-unicode-math-classes %
+  \input load-unicode-data %
 \fi
 %</initex>
 %    \end{macrocode}
diff --git a/l3kernel/l3final.dtx b/l3kernel/l3final.dtx
index ccd390f..03a643f 100644
--- a/l3kernel/l3final.dtx
+++ b/l3kernel/l3final.dtx
@@ -104,83 +104,16 @@
 % For \XeTeX{} and \LuaTeX{}, which are natively Unicode engines, the
 % encoding set up is exactly Unicode. For the other supported engines
 % input has to be $8$-bit and so an encoding scheme must be chosen. At present,
-% this is the EC (|T1|) scheme, with the assumption that languages for which this
-% is not appropriate will be used with one of the Unicode engines.
+% this is the EC (|T1|) scheme, with the assumption that languages for which
+% this is not appropriate will be used with one of the Unicode engines.
+% The latter read the Unicode data very early such that the same data
+% files can be used as for plain \TeX{} and \LaTeXe{}. Thus only the $8$-bit
+% engines are of concern here.
 %    \begin{macrocode}
-\bool_if:nTF
-  {
-       \sys_if_engine_luatex_p:
-    || \sys_if_engine_xetex_p:
-  }
-%    \end{macrocode}
-% For the Unicode engines, the core data used is derived automatically from
-% the master Unicode Consortium files and is thus read here. At present, this
-% is done by reading the \LaTeXe{} data file, setting up so only the data
-% part is extracted. For setting up letter codes, |\L| lines are cased letters,
-% |\l| uncased letters and |\C| cased non-letters. See |ltunicode.dtx| for
-% details of \XeTeX{} inter-character class types.
-%    \begin{macrocode}
-  {
-    \group_begin:
-      \cs_set:Npn \begingroup #1 \fi \fi { }
-      \sys_if_engine_xetex:TF
-        {
-          \cs_set_protected:Npn \endgroup #1 \fi \ID
-            { 
-              \cs_set_eq:NN \endgroup \tex_endinput:D
-              \ID 
-            } 
-        }
-        { \cs_set_eq:NN \endgroup \tex_endinput:D }
-      \cs_set_protected:Npn \C #1 ~ #2 ~ #3 ~
-        {
-          \tex_global:D \tex_lccode:D "#1 = "#3 \scan_stop:
-          \tex_global:D \tex_uccode:D "#1 = "#2 \scan_stop:
-        }
-      \cs_set_protected:Npn \L #1 ~ #2 ~ #3 ~
-        {
-          \C #1 ~ #2 ~ #3 ~
-          \int_compare:nNnF { "#1 } = { "#3 }
-            { \tex_global:D \tex_sfcode:D "#1 = 999 \scan_stop: }
-          \tex_global:D \utex_mathcode:D "#1 =  
-            \int_compare:nNnTF { "#1 } < { "10000 } { "7 } { "0 }
-            "01 "#1 \scan_stop:
-        }
-      \cs_set_protected:Npn \l #1 ~ { \L #1 ~ #1 ~ #1 ~ }
-      \sys_if_engine_xetex:T
-        {  
-          \cs_set_protected:Npn \ID #1 ~ #2 ~
-            { \__char_set_class:nnn {#1} {#2} { 1 } }
-          \cs_set_protected:Npn \OP #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 2 } }
-          \cs_set_protected:Npn \CL #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 3 } }
-          \cs_set_protected:Npn \EX #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 3 } }
-          \cs_set_protected:Npn \IS #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 3 } }
-          \cs_set_protected:Npn \NS #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 3 } }
-          \cs_set_protected:Npn \CM #1 ~
-            { \__char_set_class:nnn {#1} {#1} { 256 } }
-          \cs_set_protected:Npn \__char_set_class:nnn #1#2#3
-            {
-              \int_step_inline:nnnn { "#1 } { 1 } { "#2 }
-                { \tex_global:D \xetex_charclass:D ##1 = #3 \scan_stop: }
-            }
-        }
-      \char_set_catcode_space:n { `\  }%
-      \file_input:n{unicode-letters.def}%
-    \group_end:
-%    \end{macrocode}
-% A couple of special cases that make sense for \TeX{} but don't derive
-% readily from the Unicode files.
-%    \begin{macrocode}
-    \tex_global:D \tex_sfcode:D "2019 = 0 \scan_stop:
-    \tex_global:D \tex_sfcode:D "201D = 0 \scan_stop:
-  }
+\bool_if:nF
+  { \sys_if_engine_luatex_p: || \sys_if_engine_xetex_p: }
 %    \end{macrocode}
-% For the other engines, set up the Cork T1 encoding data. Most of this can be
+% Set up the Cork T1 encoding data. Most of this can be
 % done using a few loops. We don't provide a global interface for setting
 % these codes so it is done at a low level (to avoid code repetition).
 %    \begin{macrocode}

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the latex3-commits mailing list