[latex3-commits] [l3svn] branch master updated: Update Unicode data loader to v1.3a

noreply at latex-project.org noreply at latex-project.org
Sat Feb 6 16:23:41 CET 2016


This is an automated email from the git hooks/post-receive script.

joseph pushed a commit to branch master
in repository l3svn.

The following commit(s) were added to refs/heads/master by this push:
       new  2120d31   Update Unicode data loader to v1.3a
2120d31 is described below

commit 2120d314083df544988d05340e71590b6b9b17a8
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sat Feb 6 15:23:19 2016 +0000

    Update Unicode data loader to v1.3a
---
 support/load-unicode-data.tex |  108 ++++++-----------------------------------
 1 file changed, 16 insertions(+), 92 deletions(-)

diff --git a/support/load-unicode-data.tex b/support/load-unicode-data.tex
index e5af512..3c0b7d0 100644
--- a/support/load-unicode-data.tex
+++ b/support/load-unicode-data.tex
@@ -1,6 +1,6 @@
 % File load-unicode-data.tex
 %
-% Copyright 2015 The LaTeX3 Project
+% Copyright 2015,2016 The LaTeX3 Project
 %
 % It may be distributed and/or modified under the conditions of
 % the LaTeX Project Public License (LPPL), either version 1.3c of
@@ -19,35 +19,21 @@
 % - \catcode 11 for all letters (Unicode class "L")
 % - \catcode 11 for all combining marks (Unicode class "M")
 % - \sfcode 999 for all code points of class "Lu" (upper case letters)
-% - \lccode for all of class "Ll" (upper case letters) to the code point
+% - \lccode for all of class "Ll" (lower case letters) to the code point
 %   itself, and \uccode to the upper case mapping (or if not given
 %   to the code point itself)
 % - \uccode for all of class "Lu" (upper case letters) to the code point
 %   itself, and \lccode to the lower case mapping (or if not given
 %   to the code point itself)
 % - \lccode and \uccode for all of class "Lt" (title case letters) to the
-%   lower can upper case mappings (or if not given to the code point itself)
+%   lower and upper case mappings (or if not given to the code point itself)
 % - \lccode and \uccode for all other letter code points are set to
 %   the code point itself
 % - \lccode and/or \uccode for non-letter code points for which an upper
 %   or lower case mapping is given
 % - \sfcode 0 (ignored) for code points of Unicode classes "Pe" (closing
 %   punctuation marks) and "Pf" (final quotation marks)
-%
-% From the file MathClass.txt the following mapping are implemented between
-% Unicode classes and TeX math types
-% - "L" (large)       \mathop
-% - "B" (binary)      \mathbin
-% - "V" (vary)        \mathbin
-% - "R" (relation)    \mathrel
-% - "O" (opening)     \mathopen
-% - "C" (closing)     \mathclose
-% - "P" (punctuation) \mathpunct
-% - "A" (alphabetic)  \mathalpha
-%
-% For each code point processed, the result is of the form
-%
-%    \Umathcode <codepoint> = <type> 1 <codepoint>
+% - \Umathcode for all letters as math type 7 (var)
 %
 % =============================================================================
 %
@@ -71,7 +57,7 @@
   \catcode`\^=7 %
   \newlinechar=`\^^J %
   \message{^^J}%
-  \message{load-unicode-data.tex v1.0 (2015-12-10)^^J}%
+  \message{load-unicode-data.tex v1.3 (2016-02-06)^^J}%
   \message{Reading Unicode data^^J}%
 % The first stage of parsing is dealing with the fact that there are lots of
 % data items separated by |;|. Of those, only a few are needed so they are
@@ -125,7 +111,10 @@
         \parseunicodedataVI\lccode{#1}{#4}%
         \global\sfcode"#1=999 %
       \fi
+% All letters in math mode should be variables.
+      \global\Umathcode"#1="7"01"#1 %
     \else
+      \def\temp{#2}%
       \ifnum 0\ifx\temp\Pe 1\fi\ifx\temp\Pf 1\fi>0 %
         \global\sfcode"#1=0 %
       \fi
@@ -164,12 +153,14 @@
   }%
   \def\parseunicodedataXII#1;#2\relax#3;#4\relax{%
     \if L\firsttoken#4?\relax
-      \count0="#3 %
-      \loop
-        \unless\ifnum\count0>"#1 %
-          \parseunicodedataV{\count0}%
-          \advance\count0 by 1 %
-      \repeat
+      \begingroup
+        \count0="#3 %
+        \loop
+          \unless\ifnum\count0>"#1 %
+            \parseunicodedataV{\count0 }%
+            \advance\count0 by 1 %
+        \repeat
+      \endgroup
     \fi
   }%
 % From plain: may not be defined (yet).
@@ -200,71 +191,4 @@
     \fi
   \repeat
   \closein0 %
-% All of the other data files have some common aspects to their format. We
-% therefore begin with some shared code. First a check for a comment line:
-% these can be skipped. (Currently only |MathClass.txt| is used by this code
-% is also usable with the other Unicode data files.)
-  \edef\hash{\string#}%
-  \def\parseunicodedataI#1\relax{%
-    \unless\if\hash\firsttoken#1?\relax
-      \parseunicodedataII#1\relax
-    \fi
-  }%
-% The first entry in all of the files is a code point or range of code points:
-% set up to find a range. The definition of |\parseunicodedataIV| will depend on
-% the data being processed and may need to split the remainder of the line
-% further.
-  \def\parseunicodedataII#1;#2\relax{%
-    \parseunicodedataIII#1....\relax{#2}%
-  }%
-  \def\parseunicodedataIII#1..#2..#3\relax#4{%
-    \ifx\relax#2\relax
-      \parseunicodedataIV{#1}{#1}#4\relax
-    \else
-      \parseunicodedataIV{#1}{#2}#4\relax
-    \fi
-  }%
-% A shared routine for reading the data files: only one part of the parser
-% has to be altered.
-  \def\readandparse#1{%
-    \catcode`\#=12 %
-    \openin0=#1.txt %
-% Read two lines from the source file to extract the version information: it is
-% always the first two lines of the file.
-    \read0 to \unicodedataline
-    \message{\unicodedataline ^^J}%
-    \read0 to \unicodedataline
-    \message{\unicodedataline ^^J}%
-    \loop\unless\ifeof0 %
-      \read0 to \unicodedataline
-      \unless\ifx\unicodedataline\storedpar
-        \expandafter\parseunicodedataI\unicodedataline\relax
-      \fi
-    \repeat
-    \catcode`\#=6 %
-    \closein0 %
-  }%
-% |MathClass.txt|: this just needs one token of the 'rest' of the line.
-% Examine the Unicode class and if known set up the math code appropriately.
-  \chardef\L=1 %
-  \chardef\B=2 %
-  \chardef\V=2 %
-  \chardef\R=3 %
-  \chardef\O=4 %
-  \chardef\C=5 %
-  \chardef\P=6 %
-  \chardef\A=7 %
-  \def\parseunicodedataIV#1#2#3#4\relax{%
-    \begingroup
-      \count0="#1 %
-      \loop
-        \ifnum\count0<"#2 %
-          \ifcsname #3\endcsname
-            \global\Umathcode\count0=\csname #3\endcsname 1 \count0 %
-          \fi
-        \advance\count0 by 1 %
-      \repeat
-    \endgroup
-  }%
-  \readandparse{MathClass}%
 \endgroup

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the latex3-commits mailing list