[latex3-commits] [git/LaTeX3-latex3-latex2e] master: Improve help texts on UTF-8 errors Add support for declaring 1-byte active characters Make most 1-byte control characters active by default Remove some "comments" from utf8 handling sources. (64000c0)

David Carlisle d.p.carlisle at gmail.com
Fri Mar 30 00:06:12 CEST 2018


Repository : https://github.com/latex3/latex2e
On branch  : master
Link       : https://github.com/latex3/latex2e/commit/64000c0a937cd375b36f0bcca6d0e0a6d0030714

>---------------------------------------------------------------

commit 64000c0a937cd375b36f0bcca6d0e0a6d0030714
Author: David Carlisle <d.p.carlisle at gmail.com>
Date:   Thu Mar 29 23:06:12 2018 +0100

    Improve help texts on UTF-8 errors
    Add support for declaring 1-byte active characters
    Make most 1-byte control characters active by default
    Remove some "comments" from utf8 handling sources.


>---------------------------------------------------------------

64000c0a937cd375b36f0bcca6d0e0a6d0030714
 base/ltfinal.dtx                         |   42 +++++++---
 base/testfiles/tlb-inputenc-001.tlg      |   22 +++---
 base/testfiles/tlb-utf8-c0.luatex.tlg    |   12 +--
 base/testfiles/tlb-utf8-c0.tlg           |   18 +++--
 base/testfiles/tlb-utf8-c0.xetex.tlg     |   12 +--
 base/testfiles/tlb-utf8-undec-cp1252.tlg |   19 +++--
 base/testfiles/tlb1144.tlg               |    6 +-
 base/testfiles/tlb3480.tlg               |   25 +++---
 base/utf8ienc.dtx                        |  122 +++++++++++++++++++++---------
 9 files changed, 171 insertions(+), 107 deletions(-)

diff --git a/base/ltfinal.dtx b/base/ltfinal.dtx
index fe8433a..1bd72a0 100644
--- a/base/ltfinal.dtx
+++ b/base/ltfinal.dtx
@@ -581,12 +581,31 @@
     \noexpand\IeC
   \fi
 }
+%    \end{macrocode}
+%
+% Make characters active for UTF-8 input
+%    \begin{macrocode}
+\@tempcnta=1
+\loop
+  \catcode\@tempcnta=13
+  \advance\@tempcnta\@ne
+\ifnum\@tempcnta<32
+\repeat
+\catcode0=15  %null
+\catcode9=10  %tab
+\catcode10=12 % ctrl J
+\catcode12=13 %ctrl L
+\def^^L{\par}
+\catcode13=5  %newline
 \@tempcnta=128
 \loop
-\catcode\@tempcnta=13
-\advance\@tempcnta\@ne
+  \catcode\@tempcnta=13
+  \advance\@tempcnta\@ne
 \ifnum\@tempcnta<256
 \repeat
+%    \end{macrocode}
+%
+%    \begin{macrocode}
 \edef\inputencodingname{utf8}%
 \input{utf8.def}
 \let\@inpenc at test\@undefined
@@ -598,6 +617,16 @@
 %    \end{macrocode}
 %
 %    \begin{macrocode}
+%<latexrelease>\@tempcnta=0
+%<latexrelease>\loop
+%<latexrelease>  \catcode\@tempcnta=15
+%<latexrelease>  \advance\@tempcnta\@ne
+%<latexrelease>\ifnum\@tempcnta<32
+%<latexrelease>\repeat
+%<latexrelease>\catcode9=10  % tab
+%<latexrelease>\catcode10=12 % ctrl J
+%<latexrelease>\catcode12=13 % ctrl L
+%<latexrelease>\catcode13=5  % newline
 %<latexrelease>\@tempcnta=128
 %<latexrelease>\loop
 %<latexrelease>\catcode\@tempcnta=12
@@ -694,15 +723,6 @@
 % All the characters in the range 0--31 and 127--255 are illegal,
 % \emph{except} tab (|^^I|), nl (|^^J|), ff (|^^L|) and cr (|^^M|).
 %
-% Now allow 8-bit characters, although their use in this way is
-% strongly discouraged. See |inputenc.dtx| for a supported mechanism
-% for 8-bit input.
-%    \begin{macrocode}
-\def\reserved at c#1{\catcode#1=15\relax}
-\reserved at a{0}{`\^^H}
-\reserved at c{`\^^K}
-\reserved at a{`\^^N}{31}
-%    \end{macrocode}
 %
 % \subsection{Lccodes and uccodes}
 %
diff --git a/base/testfiles/tlb-inputenc-001.tlg b/base/testfiles/tlb-inputenc-001.tlg
index a766e01..afcea0f 100644
--- a/base/testfiles/tlb-inputenc-001.tlg
+++ b/base/testfiles/tlb-inputenc-001.tlg
@@ -10,9 +10,9 @@ Type  H <return>  for immediate help.
  ...                                              
 l. ...zz ^^c3X
               zz
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 ! LaTeX Error: Command \th unavailable in encoding OT1.
 See the LaTeX manual or LaTeX Companion for explanation.
 Type  H <return>  for immediate help.
@@ -22,25 +22,23 @@ l. ...thorn U+00fe [^^c3^^be
 Your command was ignored.
 Type  I <command> <return>  to replace it with another command,
 or  <return>  to continue without it.
-! Package inputenc Error: Unicode char ^^cc^^81 (U+301)
+! Package inputenc Error: Unicode character ^^cc^^81 (U+301)
 (inputenc)                not set up for use with LaTeX.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...comb acute U+0301 [^^cc^^81
                                 ]
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
-! Package inputenc Error: Unicode char ^^f0^^9d^^94^^84 (U+1D504)
+You may provide a definition with
+\DeclareUnicodeCharacter 
+! Package inputenc Error: Unicode character ^^f0^^9d^^94^^84 (U+1D504)
 (inputenc)                not set up for use with LaTeX.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...fraktur A [^^f0^^9d^^94^^84
                                 ]
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+You may provide a definition with
+\DeclareUnicodeCharacter 
 a\IeC {\nobreakspace }nbsp
-Fraktur A [\GenericError {(inputenc)                }{Package inputenc Error: Unicode char ^^f0^^9d^^94^^84 (U+1D504)\MessageBreak not set up for use with LaTeX}{See the inputenc package documentation for explanation.}{Your command was ignored.\MessageBreak Type  I <command> <return>  to replace it with another command,\MessageBreak or  <return>  to continue without it.}]
+Fraktur A [\GenericError {(inputenc)                }{Package inputenc Error: Unicode character ^^f0^^9d^^94^^84 (U+1D504)\MessageBreak not set up for use with LaTeX}{See the inputenc package documentation for explanation.}{You may provide a definition with\MessageBreak \DeclareUnicodeCharacter }]
diff --git a/base/testfiles/tlb-utf8-c0.luatex.tlg b/base/testfiles/tlb-utf8-c0.luatex.tlg
index ed2f166..f858b75 100644
--- a/base/testfiles/tlb-utf8-c0.luatex.tlg
+++ b/base/testfiles/tlb-utf8-c0.luatex.tlg
@@ -2,21 +2,17 @@ This is a generated file for the LaTeX2e validation system.
 Don't change this file in any respect.
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 12.
 l. ...\showthe\catcode4
-> 15.
+> 12.
 l. ...\showthe\catcode11
 > 12.
 l. ...\showthe\catcode150
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 12.
 l. ...\showthe\catcode4
-> 15.
+> 12.
 l. ...\showthe\catcode11
 > 12.
 l. ...\showthe\catcode150
-! Text line contains an invalid character.
-l. ...^^07
-A funny symbol that I can't read has just been input.
-Continue, and I'll forget that it ever happened.
diff --git a/base/testfiles/tlb-utf8-c0.tlg b/base/testfiles/tlb-utf8-c0.tlg
index b327a64..9c38c78 100644
--- a/base/testfiles/tlb-utf8-c0.tlg
+++ b/base/testfiles/tlb-utf8-c0.tlg
@@ -2,21 +2,25 @@ This is a generated file for the LaTeX2e validation system.
 Don't change this file in any respect.
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 13.
 l. ...\showthe\catcode4
-> 15.
+> 13.
 l. ...\showthe\catcode11
 > 13.
 l. ...\showthe\catcode150
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 13.
 l. ...\showthe\catcode4
-> 15.
+> 13.
 l. ...\showthe\catcode11
 > 13.
 l. ...\showthe\catcode150
-! Text line contains an invalid character.
+! Package inputenc Error: Unicode character ^^G (U+7)
+(inputenc)                not set up for use with LaTeX.
+See the inputenc package documentation for explanation.
+Type  H <return>  for immediate help.
+ ...                                              
 l. ...^^07
-A funny symbol that I can't read has just been input.
-Continue, and I'll forget that it ever happened.
+You may provide a definition with
+\DeclareUnicodeCharacter 
diff --git a/base/testfiles/tlb-utf8-c0.xetex.tlg b/base/testfiles/tlb-utf8-c0.xetex.tlg
index ed2f166..f858b75 100644
--- a/base/testfiles/tlb-utf8-c0.xetex.tlg
+++ b/base/testfiles/tlb-utf8-c0.xetex.tlg
@@ -2,21 +2,17 @@ This is a generated file for the LaTeX2e validation system.
 Don't change this file in any respect.
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 12.
 l. ...\showthe\catcode4
-> 15.
+> 12.
 l. ...\showthe\catcode11
 > 12.
 l. ...\showthe\catcode150
 > 15.
 l. ...\showthe\catcode0
-> 15.
+> 12.
 l. ...\showthe\catcode4
-> 15.
+> 12.
 l. ...\showthe\catcode11
 > 12.
 l. ...\showthe\catcode150
-! Text line contains an invalid character.
-l. ...^^07
-A funny symbol that I can't read has just been input.
-Continue, and I'll forget that it ever happened.
diff --git a/base/testfiles/tlb-utf8-undec-cp1252.tlg b/base/testfiles/tlb-utf8-undec-cp1252.tlg
index 676aa91..ab7fb08 100644
--- a/base/testfiles/tlb-utf8-undec-cp1252.tlg
+++ b/base/testfiles/tlb-utf8-undec-cp1252.tlg
@@ -1,32 +1,31 @@
 This is a generated file for the LaTeX2e validation system.
 Don't change this file in any respect.
-! Package inputenc Error: Unicode char ^^d6^^93 (U+593)
+! Package inputenc Error: Unicode character ^^d6^^93 (U+593)
 (inputenc)                not set up for use with LaTeX.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...a^^d6 ^^93
                x^^94 ^^df
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+You may provide a definition with
+\DeclareUnicodeCharacter 
 ! Package inputenc Error: Invalid UTF-8 byte 148.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...a^^d6 ^^93x^^94
                      ^^df
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 ! Package inputenc Error: Invalid UTF-8 byte sequence.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 Completed box being shipped out [1]
 \vbox(633.0+0.0)x407.0
 .\glue 16.0
diff --git a/base/testfiles/tlb1144.tlg b/base/testfiles/tlb1144.tlg
index c34e8f3..2d0757f 100644
--- a/base/testfiles/tlb1144.tlg
+++ b/base/testfiles/tlb1144.tlg
@@ -182,9 +182,9 @@ See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...\showoutput
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 LaTeX Font Info:    External font `cmr17 at24.88pt' loaded as
 (Font)              OT1/cmr/m/n/24.88  on input line ....
 LaTeX Font Info:    Switching to \OT1/cmr/m/n/24.88  on input line ....
diff --git a/base/testfiles/tlb3480.tlg b/base/testfiles/tlb3480.tlg
index 56cb425..8346e90 100644
--- a/base/testfiles/tlb3480.tlg
+++ b/base/testfiles/tlb3480.tlg
@@ -27,35 +27,34 @@ Type  H <return>  for immediate help.
  ...                                              
 l. ...for them: ^^c3F
                      ^^e1^^a4^^b6
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
-! Package inputenc Error: Unicode char ^^e1^^a4^^b6 (U+1936)
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
+! Package inputenc Error: Unicode character ^^e1^^a4^^b6 (U+1936)
 (inputenc)                not set up for use with LaTeX.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ...for them: ^^c3F ^^e1^^a4^^b6
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+You may provide a definition with
+\DeclareUnicodeCharacter 
 ! Package inputenc Error: Invalid UTF-8 byte sequence.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ......e that are not legal utf8 sequences: ^^c3X
                                                    ^^e1XY
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 ! Package inputenc Error: Invalid UTF-8 byte sequence.
 See the inputenc package documentation for explanation.
 Type  H <return>  for immediate help.
  ...                                              
 l. ......are not legal utf8 sequences: ^^c3X ^^e1XY
-Your command was ignored.
-Type  I <command> <return>  to replace it with another command,
-or  <return>  to continue without it.
+The document does not appear to be in UTF-8 encoding.
+Try specifying \UseRawInputEncoding or
+\usepackage [latin1]{inputenc}
 Completed box being shipped out [1]
 \vbox(578.15999+0.0)x469.75499
 .\glue 0.0
diff --git a/base/utf8ienc.dtx b/base/utf8ienc.dtx
index b66abc3..9f412d6 100644
--- a/base/utf8ienc.dtx
+++ b/base/utf8ienc.dtx
@@ -34,8 +34,7 @@
 \title{Providing some UTF-8 support via \texttt{inputenc}}
 \date{\fileversion\space\filedate{} printed \today}
  \author{%
-  Frank Mittelbach \and Chris Rowley\thanks{Borrowing heavily from
-      code by David Carlisle and tables by Sebastian Rahtz; some table
+  David Carlisle \and Frank Mittelbach \and Chris Rowley\thanks{Borrowing heavily from tables by Sebastian Rahtz; some table
       and code cleanup by Javier Bezos}}
 \usepackage[utf8]{inputenc}
 \begin{document}
@@ -52,7 +51,6 @@
 %
 % \section{Introduction}
 %
-% [The whole section is rather unfinished \ldots\ just like the code, sorry!]
 %
 % \subsection{Background and general stuff}
 %
@@ -290,10 +288,10 @@
 \def\UTFviii at defined#1{%
   \ifx#1\relax
 %    \end{macrocode}
-% Test if tthe sequence is invalid UTF-8 or valid UTF-8 but without
+% Test if the sequence is invalid UTF-8 or valid UTF-8 but without
 % a \LaTeX\ definition.
 %    \begin{macrocode}
-      \if\relax\expandafter\UTFviii at checkseq\string#1\relax\relax
+     \if\relax\expandafter\UTFviii at checkseq\string#1\relax\relax
 %    \end{macrocode}
 %    The endline character has a special definition within the
 %    inputenc package (it is gobbling spaces). For this reason we
@@ -301,18 +299,15 @@
 % \changes{v1.1b}{2004/02/09}{No newlines allowed in error messages}
 % \changes{v1.1g}{2005/09/27}{Real spaces do not show up so use \cs{space}}
 % \changes{v1.1o}{2015/08/28}{Show Unicode number of character in hex}
-% \changes{v1.2a}{2018/03/24}{Error message inproved for non-UTF-8 sequences}%
+% \changes{v1.2a}{2018/03/24}{Error message improved for non-UTF-8 sequences}%
 %    \begin{macrocode}
-      \PackageError{inputenc}{Unicode\space char\space\expandafter
-                              \UTFviii at splitcsname\string#1\relax
-                              \MessageBreak
-                              not\space set\space up\space
-                              for\space use\space with\space LaTeX}\@eha
+      \UTFviii at undefined@err{#1}%
 %    \end{macrocode}
 %
 %    \begin{macrocode}
      \else
-      \PackageError{inputenc}{Invalid UTF-8 byte sequence}\@eha
+      \PackageError{inputenc}{Invalid UTF-8 byte sequence}%
+                             \UTFviii at invalid@help
      \fi         
 %    \end{macrocode}
 %
@@ -324,11 +319,35 @@
 %    \end{macrocode}
 % \end{macro}
 %
-% \begin{macro}{\UTFviii at invalid}
+% \begin{macro}{\UTFviii at invalid@err}
+% \begin{macro}{\UTFviii at invalid@help}
 % \changes{v1.2a}{2018/03/24}{Macro added}%
 %    \begin{macrocode}
-\def\UTFviii at invalid#1{%
- \PackageError{inputenc}{Invalid UTF-8 byte \number`#1}\@eha}
+\def\UTFviii at invalid@err#1{%
+ \PackageError{inputenc}{Invalid UTF-8 byte \number`#1}%
+                        \UTFviii at invalid@help}
+%    \end{macrocode}
+%
+%    \begin{macrocode}
+\def\UTFviii at invalid@help{%
+   The document does not appear to be in UTF-8 encoding.\MessageBreak
+   Try specifying \noexpand\UseRawInputEncoding or\MessageBreak
+   \noexpand\usepackage[latin1]{inputenc}}%
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}{\UTFviii at undefined@err}
+% \changes{v1.2a}{2018/03/24}{Macro added}%
+%    \begin{macrocode}
+\def\UTFviii at undefined@err#1{%
+  \PackageError{inputenc}{Unicode character \expandafter
+                          \UTFviii at splitcsname\string#1\relax
+                          \MessageBreak
+                          not set up for use with LaTeX}%
+                         {You may provide a definition with\MessageBreak
+                          \noexpand\DeclareUnicodeCharacter}%
+   }
 %    \end{macrocode}
 % \end{macro}
 %
@@ -365,9 +384,9 @@
 %
 %
 % \begin{macro}{\UTFviii at loop}
-%    This wonderful bit of code from Dr Carlisle defines the starting
-%    octets to call |\UTFviii at two@octets| etc as appropriate. The starting
-%    octet itself is passed directly as the first argument, the others
+%    This bit of code derived from \texttt{xmltex}  defines the active character
+%    corresponding to starting octets to call |\UTFviii at two@octets| etc as appropriate.
+%    The starting octet itself is passed directly as the first argument, the others
 %    are picked up later en route.
 %
 %    The |\UTFviii at loop| loops through the numbers starting at
@@ -377,10 +396,6 @@
 %    All this is done in a group so that temporary catcode changes
 %    etc.~vanish after everything is set up.
 %
-%    It may be a good idea to add code to deal with `illegal utf8 octets':
-%    at present these will be handled by whatever code was in use for 8-bit
-%    input before this code is executed.
-%
 %    \begin{macrocode}
 \begingroup
 \catcode`\~13
@@ -397,11 +412,34 @@
   \fi}
 %    \end{macrocode}
 %
+% Handle the single byte control characters.
+% \changes{v1.2a}{2018/03/24}{Loop over C0 controls added}%
+% C0 controls are valid UTF-8 but are defined to give the ``Character not defined'' error.
+% They may be defined with |\DeclareUnicodeCharacter|.
+%    \begin{macrocode}
+    \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at undefined@err{:\string~}}}
+% 0 ^^@ null is left alone; bounds are decimal, upper bound exclusive
+    \count@1
+    \@tempcnta9
+% 9 ^^I tab (skipped)
+% 10 ^^J nl (skipped)
+\UTFviii at loop
+    \count@11
+    \@tempcnta12
+\UTFviii at loop
+% 12 ^^L (skipped)
+% 13 ^^M (skipped)
+    \count@14
+    \@tempcnta32
+\UTFviii at loop
+%    \end{macrocode}
+%
+%
 % Bytes with leading bits |10| are not valid UTF-8 starting bytes
 %    \begin{macrocode}
     \count@"80
     \@tempcnta"C2
-    \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid\string~}}
+    \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid@err\string~}}
 \UTFviii at loop
 %    \end{macrocode}
 %
@@ -434,7 +472,7 @@
 %    \begin{macrocode}
     \count@"F4
     \@tempcnta"100
-    \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid\string~}}
+    \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid@err\string~}}
 \UTFviii at loop
 %    \end{macrocode}
 %
@@ -565,7 +603,7 @@
 %    \begin{macrocode}
 %  \begingroup
 %    \end{macrocode}
-%    The original code from David supported the convention that a
+%    The original code from \texttt{xmltex} supported the convention that a
 %    Unicode slot number could be given either as a decimal or as a
 %    hexadecimal (by starting with \texttt{x}).  We do not do this so
 %    this code is also removed.  This could be reactivated if one
@@ -578,18 +616,28 @@
 %    As |\count@| already contains the right value we make
 %    |\parse at XML@charref| work without arguments.
 % \changes{v1.1g}{2005/09/27}{Real spaces do not show up so use \cs{space}}
+% \changes{v1.2a}{2018/03/24}{Allow control characters if active}
+% In the case of single byte UTF-8 sequences, only allow definition if
+% the character is already active.  The definition of |\UTFviii at tmp|
+% looks slightly strange but is designed for the sequence of |\expandafter|
+% in |\DeclareUnicodeCharacter|.
+% 
 %    \begin{macrocode}
   \ifnum\count@<"A0\relax
-     \PackageError{inputenc}{Cannot\space define\space Unicode\space
-                             char\space value\space <\space 00A0}\@eha
+    \ifnum\catcode\count@=13
+      \uccode`\~=\count@\uppercase{\def\UTFviii at tmp{\@empty\@empty~}}%
+    \else
+      \PackageError{inputenc}%
+                   {Cannot define non-active Unicode char value < 00A0}%
+                   \@eha
+      \def\UTFviii at tmp{\UTFviii at tmp}%
+    \fi
 %    \end{macrocode}
-%    Do not ask us to provide an explanation for the code below, it is
-%    borrowed straight from \texttt{xmltex} by David and we trust him
-%    totally (and we are too lazy to reread the Unicode book to see if
-%    this is the correct algorithm).\footnote{We were hoping to also
-%    find in his work the \TeX{} code for going the other way: from
-%    UTF-8 octets to Unicode slot number, but no luck!
-%    This has now been added as \cs{decode at UTFviii}}
+%    The code below is derived from \texttt{xmltex} and generates the UTF-8 byte sequence
+%    for the number in |\count@|.
+%
+%    The reverse operation (just used in error messages) 
+%    has now been added as \cs{decode at UTFviii}.
 %    \begin{macrocode}
   \else\ifnum\count@<"800\relax
      \parse at UTFviii@a,%
@@ -612,7 +660,7 @@
 % \end{macro}
 %
 % \begin{macro}{\parse at UTFviii@a}
-%    \ldots so somebody else can document this part :-) \ldots~David?:-))))!
+%    \ldots so somebody else can document this part :-)
 % \changes{v1.1b}{2004/02/09}{Space in the wrong place \cs{count @64}}
 %    \begin{macrocode}
 \gdef\parse at UTFviii@a#1{%
@@ -909,6 +957,9 @@
 %
 % \subsection{The mapping table}
 %
+% \begingroup
+% \hfuzz=12pt ^^A just because
+%
 % Note that the first argument must be a hex-digit number greater
 % than \texttt{00BF} and at most \texttt{10FFFF}.
 %
@@ -1553,6 +1604,7 @@
 %<all,t1>\DeclareUnicodeCharacter{1E21}{\@tabacckludge=g}
 %    \end{macrocode}
 % 
+% \endgroup
 % \subsection{Notes}
 %
 % \changes{v1.1e}{2004/05/22}{Added notes on inconsistency with `8-bit files'.}





More information about the latex3-commits mailing list