[latex3-commits] [git/LaTeX3-latex3-latex2e] master: update to default to UTF-8 see issue #24 (4090b0b)
David Carlisle
d.p.carlisle at gmail.com
Sun Mar 25 11:24:57 CEST 2018
Repository : https://github.com/latex3/latex2e
On branch : master
Link : https://github.com/latex3/latex2e/commit/4090b0bcbd2ddca0e0ab09f307939a6acf7f6908
>---------------------------------------------------------------
commit 4090b0bcbd2ddca0e0ab09f307939a6acf7f6908
Author: David Carlisle <d.p.carlisle at gmail.com>
Date: Sun Mar 25 10:24:57 2018 +0100
update to default to UTF-8 see issue #24
>---------------------------------------------------------------
4090b0bcbd2ddca0e0ab09f307939a6acf7f6908
base/changes.txt | 13 ++++
base/inputenc.dtx | 15 +++-
base/ltfinal.dtx | 69 ++++++++++++++++++
base/testfiles/tlb-utf8-dec-cp1252-2017.lvt | 18 +++++
.../{tlb2783.tlg => tlb-utf8-dec-cp1252-2017.tlg} | 46 ++++++------
base/testfiles/tlb-utf8-dec-cp1252.lvt | 17 +++++
.../{tlb2783.tlg => tlb-utf8-dec-cp1252.tlg} | 46 ++++++------
base/testfiles/tlb-utf8-dec-utf8-2017.lvt | 18 +++++
base/testfiles/tlb-utf8-dec-utf8-2017.tlg | 60 ++++++++++++++++
base/testfiles/tlb-utf8-dec-utf8.lvt | 17 +++++
.../{tlb2783.tlg => tlb-utf8-dec-utf8.tlg} | 46 ++++++------
base/testfiles/tlb-utf8-undec-cp1252-2017.lvt | 16 +++++
...{tlb2783.tlg => tlb-utf8-undec-cp1252-2017.tlg} | 46 ++++++------
base/testfiles/tlb-utf8-undec-cp1252.lvt | 15 ++++
base/testfiles/tlb-utf8-undec-cp1252.tlg | 74 ++++++++++++++++++++
base/testfiles/tlb-utf8-undec-utf8-2017.lvt | 16 +++++
base/testfiles/tlb-utf8-undec-utf8-2017.tlg | 60 ++++++++++++++++
base/testfiles/tlb-utf8-undec-utf8.lvt | 15 ++++
.../{tlb2783.tlg => tlb-utf8-undec-utf8.tlg} | 46 ++++++------
base/utf8ienc.dtx | 42 +++++++++--
20 files changed, 572 insertions(+), 123 deletions(-)
diff --git a/base/changes.txt b/base/changes.txt
index 376d133..09d1f0f 100644
--- a/base/changes.txt
+++ b/base/changes.txt
@@ -4,6 +4,19 @@ completeness or accuracy and it contains some references to files that
are not part of the distribution.
=======================================================================
+#########################
+# 2018-04-01 Release
+#########################
+
+
+2018-03-24 David Carlisle <latex-bugs at latex-project.org>
+
+ * utf8ienc.dtx: Add \UTFviii at invalid
+
+ * inputenc.dtx: Make \inputencoding{..} do nothing if it specifies the current encoding.
+
+ * ltfinal.dtx: Default to UTF-8 input encoding
+
2018-03-24 Frank Mittelbach <Frank.Mittelbach at latex-project.org>
* ltclass.dtx: Package/class rollback mechanism: use full file
diff --git a/base/inputenc.dtx b/base/inputenc.dtx
index 1987850..9842c60 100644
--- a/base/inputenc.dtx
+++ b/base/inputenc.dtx
@@ -1,6 +1,6 @@
% \iffalse meta-comment
%
-% Copyright 1993-2016
+% Copyright 1993-2018
% LaTeX3 Project and any individual authors listed elsewhere
% in this file.
%
@@ -308,7 +308,7 @@
%
% \subsubsection{\normalfont\ttfamily Keyboard character used is undefined in
% inputencoding `\meta{name}'}
-
+%
% The document contains an 8-bit character that is not defined by the
% current input encoding in force. This means that either there is a
% mismatch between the document encoding that the document claims it
@@ -410,7 +410,7 @@
%<cp1252&!ansinew> \ProvidesFile{cp1252.def}
%<cp1250> \ProvidesFile{cp1250.def}
%<cp1257> \ProvidesFile{cp1257.def}
- [2015/03/17 v1.2c Input encoding file]
+ [2018/03/24 v1.3a Input encoding file]
%<cp850>%%
%<cp850>%% If you need a Euro symbol, try cp858 instead.
%<cp850>%%
@@ -534,6 +534,8 @@
% \changes{v0.95}{1997/12/20}{Changed to work in any vmode, for David}
% \changes{v1.2a}{2014/04/20}{Added alternative definition for xe/lua tex (pr/4024)}%
% \changes{v1.2b}{2014/04/30}{Allow (x-)ascii and utf8x aliases for utf8 in xetex/luatex}%
+% \changes{v1.3a}{2018/03/24}{Make \cs{inputencoding} do nothing if
+% the argument matches current encoding}%
% This sets the encoding to be |#1|. It first sets all the
% characters 128--255 to be active (and sets their initial
% definition to be |\@inpenc at undefined|).
@@ -549,6 +551,9 @@
%
% \begin{macrocode}
\def\inputencoding#1{%
+ \edef\reserved at a{#1}%
+ \ifx\reserved at a\inputencodingname
+ \else
% \end{macrocode}
% We start with a hook to be executed before the encoding change
% happens.
@@ -631,6 +636,10 @@
% \changes{v1.1a}{2006/02/22}{Added \cs{inpenc at posthook}}
% \begin{macrocode}
\the\inpenc at posthook
+% \end{macrocode}
+%
+% \begin{macrocode}
+ \fi
}
% \end{macrocode}
%
diff --git a/base/ltfinal.dtx b/base/ltfinal.dtx
index baadd44..f045d2e 100644
--- a/base/ltfinal.dtx
+++ b/base/ltfinal.dtx
@@ -541,6 +541,75 @@
%
% \subsection{Input encoding}
%
+% Starting with the 2018 \LaTeX\ release the input encoding defaults
+% to UTF-8, unless the format is being used with luatex or xetex or enctex.
+%
+% This is done in a way largely compatible with older releases:\\
+% |\usepackage[utf8]{inputenc}|\\
+% is used in the format, but (similar to |fontenc|) the package load information
+% is undefined so that the encoding may be reset in the document using\\
+% |\usepackage[latin1]{inputenc}|\\
+% or whatever encoding is needed.
+%
+% If a document re-specifies UTF-8 this is silently ignored.
+%
+% \begin{macrocode}
+%</2ekernel>
+%<*2ekernel|latexrelease>
+% \end{macrocode}
+%
+% \begin{macrocode}
+\ifnum\@ne=
+\ifx\Umathchar\@undefined\ifx\mubyte\@undefined\@ne\else\z@\fi\else\z@\fi
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<latexrelease>\IncludeInRelease{2018/04/01}%
+%<latexrelease> {\UTFviii at invalid}{UTF-8 default}%
+% \end{macrocode}
+%
+% \begin{macrocode}
+\RequirePackage[utf8]{inputenc}
+\expandafter\let\csname ver at inputenc.sty\endcsname\relax
+\expandafter\let\csname opt at inputenc.sty\endcsname\relax
+%</2ekernel|latexrelease>
+%<latexrelease>\EndIncludeInRelease
+%<latexrelease>\IncludeInRelease{0000/00/00}%
+%<latexrelease> {\UTFviii at invalid}{UTF-8 default}%
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<latexrelease>\@tempcnta=128
+%<latexrelease>\loop
+%<latexrelease>\catcode\@tempcnta=12
+%<latexrelease>\advance\@tempcnta\@ne
+%<latexrelease>\ifnum\@tempcnta<256
+%<latexrelease>\repeat
+%<latexrelease>\def\DeclareFontEncoding@#1#2#3{%
+%<latexrelease> \expandafter
+%<latexrelease> \ifx\csname T@#1\endcsname\relax
+%<latexrelease> \def\cdp at elt{\noexpand\cdp at elt}%
+%<latexrelease> \xdef\cdp at list{\cdp at list\cdp at elt{#1}%
+%<latexrelease> {\default at family}{\default at series}%
+%<latexrelease> {\default at shape}}%
+%<latexrelease> \expandafter\let\csname#1-cmd\endcsname\@changed at cmd
+%<latexrelease> \else
+%<latexrelease> \@font at info{Redeclaring font encoding #1}%
+%<latexrelease> \fi
+%<latexrelease> \global\@namedef{T@#1}{#2}%
+%<latexrelease> \global\@namedef{M@#1}{\default at M#3}%
+%<latexrelease> \xdef\LastDeclaredEncoding{#1}%
+%<latexrelease> }
+%<latexrelease>\EndIncludeInRelease
+% \end{macrocode}
+%
+% \begin{macrocode}
+%<*2ekernel|latexrelease>
+\fi
+%</2ekernel|latexrelease>
+%<*2ekernel>
+% \end{macrocode}
+%
% We temporarily define |\reserved at a| to apply |\reserved at c| to all the
% numbers in the range of its arguments.
% \begin{macrocode}
diff --git a/base/testfiles/tlb-utf8-dec-cp1252-2017.lvt b/base/testfiles/tlb-utf8-dec-cp1252-2017.lvt
new file mode 100644
index 0000000..aef0262
--- /dev/null
+++ b/base/testfiles/tlb-utf8-dec-cp1252-2017.lvt
@@ -0,0 +1,18 @@
+\input{test2e}
+\RequirePackage[2017-12-31]{latexrelease}
+% declared cp1252, 2017 release
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\usepackage[cp1252]{inputenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aÖ x ß
+
+\end{document}
diff --git a/base/testfiles/tlb2783.tlg b/base/testfiles/tlb-utf8-dec-cp1252-2017.tlg
similarity index 54%
copy from base/testfiles/tlb2783.tlg
copy to base/testfiles/tlb-utf8-dec-cp1252-2017.tlg
index 9bbb72d..5e63cb4 100644
--- a/base/testfiles/tlb2783.tlg
+++ b/base/testfiles/tlb-utf8-dec-cp1252-2017.tlg
@@ -1,11 +1,5 @@
This is a generated file for the LaTeX2e validation system.
Don't change this file in any respect.
-> \textdollar=macro:
-->\T1-cmd \textdollar \T1\textdollar .
-l. ...\show\textdollar
-> \textperthousand=macro:
-->\T1-cmd \textperthousand \T1\textperthousand .
-l. ...\show\textperthousand
Completed box being shipped out [1]
\vbox(633.0+0.0)x407.0
.\glue 16.0
@@ -17,31 +11,37 @@ Completed box being shipped out [1]
..\glue(\lineskip) 0.0
..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
...\write-{}
-...\glue(\topskip) 2.50183
-...\hbox(7.49817+0.55542)x345.0, glue set 290.28748fil
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\T1/cmr/m/n/10 $
-....\T1/cmr/m/n/10 ^^bf
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 ^^X
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
...\glue(\parskip) 0.0 plus 1.0
-...\glue(\baselineskip) 3.94641
-...\hbox(7.49817+0.55542)x345.0, glue set 291.82414fil
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\TS1/cmr/m/n/10 $
-....\TS1/cmr/m/n/10 ^^a3
-....\TS1/cmr/m/n/10 ^^87
-....\TS1/cmr/m/n/10 ^^98
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
-...\glue -0.55542
...\glue 0.0 plus 1.0fil
...\glue 0.0
...\glue 0.0 plus 0.0001fil
@@ -50,4 +50,4 @@ Completed box being shipped out [1]
...\glue 0.0 plus 1.0fil
...\T1/cmr/m/n/10 1
...\glue 0.0 plus 1.0fil
-(tlb2783.aux)
+(tlb-utf8-dec-cp1252-2017.aux)
diff --git a/base/testfiles/tlb-utf8-dec-cp1252.lvt b/base/testfiles/tlb-utf8-dec-cp1252.lvt
new file mode 100644
index 0000000..22e1c53
--- /dev/null
+++ b/base/testfiles/tlb-utf8-dec-cp1252.lvt
@@ -0,0 +1,17 @@
+\input{test2e}
+% declared cp1252
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\usepackage[cp1252]{inputenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aÖ x ß
+
+\end{document}
diff --git a/base/testfiles/tlb2783.tlg b/base/testfiles/tlb-utf8-dec-cp1252.tlg
similarity index 55%
copy from base/testfiles/tlb2783.tlg
copy to base/testfiles/tlb-utf8-dec-cp1252.tlg
index 9bbb72d..467aa24 100644
--- a/base/testfiles/tlb2783.tlg
+++ b/base/testfiles/tlb-utf8-dec-cp1252.tlg
@@ -1,11 +1,5 @@
This is a generated file for the LaTeX2e validation system.
Don't change this file in any respect.
-> \textdollar=macro:
-->\T1-cmd \textdollar \T1\textdollar .
-l. ...\show\textdollar
-> \textperthousand=macro:
-->\T1-cmd \textperthousand \T1\textperthousand .
-l. ...\show\textperthousand
Completed box being shipped out [1]
\vbox(633.0+0.0)x407.0
.\glue 16.0
@@ -17,31 +11,37 @@ Completed box being shipped out [1]
..\glue(\lineskip) 0.0
..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
...\write-{}
-...\glue(\topskip) 2.50183
-...\hbox(7.49817+0.55542)x345.0, glue set 290.28748fil
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\T1/cmr/m/n/10 $
-....\T1/cmr/m/n/10 ^^bf
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 ^^X
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
...\glue(\parskip) 0.0 plus 1.0
-...\glue(\baselineskip) 3.94641
-...\hbox(7.49817+0.55542)x345.0, glue set 291.82414fil
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\TS1/cmr/m/n/10 $
-....\TS1/cmr/m/n/10 ^^a3
-....\TS1/cmr/m/n/10 ^^87
-....\TS1/cmr/m/n/10 ^^98
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
-...\glue -0.55542
...\glue 0.0 plus 1.0fil
...\glue 0.0
...\glue 0.0 plus 0.0001fil
@@ -50,4 +50,4 @@ Completed box being shipped out [1]
...\glue 0.0 plus 1.0fil
...\T1/cmr/m/n/10 1
...\glue 0.0 plus 1.0fil
-(tlb2783.aux)
+(tlb-utf8-dec-cp1252.aux)
diff --git a/base/testfiles/tlb-utf8-dec-utf8-2017.lvt b/base/testfiles/tlb-utf8-dec-utf8-2017.lvt
new file mode 100644
index 0000000..f1ab319
--- /dev/null
+++ b/base/testfiles/tlb-utf8-dec-utf8-2017.lvt
@@ -0,0 +1,18 @@
+\input{test2e}
+\RequirePackage[2017-12-31]{latexrelease}
+% declared UTF-8, 2017 release
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\usepackage[utf8]{inputenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aà âxâ Ã
+
+\end{document}
diff --git a/base/testfiles/tlb-utf8-dec-utf8-2017.tlg b/base/testfiles/tlb-utf8-dec-utf8-2017.tlg
new file mode 100644
index 0000000..b0973f9
--- /dev/null
+++ b/base/testfiles/tlb-utf8-dec-utf8-2017.tlg
@@ -0,0 +1,60 @@
+This is a generated file for the LaTeX2e validation system.
+Don't change this file in any respect.
+Completed box being shipped out [1]
+\vbox(633.0+0.0)x407.0
+.\glue 16.0
+.\vbox(617.0+0.0)x345.0, shifted 62.0
+..\vbox(12.0+0.0)x345.0, glue set 12.0fil
+...\glue 0.0 plus 1.0fil
+...\hbox(0.0+0.0)x345.0
+..\glue 25.0
+..\glue(\lineskip) 0.0
+..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
+...\write-{}
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue(\parskip) 0.0 plus 1.0
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+1.94397)x345.0, glue set 249.1864fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^c3
+....\kern-0.27771
+....\T1/cmr/m/n/10 ^^96
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^e2
+....\T1/cmr/m/n/10 ^^80
+....\T1/cmr/m/n/10 ^^9c
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^e2
+....\T1/cmr/m/n/10 ^^80
+....\T1/cmr/m/n/10 ^^9d
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^c3
+....\T1/cmr/m/n/10 ^^9f
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue -1.94397
+...\glue 0.0 plus 1.0fil
+...\glue 0.0
+...\glue 0.0 plus 0.0001fil
+..\glue(\baselineskip) 23.5849
+..\hbox(6.4151+0.0)x345.0, glue set 170.00061fil
+...\glue 0.0 plus 1.0fil
+...\T1/cmr/m/n/10 1
+...\glue 0.0 plus 1.0fil
+(tlb-utf8-dec-utf8-2017.aux)
diff --git a/base/testfiles/tlb-utf8-dec-utf8.lvt b/base/testfiles/tlb-utf8-dec-utf8.lvt
new file mode 100644
index 0000000..3f79d21
--- /dev/null
+++ b/base/testfiles/tlb-utf8-dec-utf8.lvt
@@ -0,0 +1,17 @@
+\input{test2e}
+% declared UTF-8
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\usepackage[utf8]{inputenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aà âxâ Ã
+
+\end{document}
diff --git a/base/testfiles/tlb2783.tlg b/base/testfiles/tlb-utf8-dec-utf8.tlg
similarity index 55%
copy from base/testfiles/tlb2783.tlg
copy to base/testfiles/tlb-utf8-dec-utf8.tlg
index 9bbb72d..633f0e3 100644
--- a/base/testfiles/tlb2783.tlg
+++ b/base/testfiles/tlb-utf8-dec-utf8.tlg
@@ -1,11 +1,5 @@
This is a generated file for the LaTeX2e validation system.
Don't change this file in any respect.
-> \textdollar=macro:
-->\T1-cmd \textdollar \T1\textdollar .
-l. ...\show\textdollar
-> \textperthousand=macro:
-->\T1-cmd \textperthousand \T1\textperthousand .
-l. ...\show\textperthousand
Completed box being shipped out [1]
\vbox(633.0+0.0)x407.0
.\glue 16.0
@@ -17,31 +11,37 @@ Completed box being shipped out [1]
..\glue(\lineskip) 0.0
..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
...\write-{}
-...\glue(\topskip) 2.50183
-...\hbox(7.49817+0.55542)x345.0, glue set 290.28748fil
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\T1/cmr/m/n/10 $
-....\T1/cmr/m/n/10 ^^bf
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 ^^X
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
...\glue(\parskip) 0.0 plus 1.0
-...\glue(\baselineskip) 3.94641
-...\hbox(7.49817+0.55542)x345.0, glue set 291.82414fil
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\TS1/cmr/m/n/10 $
-....\TS1/cmr/m/n/10 ^^a3
-....\TS1/cmr/m/n/10 ^^87
-....\TS1/cmr/m/n/10 ^^98
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
-...\glue -0.55542
...\glue 0.0 plus 1.0fil
...\glue 0.0
...\glue 0.0 plus 0.0001fil
@@ -50,4 +50,4 @@ Completed box being shipped out [1]
...\glue 0.0 plus 1.0fil
...\T1/cmr/m/n/10 1
...\glue 0.0 plus 1.0fil
-(tlb2783.aux)
+(tlb-utf8-dec-utf8.aux)
diff --git a/base/testfiles/tlb-utf8-undec-cp1252-2017.lvt b/base/testfiles/tlb-utf8-undec-cp1252-2017.lvt
new file mode 100644
index 0000000..371665a
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-cp1252-2017.lvt
@@ -0,0 +1,16 @@
+\input{test2e}
+\RequirePackage[2017-12-31]{latexrelease}
+% undeclared cp1252, 2017 release
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aÖ x ß
+
+\end{document}
diff --git a/base/testfiles/tlb2783.tlg b/base/testfiles/tlb-utf8-undec-cp1252-2017.tlg
similarity index 54%
copy from base/testfiles/tlb2783.tlg
copy to base/testfiles/tlb-utf8-undec-cp1252-2017.tlg
index 9bbb72d..2a55876 100644
--- a/base/testfiles/tlb2783.tlg
+++ b/base/testfiles/tlb-utf8-undec-cp1252-2017.tlg
@@ -1,11 +1,5 @@
This is a generated file for the LaTeX2e validation system.
Don't change this file in any respect.
-> \textdollar=macro:
-->\T1-cmd \textdollar \T1\textdollar .
-l. ...\show\textdollar
-> \textperthousand=macro:
-->\T1-cmd \textperthousand \T1\textperthousand .
-l. ...\show\textperthousand
Completed box being shipped out [1]
\vbox(633.0+0.0)x407.0
.\glue 16.0
@@ -17,31 +11,37 @@ Completed box being shipped out [1]
..\glue(\lineskip) 0.0
..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
...\write-{}
-...\glue(\topskip) 2.50183
-...\hbox(7.49817+0.55542)x345.0, glue set 290.28748fil
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\T1/cmr/m/n/10 $
-....\T1/cmr/m/n/10 ^^bf
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 ^^X
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
...\glue(\parskip) 0.0 plus 1.0
-...\glue(\baselineskip) 3.94641
-...\hbox(7.49817+0.55542)x345.0, glue set 291.82414fil
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+1.70097)x345.0, glue set 281.40076fil
....\hbox(0.0+0.0)x15.0
-....\TS1/cmr/m/n/10 $
-....\TS1/cmr/m/n/10 ^^a3
-....\TS1/cmr/m/n/10 ^^87
-....\TS1/cmr/m/n/10 ^^98
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^93
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^94
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^df
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
-...\glue -0.55542
+...\glue -1.70097
...\glue 0.0 plus 1.0fil
...\glue 0.0
...\glue 0.0 plus 0.0001fil
@@ -50,4 +50,4 @@ Completed box being shipped out [1]
...\glue 0.0 plus 1.0fil
...\T1/cmr/m/n/10 1
...\glue 0.0 plus 1.0fil
-(tlb2783.aux)
+(tlb-utf8-undec-cp1252-2017.aux)
diff --git a/base/testfiles/tlb-utf8-undec-cp1252.lvt b/base/testfiles/tlb-utf8-undec-cp1252.lvt
new file mode 100644
index 0000000..a87259a
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-cp1252.lvt
@@ -0,0 +1,15 @@
+\input{test2e}
+% undeclared cp1252
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aÖ x ß
+
+\end{document}
diff --git a/base/testfiles/tlb-utf8-undec-cp1252.tlg b/base/testfiles/tlb-utf8-undec-cp1252.tlg
new file mode 100644
index 0000000..9a34996
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-cp1252.tlg
@@ -0,0 +1,74 @@
+This is a generated file for the LaTeX2e validation system.
+Don't change this file in any respect.
+! Package inputenc Error: Unicode char ^^d6^^93 (U+593)
+(inputenc) not set up for use with LaTeX.
+See the inputenc package documentation for explanation.
+Type H <return> for immediate help.
+ ...
+l. ...a^^d6 ^^93
+ x^^94 ^^df
+Your command was ignored.
+Type I <command> <return> to replace it with another command,
+or <return> to continue without it.
+! Package inputenc Error: Invalid UTF-8 byte 148.
+See the inputenc package documentation for explanation.
+Type H <return> for immediate help.
+ ...
+l. ...a^^d6 ^^93x^^94
+ ^^df
+Your command was ignored.
+Type I <command> <return> to replace it with another command,
+or <return> to continue without it.
+! Package inputenc Error: Unicode char ^^df\par (U+1BE)
+(inputenc) not set up for use with LaTeX.
+See the inputenc package documentation for explanation.
+Type H <return> for immediate help.
+ ...
+l. ...
+Your command was ignored.
+Type I <command> <return> to replace it with another command,
+or <return> to continue without it.
+Completed box being shipped out [1]
+\vbox(633.0+0.0)x407.0
+.\glue 16.0
+.\vbox(617.0+0.0)x345.0, shifted 62.0
+..\vbox(12.0+0.0)x345.0, glue set 12.0fil
+...\glue 0.0 plus 1.0fil
+...\hbox(0.0+0.0)x345.0
+..\glue 25.0
+..\glue(\lineskip) 0.0
+..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
+...\write-{}
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue(\parskip) 0.0 plus 1.0
+...\glue(\baselineskip) 7.6955
+...\hbox(4.3045+0.0)x345.0, glue set 319.72473fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 x
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue 0.0 plus 1.0fil
+...\glue 0.0
+...\glue 0.0 plus 0.0001fil
+..\glue(\baselineskip) 23.5849
+..\hbox(6.4151+0.0)x345.0, glue set 170.00061fil
+...\glue 0.0 plus 1.0fil
+...\T1/cmr/m/n/10 1
+...\glue 0.0 plus 1.0fil
+(tlb-utf8-undec-cp1252.aux)
diff --git a/base/testfiles/tlb-utf8-undec-utf8-2017.lvt b/base/testfiles/tlb-utf8-undec-utf8-2017.lvt
new file mode 100644
index 0000000..07bccf3
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-utf8-2017.lvt
@@ -0,0 +1,16 @@
+\input{test2e}
+\RequirePackage[2017-12-31]{latexrelease}
+% undeclared UTF-8, 2017 release
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aà âxâ Ã
+
+\end{document}
diff --git a/base/testfiles/tlb-utf8-undec-utf8-2017.tlg b/base/testfiles/tlb-utf8-undec-utf8-2017.tlg
new file mode 100644
index 0000000..47baaaa
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-utf8-2017.tlg
@@ -0,0 +1,60 @@
+This is a generated file for the LaTeX2e validation system.
+Don't change this file in any respect.
+Completed box being shipped out [1]
+\vbox(633.0+0.0)x407.0
+.\glue 16.0
+.\vbox(617.0+0.0)x345.0, shifted 62.0
+..\vbox(12.0+0.0)x345.0, glue set 12.0fil
+...\glue 0.0 plus 1.0fil
+...\hbox(0.0+0.0)x345.0
+..\glue 25.0
+..\glue(\lineskip) 0.0
+..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
+...\write-{}
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue(\parskip) 0.0 plus 1.0
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+1.94397)x345.0, glue set 249.1864fil
+....\hbox(0.0+0.0)x15.0
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^c3
+....\kern-0.27771
+....\T1/cmr/m/n/10 ^^96
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^e2
+....\T1/cmr/m/n/10 ^^80
+....\T1/cmr/m/n/10 ^^9c
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^e2
+....\T1/cmr/m/n/10 ^^80
+....\T1/cmr/m/n/10 ^^9d
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^c3
+....\T1/cmr/m/n/10 ^^9f
+....\penalty 10000
+....\glue(\parfillskip) 0.0 plus 1.0fil
+....\glue(\rightskip) 0.0
+...\glue -1.94397
+...\glue 0.0 plus 1.0fil
+...\glue 0.0
+...\glue 0.0 plus 0.0001fil
+..\glue(\baselineskip) 23.5849
+..\hbox(6.4151+0.0)x345.0, glue set 170.00061fil
+...\glue 0.0 plus 1.0fil
+...\T1/cmr/m/n/10 1
+...\glue 0.0 plus 1.0fil
+(tlb-utf8-undec-utf8-2017.aux)
diff --git a/base/testfiles/tlb-utf8-undec-utf8.lvt b/base/testfiles/tlb-utf8-undec-utf8.lvt
new file mode 100644
index 0000000..163a966
--- /dev/null
+++ b/base/testfiles/tlb-utf8-undec-utf8.lvt
@@ -0,0 +1,15 @@
+\input{test2e}
+% undeclared UTF-8
+\documentclass{article}
+\usepackage[T1]{fontenc}
+
+\begin{document}
+
+\START
+\showoutput
+
+a\"O ``x'' \ss
+
+aà âxâ Ã
+
+\end{document}
diff --git a/base/testfiles/tlb2783.tlg b/base/testfiles/tlb-utf8-undec-utf8.tlg
similarity index 55%
copy from base/testfiles/tlb2783.tlg
copy to base/testfiles/tlb-utf8-undec-utf8.tlg
index 9bbb72d..e20050c 100644
--- a/base/testfiles/tlb2783.tlg
+++ b/base/testfiles/tlb-utf8-undec-utf8.tlg
@@ -1,11 +1,5 @@
This is a generated file for the LaTeX2e validation system.
Don't change this file in any respect.
-> \textdollar=macro:
-->\T1-cmd \textdollar \T1\textdollar .
-l. ...\show\textdollar
-> \textperthousand=macro:
-->\T1-cmd \textperthousand \T1\textperthousand .
-l. ...\show\textperthousand
Completed box being shipped out [1]
\vbox(633.0+0.0)x407.0
.\glue 16.0
@@ -17,31 +11,37 @@ Completed box being shipped out [1]
..\glue(\lineskip) 0.0
..\vbox(550.0+0.0)x345.0, glue set 527.9436fil
...\write-{}
-...\glue(\topskip) 2.50183
-...\hbox(7.49817+0.55542)x345.0, glue set 290.28748fil
+...\glue(\topskip) 1.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\T1/cmr/m/n/10 $
-....\T1/cmr/m/n/10 ^^bf
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 %
-....\T1/cmr/m/n/10 ^^X
-....\T1/cmr/m/n/10 ^^X
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P (ligature ``)
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q (ligature '')
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
...\glue(\parskip) 0.0 plus 1.0
-...\glue(\baselineskip) 3.94641
-...\hbox(7.49817+0.55542)x345.0, glue set 291.82414fil
+...\glue(\baselineskip) 3.08551
+...\hbox(8.91449+0.0)x345.0, glue set 292.78687fil
....\hbox(0.0+0.0)x15.0
-....\TS1/cmr/m/n/10 $
-....\TS1/cmr/m/n/10 ^^a3
-....\TS1/cmr/m/n/10 ^^87
-....\TS1/cmr/m/n/10 ^^98
+....\T1/cmr/m/n/10 a
+....\T1/cmr/m/n/10 ^^d6
+....\glue 3.33252 plus 1.66458 minus 1.11194
+....\T1/cmr/m/n/10 ^^P
+....\T1/cmr/m/n/10 x
+....\T1/cmr/m/n/10 ^^Q
+....\kern0.83313
+....\glue 3.33252 plus 1.66626 minus 1.11084
+....\T1/cmr/m/n/10 ^^ff
....\penalty 10000
....\glue(\parfillskip) 0.0 plus 1.0fil
....\glue(\rightskip) 0.0
-...\glue -0.55542
...\glue 0.0 plus 1.0fil
...\glue 0.0
...\glue 0.0 plus 0.0001fil
@@ -50,4 +50,4 @@ Completed box being shipped out [1]
...\glue 0.0 plus 1.0fil
...\T1/cmr/m/n/10 1
...\glue 0.0 plus 1.0fil
-(tlb2783.aux)
+(tlb-utf8-undec-utf8.aux)
diff --git a/base/utf8ienc.dtx b/base/utf8ienc.dtx
index 00c6433..ada847c 100644
--- a/base/utf8ienc.dtx
+++ b/base/utf8ienc.dtx
@@ -1,6 +1,6 @@
% \iffalse meta-comment
%
-% Copyright 1993-2017
+% Copyright 1993-2018
% The LaTeX3 Project and any individual authors listed elsewhere
% in this file.
%
@@ -218,7 +218,7 @@
%<+ts1> \ProvidesFile{ts1enc.dfu}
%<+x2> \ProvidesFile{x2enc.dfu}
%<+all> \ProvidesFile{utf8enc.dfu}
- [2017/01/28 v1.1t UTF-8 support for inputenc]
+ [2018/04/24 v1.2a UTF-8 support for inputenc]
% \end{macrocode}
%
% \begin{macrocode}
@@ -241,6 +241,7 @@
% \begin{macro}{\UTFviii at two@octets}
% \begin{macro}{\UTFviii at three@octets}
% \begin{macro}{\UTFviii at four@octets}
+% \changes{v1.2a}{2018/03/24}{Macros made \cs{long} for improved error messages}%
% A UTF-8 char (that is not actually a 7-bit char, i.e.~a single
% octet) is parsed as follows: each starting octet is an active
% \TeX{} character token; each of these is defined below to be a
@@ -263,19 +264,19 @@
% defined we simply execute the thing (which should then expand to
% an encoding specific internal \LaTeX{} form).
% \begin{macrocode}
-\def\UTFviii at two@octets#1#2{\expandafter
+\long\def\UTFviii at two@octets#1#2{\expandafter
\UTFviii at defined\csname u8:#1\string#2\endcsname}
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
-\def\UTFviii at three@octets#1#2#3{\expandafter
+\long\def\UTFviii at three@octets#1#2#3{\expandafter
\UTFviii at defined\csname u8:#1\string#2\string#3\endcsname}
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
-\def\UTFviii at four@octets#1#2#3#4{\expandafter
+\long\def\UTFviii at four@octets#1#2#3#4{\expandafter
\UTFviii at defined\csname u8:#1\string#2\string#3\string#4\endcsname}
% \end{macrocode}
% \end{macro}
@@ -308,6 +309,15 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}{\UTFviii at invalid}
+% \changes{v1.2a}{2018/03/24}{Macro added}%
+% \begin{macrocode}
+\def\UTFviii at invalid#1{%
+ \PackageError{inputenc}{Invalid UTF-8 byte \number`#1}\@eha}
+% \end{macrocode}
+% \end{macro}
+%
+%
% \begin{macro}{\UTFviii at loop}
% This wonderful bit of code from Dr Carlisle defines the starting
% octets to call |\UTFviii at two@octets| etc as appropriate. The starting
@@ -341,6 +351,14 @@
\fi}
% \end{macrocode}
%
+% Bytes with leading bits |10| (and the bytes C0 and C1, which lie below the first two-byte starter C2) are not valid UTF-8 starting bytes.
+% \begin{macrocode}
+ \count@"80
+ \@tempcnta"C2
+ \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid\string~}}
+\UTFviii at loop
+% \end{macrocode}
+%
% Setting up 2-byte UTF-8:
% \begin{macrocode}
\count@"C2
@@ -348,6 +366,7 @@
\def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at two@octets\string~}}
\UTFviii at loop
% \end{macrocode}
+%
% Setting up 3-byte UTF-8:
% \begin{macrocode}
\count@"E0
@@ -362,8 +381,21 @@
\@tempcnta"F4
\def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at four@octets\string~}}
\UTFviii at loop
+% \end{macrocode}
+%
+% Bytes above F4 are not valid UTF-8 starting bytes as they would encode numbers beyond
+% the Unicode range
+% \begin{macrocode}
+ \count@"F4
+ \@tempcnta"100
+ \def\UTFviii at tmp{\xdef~{\noexpand\UTFviii at invalid\string~}}
+\UTFviii at loop
+% \end{macrocode}
+%
+% \begin{macrocode}
\endgroup
% \end{macrocode}
+%
% \end{macro}
%
% For this case we must disable the warning generated by
More information about the latex3-commits
mailing list