texlive[67126] Master/texmf-dist: sanitize-umlaut (15may23)

commits+karl at tug.org commits+karl at tug.org
Mon May 15 23:14:37 CEST 2023


Revision: 67126
          http://tug.org/svn/texlive?view=revision&revision=67126
Author:   karl
Date:     2023-05-15 23:14:37 +0200 (Mon, 15 May 2023)
Log Message:
-----------
sanitize-umlaut (15may23)

Modified Paths:
--------------
    trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/CHANGES.md
    trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/README.md
    trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.pdf
    trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.tex
    trunk/Master/texmf-dist/tex/latex/sanitize-umlaut/sanitize-umlaut.sty

Modified: trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/CHANGES.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/CHANGES.md	2023-05-15 21:14:24 UTC (rev 67125)
+++ trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/CHANGES.md	2023-05-15 21:14:37 UTC (rev 67126)
@@ -17,6 +17,16 @@
 
 
 
+## [1.3.0] - 2023-05-15
+
+### Added
+- Support for lualatex and xelatex
+
+### Changed
+- The style file `sanitize-umlaut.sty` is UTF-8 encoded now and contains umlauts
+
+
+
 ## [1.2.1] - 2022-06-29
 
 ### Fixed

Modified: trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/README.md	2023-05-15 21:14:24 UTC (rev 67125)
+++ trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/README.md	2023-05-15 21:14:37 UTC (rev 67126)
@@ -1,7 +1,7 @@
-# The LaTeX package sanitize-umlaut - version 1.2.1 (2022/06/29)
+# The LaTeX package sanitize-umlaut - version 1.3.0 (2023/05/15)
 
 
-> Copyright (c) 2016-2022 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
+> Copyright (c) 2016-2023 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
 
 > This work may be distributed and/or modified under the
 > conditions of the LaTeX Project Public License, either version 1.3

Modified: trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.tex
===================================================================
--- trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.tex	2023-05-15 21:14:24 UTC (rev 67125)
+++ trunk/Master/texmf-dist/doc/latex/sanitize-umlaut/sanitize-umlaut.tex	2023-05-15 21:14:37 UTC (rev 67126)
@@ -2,11 +2,11 @@
 % !TeX encoding=UTF-8
 % !TeX spellcheck=en_US
 %%
-%% The LaTeX package sanitize-umlaut - version 1.2.1 (2022/06/29)
+%% The LaTeX package sanitize-umlaut - version 1.3.0 (2023/05/15)
 %% sanitize-umlaut.tex: Manual
 %%
 %% -------------------------------------------------------------------------------------------
-%% Copyright (c) 2016-2022 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
+%% Copyright (c) 2016-2023 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
 %% -------------------------------------------------------------------------------------------
 %%
 %% This work may be distributed and/or modified under the
@@ -27,8 +27,8 @@
 \usepackage{%
   sanitize-umlaut}
 
-\def\version{1.2.1}%
-\def\datum{2022/06/29}%
+\def\version{1.3.0}%
+\def\datum{2023/05/15}%
 
 \hypersetup{
   pdftitle={Manual for the sanitize-umlaut package},
@@ -50,8 +50,8 @@
 \end{tcolorbox}
 {\large Thomas F.~Sturm%
   \footnote{Prof.~Dr.~Dr.~Thomas F.~Sturm, Institut f\"{u}r Mathematik und Informatik,
-    Universit\"{a}t der Bundeswehr M\"{u}nchen, D-85577 Neubiberg, Germany;
-     email: \href{mailto:thomas.sturm at unibw.de}{thomas.sturm at unibw.de}}\par\medskip
+    University of the Bundeswehr Munich, D-85577 Neubiberg, Germany;
+    email: \href{mailto:thomas.sturm at unibw.de}{thomas.sturm at unibw.de}}\par\medskip
 \normalsize\url{https://www.ctan.org/pkg/sanitize-umlaut}\par
 \url{https://github.com/T-F-S/sanitize-umlaut}
 }
@@ -63,7 +63,7 @@
   MakeIndex and friends with |pdflatex|. This means that inside |\index| an umlaut can
   be used as \texttt{"U} or \texttt{Ü}. In both cases, the letter is written
   as \texttt{"U} into the raw index file for correct processing with MakeIndex
-  and |pdflatex|.
+  and |pdflatex|. |lualatex| and |xelatex| are also supported with a different approach.
 \end{absquote}
 
 \begin{tcolorbox}[breakable,enhanced jigsaw,title={Contents},fonttitle=\bfseries\Large,
@@ -97,7 +97,8 @@
 \begin{itemize}
 \item for documents in German language using the babel package with a setting
   identical or similar to \myverb{\usepackage[ngerman]{babel}}.
-\item for documents which are processed by |latex| or |pdflatex| (not |lualatex| or |xelatex|).
+\item for documents which are processed by |latex| or |pdflatex| (also for |lualatex| or |xelatex|,
+    but with more compilation overhead).
 \item for documents with an index which is processed using the
   MakeIndex program.
 \item for authors who like to use \myverb{\index{Übermaß}} instead of
@@ -145,6 +146,10 @@
 pre 2022 June behaviour. Obviously, you lose |\protected| here if you
 load |sanitize-umlaut|.
 
+|sanitize-umlaut| version 1.3.0 (or newer) also supports |lualatex| and |xelatex|
+with a different approach. Here, |\index| is patched such that its argument
+is processed to replace umlauts.
+
 \subsection{Future}
 As always, the future is dark and difficult to see. Further changes of
 |inputenc| implementation may force further changes of |sanitize-umlaut|.
@@ -165,6 +170,7 @@
 \end{dispListing}
 
 For |utf8| (UTF-8), modern \LaTeX\ does not need this package inclusion any more!
+Also, for |lualatex| and |xelatex| this has to be omitted.
 
 Just some few encodings are supported by |sanitize-umlaut|.
 These are the most important for German language texts:
@@ -211,6 +217,7 @@
 \end{tabular}}
 
 
+\clearpage
 \subsection{Technical Information}
 
 The package uses \myverb{\inputencodingname} (set by \LaTeX\ and the |inputencoding| package)
@@ -222,7 +229,15 @@
 If another package (besides |babel|) also changes this macro or uses it
 outside \myverb{\index}, strange things may happen.
 
+If \myverb{\inputencodingname} is \emph{not} present, the package checks whether
+the current engine is |luatex| or |xetex| and patches the \myverb{\index} macro
+itself. All umlauts inside the argument of \myverb{\index} are replaced by their
+|babel| shorthand codes using \LaTeX3 token replacement.
+This increases compilation time considerably compared to the \myverb{\@sanitize} hack
+for |pdflatex|. A very rough figure is approximately 0.8 additional seconds per
+10000 \myverb{\index} calls (this will differ on other machines and with other example code).
 
+
 \clearpage
 \section{Application Examples}%
 
@@ -357,4 +372,32 @@
 \end{fullexample}
 
 
+\begin{fullexample}{run arara}
+% !TeX encoding=UTF-8
+% arara: lualatex
+\documentclass[a4paper,12pt]{article}
+\usepackage{fontspec}
+\usepackage[ngerman]{babel}
+\usepackage[makeindex]{imakeidx}
+\indexsetup{level=\section*,noclearpage}
+\makeindex[name=personen,title=Personenregister,options=-s german.ist -g]
+\makeindex[name=allgemein,title=Allgemeines Register,options=-s german.ist -g]
+\usepackage{sanitize-umlaut}
+\begin{document}
+\section{Example with multiple indexes for lualatex}
+Test äöüÄÖÜß.
+\index[personen]{Huber, Hans}    \index[personen]{Hübner, Jörg}
+\index[allgemein]{Aber}          \index[allgemein]{Arg}
+\index[allgemein]{Ärger}         \index[allgemein]{Ofen}
+\index[allgemein]{Ö - wie schön} \index[allgemein]{oberhalb}
+\index[allgemein]{Ufer}          \index[allgemein]{Übermaß}
+\index[allgemein]{Latex=\LaTeX}  \index[allgemein]{Ärger>Index}
+Test äöüÄÖÜß.
+\clearpage
+\printindex[allgemein]
+\printindex[personen]
 \end{document}
+\end{fullexample}
+
+
+\end{document}

Modified: trunk/Master/texmf-dist/tex/latex/sanitize-umlaut/sanitize-umlaut.sty
===================================================================
--- trunk/Master/texmf-dist/tex/latex/sanitize-umlaut/sanitize-umlaut.sty	2023-05-15 21:14:24 UTC (rev 67125)
+++ trunk/Master/texmf-dist/tex/latex/sanitize-umlaut/sanitize-umlaut.sty	2023-05-15 21:14:37 UTC (rev 67126)
@@ -1,8 +1,9 @@
-%% The LaTeX package sanitize-umlaut - version 1.2.1 (2022/06/29)
+% !TeX encoding=UTF-8
+%% The LaTeX package sanitize-umlaut - version 1.3.0 (2023/05/15)
 %% sanitize-umlaut.sty: Sanitize umlauts for makeindex
 %%
 %% -------------------------------------------------------------------------------------------
-%% Copyright (c) 2016-2022 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
+%% Copyright (c) 2016-2023 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
 %% -------------------------------------------------------------------------------------------
 %%
 %% This work may be distributed and/or modified under the
@@ -18,7 +19,7 @@
 %% This work consists of all files listed in README
 %%
 \NeedsTeXFormat{LaTeX2e}
-\ProvidesPackage{sanitize-umlaut}[2022/06/29 version 1.2.1 sanitize umlauts]
+\ProvidesPackage{sanitize-umlaut}[2023/05/15 version 1.3.0 sanitize umlauts]
 
 \def\sanitize@unicode@char#1#2{%
   \@namedef{u8:\detokenize{#1}}{#2}%
@@ -115,9 +116,53 @@
     \def\sanitize@umlaut{}%
     \PackageWarning{sanitize-umlaut}{encoding \inputencodingname\space is not supported}
   \fi\fi\fi\fi\fi\fi
+  \AtBeginDocument{\edef\@sanitize{\unexpanded\expandafter{\@sanitize}\unexpanded\expandafter{\sanitize@umlaut}}}
 \else
-  \let\sanitize@umlaut\sanitize@umlaut@utfviii
-  \PackageWarning{sanitize-umlaut}{encoding unknown. utf8 is selected as fallback (possibly nonsense)}
+  \ExplSyntaxOn
+
+  \bool_lazy_or:nnTF { \sys_if_engine_luatex_p: }{ \sys_if_engine_xetex_p: }
+    {
+      \cs_new_protected_nopar:Npn \__sanuml_index:n #1
+        {
+          \__sanuml_index_original:w { #1 }
+        }
+
+      \cs_new_protected_nopar:Npn \__sanuml_index:nn #1#2
+        {
+          \__sanuml_index_original:w [ #1 ] { #2 }
+        }
+
+      \PackageInfo{sanitize-umlaut}{use~LaTeX3~token~replacement~for~\c_sys_engine_str}
+      \AddToHook{begindocument}
+      {
+        \cs_set_eq:NN \__sanuml_index_original:w \index
+
+        \RenewDocumentCommand \index { o m }
+          {
+            \tl_set:Nn \l_tmpa_tl { #2 }
+            \tl_replace_all:Nnn \l_tmpa_tl { ä }{ "a }
+            \tl_replace_all:Nnn \l_tmpa_tl { ö }{ "o }
+            \tl_replace_all:Nnn \l_tmpa_tl { ü }{ "u }
+            \tl_replace_all:Nnn \l_tmpa_tl { Ä }{ "A }
+            \tl_replace_all:Nnn \l_tmpa_tl { Ö }{ "O }
+            \tl_replace_all:Nnn \l_tmpa_tl { Ü }{ "U }
+            \tl_replace_all:Nnn \l_tmpa_tl { ß }{ "s }
+            \IfNoValueTF {#1}
+              {
+                \exp_args:NV \__sanuml_index:n \l_tmpa_tl
+              }
+              {
+                \exp_args:NnV \__sanuml_index:nn {#1} \l_tmpa_tl
+              }
+          }
+      }
+    }
+    {
+      \PackageWarning{sanitize-umlaut}{no~input~encoding~detected.~utf8~is~selected~as~fallback~(possibly~nonsense)}
+      \let\sanitize@umlaut\sanitize@umlaut@utfviii
+      \AtBeginDocument{\edef\@sanitize{\unexpanded\expandafter{\@sanitize}\unexpanded\expandafter{\sanitize@umlaut}}}
+    }
+
+  \ExplSyntaxOff
 \fi
 
-\AtBeginDocument{\edef\@sanitize{\unexpanded\expandafter{\@sanitize}\unexpanded\expandafter{\sanitize@umlaut}}}



More information about the tex-live-commits mailing list.