[latex3-commits] [latex3/babel] main: New: \ShowLocaleProperties. Case mapping in ini files (WIP). (53ad9c0)

github at latex-project.org github at latex-project.org
Thu Nov 30 17:54:39 CET 2023


Repository : https://github.com/latex3/babel
On branch  : main
Link       : https://github.com/latex3/babel/commit/53ad9c02ab4ed71dea0774ce02da5f69800047ba

>---------------------------------------------------------------

commit 53ad9c02ab4ed71dea0774ce02da5f69800047ba
Author: Javier <email at localhost>
Date:   Thu Nov 30 17:54:39 2023 +0100

    New: \ShowLocaleProperties. Case mapping in ini files (WIP).
    
    Removal of the \SetCase stuff.


>---------------------------------------------------------------

53ad9c02ab4ed71dea0774ce02da5f69800047ba
 README.md                         |   8 +-
 babel-code.pdf                    | Bin 641895 -> 640141 bytes
 babel.dtx                         | 163 +++++++++++++++++++-------------------
 babel.ins                         |   2 +-
 babel.pdf                         | Bin 417319 -> 417881 bytes
 bbcompat.dtx                      |   2 +-
 locale/la/babel-la-x-medieval.ini |   1 +
 7 files changed, 92 insertions(+), 84 deletions(-)

diff --git a/README.md b/README.md
index 2957d78..60f058d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-## Babel 3.97
+## Babel 3.97.33470
 
-2023-11-11
+2023-11-30
 
 This package manages culturally-determined typographical (and other)
 rules, and hyphenation patterns for a wide range of languages. Many
@@ -48,6 +48,10 @@ respective authors.
 
 ### Summary of latest changes
 ```
+3.98 2023-11-30
+     * New: \ShowLocaleProperties.
+     * Case mappings in ini files.
+     
 3.97 2023-11-11
      * Support for 'interchar' (xetex).
      * New locale for Buriat, thanks to J. Khaganov.
diff --git a/babel-code.pdf b/babel-code.pdf
index 130befc..af8b8d6 100644
Binary files a/babel-code.pdf and b/babel-code.pdf differ
diff --git a/babel.dtx b/babel.dtx
index 4345b09..56ba459 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -32,7 +32,7 @@
 %
 % \iffalse
 %<*filedriver>
-\ProvidesFile{babel.dtx}[2023/11/11 v3.97 The Babel package]
+\ProvidesFile{babel.dtx}[2023/11/30 v3.97.33470 The Babel package]
 \documentclass{ltxdoc}
 \GetFileInfo{babel.dtx}
 \usepackage{fontspec}
@@ -1562,6 +1562,7 @@ captions):
 \tag{bs-Cyrl} Bosnian
 \tag{bs-Latn} Bosnian\hascapu\hascapl
 \tag{bs} Bosnian\hascapu\hascapl
+\tag{bua} Buriat\hascapu\hascapl
 \tag{byn} Blin
 \tag{ca} Catalan\hascapu\hascapl
 \tag{cch} Atsam
@@ -1936,6 +1937,7 @@ breton\\
 british\\
 britishenglish\\
 bulgarian\\
+buriat\\
 burmese\\
 canadian\\
 canadianenglish\\
@@ -3498,6 +3500,22 @@ Unicode engines, spacing is based on the ``current'' em unit (the size
 of the previous char in \luatex, and the font size set by the last
 |\selectfont| in \xetex).
 
+\begin{note}
+  With Unicode engines, a line break can happen just before an explicit
+  combining char (eg, \textit{\~{g}}, used in Guarani and Filipino, is
+  not included as a combined char and it’s represented in Unicode as
+  |U+0067|~|U+0303|). This issue is not directly related to \babel, but
+  to the hyphenation patterns and/or the font renderer. However, at
+  least with \luatex{} there is a workaround (change the language name
+  to what you are using):
+\begin{verbatim}
+\babelposthyphenation{guarani}{ | [{0300}-{036F}] }{ remove, {} }
+\end{verbatim}
+The Lua pattern means ‘a discretionary followed by a character in the
+range |U+0300|--|U+036F| (which contains combining chars)’. An
+alternative to a transform is |\babelpatterns|.
+\end{note}
+
 \subsection{Transforms}
 \label{transforms}
 
@@ -5440,12 +5458,17 @@ to define |\abmoniname|, |\abmoniiname|, etc. (and similarly with
 \end{verbatim}
 |#1| is replaced by the roman numeral.
 
-\Describe\SetCase{\oarg{map-list}\marg{toupper-code}\marg{tolower-code}\qquad\textit{Deprecated}}
+% \Describe\SetCase{\oarg{map-list}\marg{toupper-code}%
+%   \marg{tolower-code}\qquad\textit{Deprecated}}
+% 
+% \begin{warning}
+% This feature doesn’t work any longer after some changes in the \LaTeX{}
+% kernel. It’s now deprecated and an alternative is on the way.
+% \end{warning}
 
-\begin{warning}
-This feature doesn’t work any longer after some changes in the \LaTeX{}
-kernel. It’s now deprecated and an alternative is on the way.
-\end{warning}
+\Describe{\SetCaseMapping}{}
+
+\textit{Work in progress.}
 
 \Describe{\SetHyphenMap}{\marg{to-lower-macros}}
 \New{3.9g} Case mapping for hyphenation is handled with |\SetHyphenMap|
@@ -5630,8 +5653,8 @@ wouldn’t exist.
 % \section{Tools}
 %
 %    \begin{macrocode}
-%<<version=3.97>>
-%<<date=2023/11/11>>
+%<<version=3.97.33470>>
+%<<date=2023/11/30>>
 %    \end{macrocode}
 %
 % \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -9181,37 +9204,8 @@ wouldn’t exist.
 \def\bbl at toglobal#1{\global\let#1#1}
 %    \end{macrocode}
 %
-% The second one. We need to patch |\@uclclist|, but it is done once
-% and only if |\SetCase| is used or if strings are encoded.  The code
-% is far from satisfactory for several reasons, including the fact
-% |\@uclclist| is not a list any more. Therefore a package option is
-% added to ignore it. Instead of gobbling the macro
-% getting the next two elements (usually |\reserved at a|), we pass it as
-% argument to |\bbl at uclc|. The parser is restarted inside
-% |\|\m{lang}|@bbl at uclc| because we do not know how many expansions
-% are necessary (depends on whether strings are encoded). The last
-% part is tricky -- when uppercasing, we have:
-%\begin{verbatim}
-% \let\bbl at tolower\@empty\bbl at toupper\@empty
-%\end{verbatim}
-% and starts over (and similarly when lowercasing).
-%
-%    \begin{macrocode}
-\@ifpackagewith{babel}{nocase}%
-  {\let\bbl at patchuclc\relax}%
-  {\def\bbl at patchuclc{% TODO. Delete. Doesn’t work any more.
-    \global\let\bbl at patchuclc\relax
-    \g at addto@macro\@uclclist{\reserved at b{\reserved at b\bbl at uclc}}%
-    \gdef\bbl at uclc##1{%
-      \let\bbl at encoded\bbl at encoded@uclc
-      \bbl at ifunset{\languagename @bbl at uclc}% and resumes it
-        {##1}%
-        {\let\bbl at tempa##1\relax % Used by LANG at bbl@uclc
-         \csname\languagename @bbl at uclc\endcsname}%
-      {\bbl at tolower\@empty}{\bbl at toupper\@empty}}%
-    \gdef\bbl at tolower{\csname\languagename @bbl at lc\endcsname}%
-    \gdef\bbl at toupper{\csname\languagename @bbl at uc\endcsname}}}
-%    \end{macrocode}
+% The following option is currently a no-op. It was meant for the
+% deprecated |\SetCase|.
 %
 %    \begin{macrocode}
 %<<*More package options>>
@@ -9426,31 +9420,12 @@ wouldn’t exist.
       \csname\bbl at LC\expandafter\endcsname\expandafter{\BabelString}}}
 %    \end{macrocode}
 %
-%     Now, some addtional stuff to be used when encoded strings are
-%     used. Captions then include |\bbl at encoded| for string to be
-%     expanded in case transformations. It is |\relax| by default, but
-%     in |\MakeUppercase| and |\MakeLowercase| its value is a modified
-%     expandable |\@changed at cmd|.
+% A little auxiliary command sets the string. TODO: Formerly used with
+% casing. Very likely no longer necessary, although it’s used in
+% |\setlocalecaption|.
 %
 %    \begin{macrocode}
-\ifx\bbl at opt@strings\relax
-  \def\bbl at scset#1#2{\def#1{\bbl at encoded#2}}
-  \bbl at patchuclc
-  \let\bbl at encoded\relax
-  \def\bbl at encoded@uclc#1{%
-    \@inmathwarn#1%
-    \expandafter\ifx\csname\cf at encoding\string#1\endcsname\relax
-      \expandafter\ifx\csname ?\string#1\endcsname\relax
-        \TextSymbolUnavailable#1%
-      \else
-        \csname ?\string#1\endcsname
-      \fi
-    \else
-      \csname\cf at encoding\string#1\endcsname
-    \fi}
-\else
-  \def\bbl at scset#1#2{\def#1{#2}}
-\fi
+\def\bbl at scset#1#2{\def#1{#2}}
 %    \end{macrocode}
 %
 % Define |\SetStringLoop|, which is actually set inside
@@ -9484,18 +9459,11 @@ wouldn’t exist.
 %
 % \paragraph{Case mapping}
 %
-% The command |\SetCase| provides a way to change the behavior of
-% |\MakeUppercase| and |\MakeLowercase|. |\bbl at tempa| is set by the
-% patched |\@uclclist| to the parsing command. \textit{Deprecated.}
+% The command |\SetCase| is deprecated, with a dummy definition.
 %
 %    \begin{macrocode}
 %<<*Macros local to BabelCommands>>
-  \newcommand\SetCase[3][]{%
-    \bbl at patchuclc
-    \bbl at forlang\bbl at tempa{%
-      \bbl at carg\bbl at encstring{\bbl at tempa @bbl at uclc}{\bbl at tempa##1}%
-      \bbl at carg\bbl at encstring{\bbl at tempa @bbl at uc}{##2}%
-      \bbl at carg\bbl at encstring{\bbl at tempa @bbl at lc}{##3}}}%
+  \newcommand\SetCase[3][]{}%
 %<</Macros local to BabelCommands>>
 %    \end{macrocode}
 %
@@ -11037,6 +11005,7 @@ wouldn’t exist.
 \let\bbl at inikv@date\bbl at inikv
 \let\bbl at inikv@typography\bbl at inikv
 \let\bbl at inikv@characters\bbl at inikv
+\bbl at csarg\let{bbl at inikv@characters.casing}\bbl at inikv
 \let\bbl at inikv@numbers\bbl at inikv
 %    \end{macrocode}
 %
@@ -11637,6 +11606,12 @@ wouldn’t exist.
 % singletons may change.
 %
 %    \begin{macrocode}
+\ifcase\bbl at engine % Converts utf8 to its code (expandable)
+  \def\bbl at utftocode#1{\the\numexpr\decode at UTFviii#1\relax}
+\else % Other engines: `<char> is TeX's own notation for a char code
+  \def\bbl at utftocode#1{\expandafter`\string#1}
+\fi
+% Still somewhat hackish. WIP.
 \providecommand\BCPdata{}
 \ifx\renewcommand\@undefined\else % For plain. TODO. It’s a quick fix
   \renewcommand\BCPdata[1]{\bbl at bcpdata@i#1\@empty}
@@ -11652,18 +11627,40 @@ wouldn’t exist.
       {\bbl at ifunset{bbl@\csname bbl at info@#1.tag.bcp47\endcsname @#2}{}%
         {\bbl at cs{\csname bbl at info@#1.tag.bcp47\endcsname @#2}}}}
 \fi
-% Still somewhat hackish. WIP.
 \@namedef{bbl at info@casing.tag.bcp47}{casing}
 \newcommand\BabelUppercaseMapping[3]{%
-  \let\bbl at tempx\languagename
-  \edef\languagename{#1}%
-  \DeclareUppercaseMapping[\BCPdata{casing}]{#2}{#3}%
-  \let\languagename\bbl at tempx}
+  \DeclareUppercaseMapping[\@nameuse{bbl at casing@#1}]{#2}{#3}}
+\newcommand\BabelTitlecaseMapping[3]{%
+  \DeclareTitlecaseMapping[\@nameuse{bbl at casing@#1}]{#2}{#3}}
 \newcommand\BabelLowercaseMapping[3]{%
-  \let\bbl at tempx\languagename
-  \edef\languagename{#1}%
-  \DeclareLowercaseMapping[\BCPdata{casing}]{#2}{#3}%
-  \let\languagename\bbl at tempx}
+  \DeclareLowercaseMapping[\@nameuse{bbl at casing@#1}]{#2}{#3}}
+% WIP. Tentative and incomplete. To be used by 'ini' files (with a new
+% key).
+\def\SetCaseMapping#1#2{% #1 = language, #2 = space-separated casing entries
+  \def\bbl at tempa##1 ##2{% ##1 = entry up to the next space, ##2 = token after it
+    \bbl at casemapping{##1}%
+    \ifx\@empty##2\else\bbl at afterfi\bbl at tempa##2\fi}% Loop until \@empty
+  \edef\bbl at tempe{#1}% Language
+  \def\bbl at tempc{#2 }% Casing list
+  \expandafter\bbl at tempa\bbl at tempc\@empty}
+\def\bbl at casemapping#1{% #1 = a single entry from the casing list
+  \def\bbl at tempb{#1}% Working copy, rewritten in place by the regex below
+  \ifcase\bbl at engine % Handle utf8 chars in pdftex, by surrounding them with {}
+    \@nameuse{regex_replace_all:nnN}%
+      {[\x{c0}-\x{ff}][\x{80}-\x{bf}]*}{{\0}}\bbl at tempb
+  \else % Other engines: every char matched by . gets its own {}
+    \@nameuse{regex_replace_all:nnN}{.}{{\0}}\bbl at tempb
+  \fi
+  \expandafter\bbl at casemapping@i\bbl at tempb\@@}
+\def\bbl at casemapping@i#1#2#3\@@{% #1, #2 = first two items, #3 = rest up to \@@
+  \ifx\relax#3\relax % #3 empty: upper(#1) = #2 and lower(#2) = #1
+    \BabelUppercaseMapping{\bbl at tempe}{\bbl at utftocode{#1}}{#2}%
+    \BabelLowercaseMapping{\bbl at tempe}{\bbl at utftocode{#2}}{#1}%
+  \else % #3 present: title(#1) = #2, upper(#1) = #3 and lower(#3) = #1
+    \BabelTitlecaseMapping{\bbl at tempe}{\bbl at utftocode{#1}}{#2}%
+    \BabelUppercaseMapping{\bbl at tempe}{\bbl at utftocode{#1}}{#3}%
+    \BabelLowercaseMapping{\bbl at tempe}{\bbl at utftocode{#3}}{#1}%
+  \fi}
 %    \end{macrocode}
 %
 % With version 3.75 |\BabelEnsureInfo| is executed always, but there is
@@ -11715,6 +11712,12 @@ wouldn’t exist.
   \fi}
 \let\bbl at ini@loaded\@empty
 \newcommand\LocaleForEach{\bbl at foreach\bbl at ini@loaded}
+\def\ShowLocaleProperties#1{% Write the stored ini data of language #1 with \typeout
+  \typeout{}%
+  \typeout{*** Properties for language '#1' ***}% This % avoids a stray space token
+  \def\bbl at elt##1##2##3{\typeout{##1/##2 = ##3}}% One output line per \bbl at elt
+  \@nameuse{bbl at inidata@#1}%
+  \typeout{*******}}
 %    \end{macrocode}
 %
 % \section{Adjusting the Babel bahavior}
diff --git a/babel.ins b/babel.ins
index b47775f..8167ecb 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
 %% and covered by LPPL is defined by the unpacking scripts (with
 %% extension .ins) which are part of the distribution.
 %%
-\def\filedate{2023/11/11}
+\def\filedate{2023/11/30}
 \def\batchfile{babel.ins}
 \input docstrip.tex
 
diff --git a/babel.pdf b/babel.pdf
index da4b04f..0da3b71 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index 61e273e..c72cf0d 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
 %
 % \iffalse
 %<*dtx>
-\ProvidesFile{bbcompat.dtx}[2023/11/11 v3.97]
+\ProvidesFile{bbcompat.dtx}[2023/11/30 v3.97.33470]
 %</dtx>
 %
 %% File 'bbcompat.dtx'
diff --git a/locale/la/babel-la-x-medieval.ini b/locale/la/babel-la-x-medieval.ini
index ee14662..52cf18a 100644
--- a/locale/la/babel-la-x-medieval.ini
+++ b/locale/la/babel-la-x-medieval.ini
@@ -147,6 +147,7 @@ hyphenationmin =
 
 [characters]
 delimiters.quotes = 
+casing = uV
 
 [counters]
 





More information about the latex3-commits mailing list.