[latex3-commits] [git/LaTeX3-latex3-babel] master: Improved dad. Experimental code for Uyghur hyphenation. (a76c5dd)
Javier
email at dante.de
Fri Apr 2 11:32:28 CEST 2021
Repository : https://github.com/latex3/babel
On branch : master
Link : https://github.com/latex3/babel/commit/a76c5dd0402534ad12f9249c9e0352022c902604
>---------------------------------------------------------------
commit a76c5dd0402534ad12f9249c9e0352022c902604
Author: Javier <email at localhost>
Date: Fri Apr 2 11:32:28 2021 +0200
Improved dad. Experimental code for Uyghur hyphenation.
>---------------------------------------------------------------
a76c5dd0402534ad12f9249c9e0352022c902604
README.md | 14 +++++-----
babel.dtx | 16 +++++++++---
babel.ins | 2 +-
babel.pdf | Bin 824943 -> 825200 bytes
bbcompat.dtx | 2 +-
locale/ar/babel-ar.ini | 45 ++++++++++++++++++++------------
locale/ug/babel-ug.ini | 1 +
locale/ug/babel-uyghur.tex | 63 +++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 115 insertions(+), 28 deletions(-)
diff --git a/README.md b/README.md
index 17ddcb4..ccf4073 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Babel 3.56.2326
+## Babel 3.56.2330
This package manages culturally-determined typographical (and other)
rules, and hyphenation patterns for a wide range of languages. Many
@@ -48,11 +48,13 @@ respective authors.
```
3.57 2021-04-15??
* Transforms:
- - Arabic: transliteration.dad
- - Croatian: digraphs.ligatures
- - Greek: diaeresis.hyphen
- - Hindi: transliteration.hk
- - Hungarian: digraphs.hyphen
+ - Arabic: transliteration.dad
+ - Croatian: digraphs.ligatures
+ - Greek: diaeresis.hyphen
+ - Hindi: transliteration.hk
+ - Hungarian: digraphs.hyphen
+ * {xxxx} syntax also in string=.
+ * Experimental code for Uyghur hyphenation (lua).
3.56 2021-03-24
* Transforms (\babelprehyphenation, \babelposthyphenation)
diff --git a/babel.dtx b/babel.dtx
index 9dfa4fd..f78477c 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -31,7 +31,7 @@
%
% \iffalse
%<*filedriver>
-\ProvidesFile{babel.dtx}[2021/03/29 v3.56.2326 The Babel package]
+\ProvidesFile{babel.dtx}[2021/04/02 v3.56.2330 The Babel package]
\documentclass{ltxdoc}
\GetFileInfo{babel.dtx}
\usepackage{fontspec}
@@ -4897,8 +4897,8 @@ help from Bernd Raichle, for which I am grateful.
% \section{Tools}
%
% \begin{macrocode}
-%<<version=3.56.2326>>
-%<<date=2021/03/29>>
+%<<version=3.56.2330>>
+%<<date=2021/04/02>>
% \end{macrocode}
%
% \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -14247,7 +14247,15 @@ end
&% The following functions belong to the next macro
function Babel.capture_func(key, cap)
local ret = "[[" .. cap:gsub('{([0-9])}', "]]..m[%1]..[[") .. "]]"
- ret = ret:gsub('{([0-9])|([^|]+)|(.-)}', Babel.capture_func_map)
+ local cnt
+ local u = unicode.utf8
+ ret, cnt = ret:gsub('{([0-9])|([^|]+)|(.-)}', Babel.capture_func_map)
+ if cnt == 0 then
+ ret = u.gsub(ret, '{(%x%x%x%x+)}',
+ function (n)
+ return u.char(tonumber(n, 16))
+ end)
+ end
ret = ret:gsub("%[%[%]%]%.%.", '')
ret = ret:gsub("%.%.%[%[%]%]", '')
return key .. [[=function(m) return ]] .. ret .. [[ end]]
diff --git a/babel.ins b/babel.ins
index 368a9a1..243b918 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
%% and covered by LPPL is defined by the unpacking scripts (with
%% extension .ins) which are part of the distribution.
%%
-\def\filedate{2021/03/29}
+\def\filedate{2021/04/02}
\def\batchfile{babel.ins}
\input docstrip.tex
diff --git a/babel.pdf b/babel.pdf
index fd57465..47b38f3 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index e561b4c..292d05d 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
%
% \iffalse
%<*dtx>
-\ProvidesFile{bbcompat.dtx}[2021/03/29 v3.56.2326]
+\ProvidesFile{bbcompat.dtx}[2021/04/02 v3.56.2330]
%</dtx>
%
%% File 'bbcompat.dtx'
diff --git a/locale/ar/babel-ar.ini b/locale/ar/babel-ar.ini
index 899ddf8..6650377 100644
--- a/locale/ar/babel-ar.ini
+++ b/locale/ar/babel-ar.ini
@@ -177,21 +177,34 @@ abjad = ا ب ج د ه و ز ح ط ي ك ل م ن س ع ف ص ق ر ش ت ث
maghrebi.abjad = ا ب ج د ه و ز ح ط ي ك ل م ن ص ع ف ض ق ر س ت ث خ ذ ظ غ ش
[transforms.prehyphenation]
-; Yannis Haralambous' system in his 'dad'. Unfinished, but the
-; basic rules are here.
-transliteration.dad.1.0 = { ([aui])N }
-transliteration.dad.1.1 = { string = {1|aui|ًٌٍ} }
-transliteration.dad.1.2 = { remove }
-transliteration.dad.2.0 = { ([tAa]){*} }
-transliteration.dad.2.1 = { string = {1|tAa|ةٱٰ} }
-transliteration.dad.2.2 = { remove }
-transliteration.dad.3.0 = { ([tds])h }
-transliteration.dad.3.1 = { string = {1|tds|ثذش} }
-transliteration.dad.3.2 = { remove }
-transliteration.dad.4.0 = { '([AauiI]) }
-transliteration.dad.4.1 = { remove }
-transliteration.dad.4.2 = { string = {1|AauiI|آأؤإئ} }
-transliteration.dad.5.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaiu+opCvg]) }
-transliteration.dad.5.1 = { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaiu+opCvg|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگ} }
+; Yannis Haralambous' system in his 'dad'. Only two rules are missing.
+; 3-letter
+transliteration.dad.1.0 = { {+}a{*} }
+transliteration.dad.1.1 = { string = ٰ }
+transliteration.dad.1.2 = { string = ّ }
+transliteration.dad.1.3 = { remove }
+transliteration.dad.2.0 = { {+}([aui])N }
+transliteration.dad.2.1 = { string = {1|aui|ًٌٍ} }
+transliteration.dad.2.2 = { string = ّ }
+transliteration.dad.2.3 = { remove }
+; 2-letter
+transliteration.dad.3.0 = { ([aui])N }
+transliteration.dad.3.1 = { string = {1|aui|ًٌٍ} }
+transliteration.dad.3.2 = { remove }
+transliteration.dad.4.0 = { ([tAa]){*} }
+transliteration.dad.4.1 = { string = {1|tAa|ةٱٰ} }
+transliteration.dad.4.2 = { remove }
+transliteration.dad.5.0 = { ([tds])h }
+transliteration.dad.5.1 = { string = {1|tds|ثذش} }
+transliteration.dad.5.2 = { remove }
+transliteration.dad.6.0 = { {+}([aui]) }
+transliteration.dad.6.1 = { string = {1|aui|َُِ} }
+transliteration.dad.6.2 = { string = ّ }
+transliteration.dad.7.0 = { '([AauiIbnf]) }
+transliteration.dad.7.1 = { remove }
+transliteration.dad.7.2 = { string = {1|AauiIbnf|آأؤإئٮںڡ} }
+; 1-letter
+transliteration.dad.8.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe]) }
+transliteration.dad.8.1 = { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگژۀ} }
diff --git a/locale/ug/babel-ug.ini b/locale/ug/babel-ug.ini
index 104cf00..53aca88 100644
--- a/locale/ug/babel-ug.ini
+++ b/locale/ug/babel-ug.ini
@@ -27,6 +27,7 @@ script.tag.opentype = arab
level = 1
encodings =
derivate = no
+require.babel = uyghur
[captions]
preface = كىرىش سۆز
diff --git a/locale/ug/babel-uyghur.tex b/locale/ug/babel-uyghur.tex
index 0963ec1..188d383 100644
--- a/locale/ug/babel-uyghur.tex
+++ b/locale/ug/babel-uyghur.tex
@@ -9,4 +9,67 @@
\fi
\BabelBeforeIni{ug}{%
}
+
+\directlua{
+
+Babel.ug_conson = { % set of code points Babel.ug_hyphenate tests as consonants
+[0x0628] = true, [0x067E] = true, [0x062A] = true, [0x062C] = true,
+[0x0686] = true, [0x062E] = true, [0x062F] = true, [0x0631] = true,
+[0x0632] = true, [0x0698] = true, [0x0633] = true, [0x0634] = true,
+[0x0641] = true, [0x063A] = true, [0x0642] = true, [0x0643] = true,
+[0x06AF] = true, [0x06AD] = true, [0x0644] = true, [0x0645] = true,
+[0x0646] = true, [0x0647] = true, [0x064A] = true, [0x06CB] = true
+}
+
+function Babel.ug_hyphenate(head) % node-list callback: returns the list head
+  if not Babel.ug_toisol then return head end % no form map: list unchanged
+  local d, pre, post
+  for item in node.traverse(head) do
+    if item.id == 29 and item.prev and item.prev.id == 29
+        and item.next and item.next.id == 29 then % glyph between two glyphs
+      pre = Babel.ug_toisol[item.char] or item.char % map back when known
+      post = Babel.ug_toisol[item.next.char] or item.next.char
+      if Babel.ug_conson[pre] and not Babel.ug_conson[post] then
+        d = node.new(7, 3) % (disc, regular)
+        d.pre = Babel.str_to_nodes(
+          function() return '-' end,
+          nil, item)
+        d.penalty = 0 % Must be tex.(ex)hyphenpenalty
+        head = node.insert_before(head, item, d) % 2nd result not needed
+      end
+    end
+  end
+  return head
+end
+}
+
+\gdef\UyghurSetupHyph{% Register the callbacks and fill Babel.ug_toisol
+  \directlua{
+    Babel.ug_toisol = {}
+    luatexbase.add_to_callback("pre_linebreak_filter",
+      Babel.ug_hyphenate, "Babel.ug_hyphenate")
+    luatexbase.add_to_callback("hpack_filter",
+      Babel.ug_hyphenate, "Babel.ug_hyphenate")
+  }%
+  % It must be done for each font, and stored separately.
+  % Locale must be taken into account too.
+  \bbl@foreach{%
+      0628,067E,062A,062C,0686,062E,062F,0631,0632,%
+      0698,0633,0634,0641,063A,0642,0643,06AF,06AD,%
+      0644,0645,0646,0647,064A,06CB}{%
+    \setbox\z@\hbox{\char"##1=\char"##1^^^^200d=%
+      ^^^^200d\char"##1^^^^200d=^^^^200d\char"##1}%
+    \directlua{
+      local chars = {} % glyph chars of the four variants, in box order
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 128 and
+            not (item.char == 0x200D) then
+          table.insert(chars, item.char)
+        end
+      end
+      if chars[4] then % fewer glyphs would give a nil table index below
+        for i = 2, 4 do Babel.ug_toisol[chars[i]] = chars[1] end
+      end
+    }}}
+
\endinput
\ No newline at end of file
More information about the latex3-commits
mailing list.