[latex3-commits] [git/LaTeX3-latex3-babel] master: Improved dad. Experimental code for Uyghur hyphenation. (a76c5dd)

Javier email at dante.de
Fri Apr 2 11:32:28 CEST 2021


Repository : https://github.com/latex3/babel
On branch  : master
Link       : https://github.com/latex3/babel/commit/a76c5dd0402534ad12f9249c9e0352022c902604

>---------------------------------------------------------------

commit a76c5dd0402534ad12f9249c9e0352022c902604
Author: Javier <email at localhost>
Date:   Fri Apr 2 11:32:28 2021 +0200

    Improved dad. Experimental code for Uyghur hyphenation.


>---------------------------------------------------------------

a76c5dd0402534ad12f9249c9e0352022c902604
 README.md                  |  14 +++++-----
 babel.dtx                  |  16 +++++++++---
 babel.ins                  |   2 +-
 babel.pdf                  | Bin 824943 -> 825200 bytes
 bbcompat.dtx               |   2 +-
 locale/ar/babel-ar.ini     |  45 ++++++++++++++++++++------------
 locale/ug/babel-ug.ini     |   1 +
 locale/ug/babel-uyghur.tex |  63 +++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 17ddcb4..ccf4073 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Babel 3.56.2326
+## Babel 3.56.2330
 
 This package manages culturally-determined typographical (and other)
 rules, and hyphenation patterns for a wide range of languages. Many
@@ -48,11 +48,13 @@ respective authors.
 ```
 3.57   2021-04-15??
        * Transforms:
-         - Arabic:    transliteration.dad
-         - Croatian:  digraphs.ligatures
-         - Greek:     diaeresis.hyphen
-         - Hindi:     transliteration.hk
-         - Hungarian: digraphs.hyphen
+         - Arabic:     transliteration.dad
+         - Croatian:   digraphs.ligatures
+         - Greek:      diaeresis.hyphen
+         - Hindi:      transliteration.hk
+         - Hungarian:  digraphs.hyphen
+       * {xxxx} syntax also in string=.
+       * Experimental code for Uyghur hyphenation (lua).
          
 3.56   2021-03-24
        * Transforms (\babelprehyphenation, \babelposthyphenation)
diff --git a/babel.dtx b/babel.dtx
index 9dfa4fd..f78477c 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -31,7 +31,7 @@
 %
 % \iffalse
 %<*filedriver>
-\ProvidesFile{babel.dtx}[2021/03/29 v3.56.2326 The Babel package]
+\ProvidesFile{babel.dtx}[2021/04/02 v3.56.2330 The Babel package]
 \documentclass{ltxdoc}
 \GetFileInfo{babel.dtx}
 \usepackage{fontspec}
@@ -4897,8 +4897,8 @@ help from Bernd Raichle, for which I am grateful.
 % \section{Tools}
 %
 %    \begin{macrocode}
-%<<version=3.56.2326>>
-%<<date=2021/03/29>>
+%<<version=3.56.2330>>
+%<<date=2021/04/02>>
 %    \end{macrocode}
 %
 % \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -14247,7 +14247,15 @@ end
   &% The following functions belong to the next macro
   function Babel.capture_func(key, cap)
     local ret = "[[" .. cap:gsub('{([0-9])}', "]]..m[%1]..[[") .. "]]"
-    ret = ret:gsub('{([0-9])|([^|]+)|(.-)}', Babel.capture_func_map)
+    local cnt
+    local u = unicode.utf8
+    ret, cnt = ret:gsub('{([0-9])|([^|]+)|(.-)}', Babel.capture_func_map)
+    if cnt == 0 then
+      ret = u.gsub(ret, '{(%x%x%x%x+)}', 
+            function (n)
+              return u.char(tonumber(n, 16))
+            end)
+    end
     ret = ret:gsub("%[%[%]%]%.%.", '')
     ret = ret:gsub("%.%.%[%[%]%]", '')
     return key .. [[=function(m) return ]] .. ret .. [[ end]]
diff --git a/babel.ins b/babel.ins
index 368a9a1..243b918 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
 %% and covered by LPPL is defined by the unpacking scripts (with
 %% extension .ins) which are part of the distribution.
 %%
-\def\filedate{2021/03/29}
+\def\filedate{2021/04/02}
 \def\batchfile{babel.ins}
 \input docstrip.tex
 
diff --git a/babel.pdf b/babel.pdf
index fd57465..47b38f3 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index e561b4c..292d05d 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
 %
 % \iffalse
 %<*dtx>
-\ProvidesFile{bbcompat.dtx}[2021/03/29 v3.56.2326]
+\ProvidesFile{bbcompat.dtx}[2021/04/02 v3.56.2330]
 %</dtx>
 %
 %% File 'bbcompat.dtx'
diff --git a/locale/ar/babel-ar.ini b/locale/ar/babel-ar.ini
index 899ddf8..6650377 100644
--- a/locale/ar/babel-ar.ini
+++ b/locale/ar/babel-ar.ini
@@ -177,21 +177,34 @@ abjad = ا ب ج د ه‍ و ز ح ط ي ك ل م ن س ع ف ص ق ر ش ت ث
 maghrebi.abjad = ا ب ج د ه‍ و ز ح ط ي ك ل م ن ص ع ف ض ق ر س ت ث خ ذ ظ غ ش
 
 [transforms.prehyphenation]
-; Yannis Haralambous' system in his 'dad'. Unfinished, but the
-; basic rules are here.
-transliteration.dad.1.0 = { ([aui])N }
-transliteration.dad.1.1 = { string = {1|aui|ًٌٍ} }
-transliteration.dad.1.2 = { remove }
-transliteration.dad.2.0 = { ([tAa]){*} }
-transliteration.dad.2.1 = { string = {1|tAa|ةٱٰ} }
-transliteration.dad.2.2 = { remove } 
-transliteration.dad.3.0 = { ([tds])h }
-transliteration.dad.3.1 = { string = {1|tds|ثذش} }
-transliteration.dad.3.2 = { remove }
-transliteration.dad.4.0 = { '([AauiI]) }
-transliteration.dad.4.1 = { remove }
-transliteration.dad.4.2 = { string = {1|AauiI|آأؤإئ} }
-transliteration.dad.5.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaiu+opCvg]) }
-transliteration.dad.5.1 = { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaiu+opCvg|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگ} }
+; Yannis Haralambous' system in his 'dad'. Only two rules are missing.
+; 3-letter (.0 is the pattern; .1 to .3 give one replacement per matched char)
+transliteration.dad.1.0 = { {+}a{*} }
+transliteration.dad.1.1 =   { string = ٰ }
+transliteration.dad.1.2 =   { string = ّ }
+transliteration.dad.1.3 =   { remove }
+transliteration.dad.2.0 = { {+}([aui])N }
+transliteration.dad.2.1 =   { string = {1|aui|ًٌٍ} }
+transliteration.dad.2.2 =   { string = ّ }
+transliteration.dad.2.3 =   { remove }
+; 2-letter
+transliteration.dad.3.0 = { ([aui])N }
+transliteration.dad.3.1 =   { string = {1|aui|ًٌٍ} }
+transliteration.dad.3.2 =   { remove }
+transliteration.dad.4.0 = { ([tAa]){*} }
+transliteration.dad.4.1 =   { string = {1|tAa|ةٱٰ} }
+transliteration.dad.4.2 =   { remove } 
+transliteration.dad.5.0 = { ([tds])h }
+transliteration.dad.5.1 =   { string = {1|tds|ثذش} }
+transliteration.dad.5.2 =   { remove }
+transliteration.dad.6.0 = { {+}([aui]) }
+transliteration.dad.6.1 =   { string = {1|aui|َُِ} } 
+transliteration.dad.6.2 =   { string = ّ }
+transliteration.dad.7.0 = { '([AauiIbnf]) }
+transliteration.dad.7.1 =   { remove }
+transliteration.dad.7.2 =   { string = {1|AauiIbnf|آأؤإئٮںڡ} }
+; 1-letter
+transliteration.dad.8.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe]) }
+transliteration.dad.8.1 =   { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگژۀ} }
 
 
diff --git a/locale/ug/babel-ug.ini b/locale/ug/babel-ug.ini
index 104cf00..53aca88 100644
--- a/locale/ug/babel-ug.ini
+++ b/locale/ug/babel-ug.ini
@@ -27,6 +27,7 @@ script.tag.opentype = arab
 level = 1
 encodings = 
 derivate = no
+require.babel = uyghur
 
 [captions]
 preface = كىرىش سۆز
diff --git a/locale/ug/babel-uyghur.tex b/locale/ug/babel-uyghur.tex
index 0963ec1..188d383 100644
--- a/locale/ug/babel-uyghur.tex
+++ b/locale/ug/babel-uyghur.tex
@@ -9,4 +9,67 @@
 \fi
 \BabelBeforeIni{ug}{%
 }
+
+\directlua{
+
+% Code points that Babel.ug_hyphenate treats as consonants (used as a set).
+Babel.ug_conson = {}
+for _, cp in ipairs{
+  0x0628, 0x067E, 0x062A, 0x062C, 0x0686, 0x062E, 0x062F, 0x0631,
+  0x0632, 0x0698, 0x0633, 0x0634, 0x0641, 0x063A, 0x0642, 0x0643,
+  0x06AF, 0x06AD, 0x0644, 0x0645, 0x0646, 0x0647, 0x064A, 0x06CB} do
+  Babel.ug_conson[cp] = true
+end
+
+% Babel.ug_hyphenate(head): node-list callback. Returns the (possibly new) head.
+% Inserts a discretionary (pre-break '-') before each glyph that sits between
+% two glyphs, is a consonant and is followed by a non-consonant glyph.
+function Babel.ug_hyphenate(head)
+  if not Babel.ug_toisol then return head end % not set up: list unchanged
+  for item in node.traverse(head) do
+    if item.id == 29 and item.prev and item.prev.id == 29
+       and item.next and item.next.id == 29 then % 29 is taken to be glyph
+      local pre  = Babel.ug_toisol[item.char] or item.char
+      local post = Babel.ug_toisol[item.next.char] or item.next.char
+      if Babel.ug_conson[pre] and not Babel.ug_conson[post] then
+        local d = node.new(7, 3)   % (disc, regular)
+        d.pre = Babel.str_to_nodes(function() return '-' end, nil, item)
+        d.penalty = 0 % Must be tex.(ex)hyphenpenalty
+        head = node.insert_before(head, item, d) % no stray global 'new'
+      end
+    end
+  end
+  return head
+end
+}
+
+\gdef\UyghurSetupHyph{% Registers Babel.ug_hyphenate and fills Babel.ug_toisol
+  \directlua{
+      Babel.ug_toisol   = {}
+      luatexbase.add_to_callback("pre_linebreak_filter",
+        Babel.ug_hyphenate, "Babel.ug_hyphenate")
+      luatexbase.add_to_callback("hpack_filter",
+        Babel.ug_hyphenate, "Babel.ug_hyphenate")
+  }%
+  % It must be done for each font, and stored separately.
+  % Locale must be taken into account too.
+  \bbl@foreach{%
+      0628,067E,062A,062C,0686,062E,062F,0631,0632,%
+      0698,0633,0634,0641,063A,0642,0643,06AF,06AD,%
+      0644,0645,0646,0647,064A,06CB}{%
+    \setbox\z@\hbox{\char"##1=\char"##1^^^^200d=%
+      ^^^^200d\char"##1^^^^200d=^^^^200d\char"##1}%
+    \directlua{ % chars: the letter set alone, then its three ZWJ-joined forms
+      local chars = {}
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 128 and
+             not (item.char == 0x200D) then
+          table.insert(chars, item.char)
+        end
+      end
+      for i = 2, 4 do % skip forms the font did not yield (nil key would raise)
+        if chars[i] then Babel.ug_toisol[chars[i]] = chars[1] end
+      end
+    }}}
+
 \endinput
\ No newline at end of file





More information about the latex3-commits mailing list.