[latex3-commits] [latex3/luaotfload] dev: Add AMTRA reordering for arabic fonts (79e909fe)

github at latex-project.org github at latex-project.org
Thu Dec 21 00:19:39 CET 2023


Repository : https://github.com/latex3/luaotfload
On branch  : dev
Link       : https://github.com/latex3/luaotfload/commit/79e909fe7724e51b7056c88b46c1bf7cf497413b

>---------------------------------------------------------------

commit 79e909fe7724e51b7056c88b46c1bf7cf497413b
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date:   Thu Dec 21 00:09:06 2023 +0100

    Add AMTRA reordering for arabic fonts


>---------------------------------------------------------------

79e909fe7724e51b7056c88b46c1bf7cf497413b
 src/luaotfload-arabic.lua   | 143 ++++++++++++++++++++++++++++++++++++++++++++
 src/luaotfload-features.lua |   1 +
 testfiles/arab1.tlg         |   2 +-
 testfiles/arab2.tlg         |   4 +-
 4 files changed, 147 insertions(+), 3 deletions(-)

diff --git a/src/luaotfload-arabic.lua b/src/luaotfload-arabic.lua
new file mode 100644
index 00000000..c26ee10f
--- /dev/null
+++ b/src/luaotfload-arabic.lua
@@ -0,0 +1,143 @@
+-----------------------------------------------------------------------
+--         FILE:  luaotfload-arabic.lua
+--  DESCRIPTION:  part of luaotfload / arabic specific support
+-----------------------------------------------------------------------
+
+assert(luaotfload_module, "This is a part of luaotfload and should not be loaded independently") { -- fails when loaded standalone; otherwise the table below is handed to luaotfload_module
+    name          = "luaotfload-arabic",
+    version       = "3.27-dev",       --TAGVERSION
+    date          = "2023-08-31", --TAGDATE
+    description   = "luaotfload submodule / arabic",
+    license       = "GPL v2.0",
+    author        = "Marcel Krüger",
+    copyright     = "The LaTeX Project",
+}
+
+local unicode = require'luaotfload-unicode'
+local ccc = unicode.ccc
+
+local node_new = node.direct.new
+local setlink = node.direct.setlink
+local is_char = node.direct.is_char
+local getnext = node.direct.getnext
+
+-- Modifier combining marks (MCM) of UTR #53; looked up by code point for marks with ccc 220/230.
+local mcm = {
+    [0x0654] = true, -- ARABIC HAMZA ABOVE
+    [0x0655] = true, -- ARABIC HAMZA BELOW
+    [0x0658] = true, -- ARABIC MARK NOON GHUNNA
+    [0x06DC] = true, -- ARABIC SMALL HIGH SEEN
+    [0x06E3] = true, -- ARABIC SMALL LOW SEEN
+    [0x06E7] = true, -- ARABIC SMALL HIGH YEH
+    [0x06E8] = true, -- ARABIC SMALL HIGH NOON
+    [0x08CA] = true, -- ARABIC SMALL HIGH FARSI YEH
+    [0x08CB] = true, -- ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW
+    [0x08CD] = true, -- ARABIC SMALL HIGH ZAH
+    [0x08CE] = true, -- ARABIC LARGE ROUND DOT ABOVE
+    [0x08CF] = true, -- ARABIC LARGE ROUND DOT BELOW
+    [0x08D3] = true, -- ARABIC SMALL LOW WAW
+    [0x08F3] = true, -- ARABIC SMALL HIGH WAW 
+}
+
+-- Implement AMTRA (the Arabic Mark Transient Reordering Algorithm) from UTR #53.
+-- This assumes that the text is already normalized according to NFD. For most
+-- fonts, normalizing to NFC should be good enough.
+--
+-- Within every combining sequence the marks are relinked such that the base is
+-- followed by the leading MCMs of the ccc=220 run, then the leading MCMs of the
+-- ccc=230 run, then all shaddas (ccc=33) and finally all remaining marks.
+-- (This is the result for canonically ordered input, where ccc=33 marks come
+-- before ccc=220 marks and those come before ccc=230 marks.)
+
+-- Find the end of the run of marks starting at `first`. `first` itself is not
+-- inspected; every following node for which is_char(node, f) gives a character
+-- with combining class `class` (and, if `mcm_only` is set, an entry in `mcm`)
+-- extends the run.
+-- Returns the last node of the run and the node following it (nil at the end).
+local function scan_run(first, f, class, mcm_only)
+    local tail, after = first, getnext(first)
+    while true do
+        local char = is_char(after, f) -- is_char(nil, f) == nil ends the run
+        if ccc[char] ~= class or (mcm_only and not mcm[char]) then
+            return tail, after
+        end
+        tail, after = after, getnext(after)
+    end
+end
+
+-- Relink the run first..tail, which currently sits between `prev` and `after`,
+-- so that it directly follows `base`. The list is unchanged if prev == base.
+local function move_behind(base, prev, first, tail, after)
+    setlink(prev, after)         -- cut the run out of its current position
+    setlink(tail, getnext(base)) -- what followed base now follows the run
+    setlink(base, first)         -- and the run follows base
+end
+
+-- Apply the reordering to the node list starting at `head`.
+--   head: first node of the list (as used by the node.direct functions)
+--   f:    font argument handed to is_char; nodes for which is_char does not
+--         give a character with a combining class are never moved
+-- Returns head. The first node is never moved, so the head does not change.
+local function reorder_amtra(head, f)
+    local n = head
+    while n do
+        -- Every node without a combining class starts a new sequence. `base` is
+        -- the node behind which moved marks get inserted, `prev` is the node
+        -- currently linked before n.
+        local base, prev = n, n
+        n = getnext(n)
+        while true do
+            local char = is_char(n, f) -- is_char(nil, f) == is_char(0, f) == nil
+            local this_ccc = ccc[char]
+            if not this_ccc then break end -- ! This `break` is the hot path
+            -- is_mcm_run: n begins a ccc 220/230 run and is itself a MCM
+            local is_mcm_run = (this_ccc == 220 or this_ccc == 230) and mcm[char]
+            if this_ccc == 33 or is_mcm_run then
+                -- Shaddas move as a whole run, for ccc 220/230 only leading MCMs.
+                local tail, after = scan_run(n, f, this_ccc, is_mcm_run)
+                move_behind(base, prev, n, tail, after)
+                if prev == base then
+                    prev = tail -- The run did not move, so it now precedes `after`
+                end
+                if this_ccc == 220 then
+                    base = tail -- Because ccc230 should get inserted after this
+                end
+                n = after
+            elseif this_ccc == 220 or this_ccc == 230 then
+                -- The run does not start with a MCM, so it stays where it is. Skip
+                -- all of it such that a MCM later in the run is not taken as start.
+                prev, n = scan_run(n, f, this_ccc, false)
+            else
+                prev, n = n, getnext(n)
+            end
+        end
+    end
+    return head
+end
+
+-- AMTRA has to see normalized text, so its processor should run right behind the
+-- 'normalize' processor. If that one is not registered (yet), the index stays 0,
+-- which puts us at the start of the list and lets normalize insert itself before us.
+local normalize_index = 0
+local node_processors = fonts.constructors.features.otf.processors.node
+for position, processor in ipairs(node_processors) do
+  if processor.name == 'normalize' then normalize_index = position end
+end
+fonts.constructors.features.otf.register { -- registers the 'amtra' font feature with a node initializer and a node processor
+    name = 'amtra',
+    default = 'auto',
+    description = 'Apply Unicode Arabic Mark Rendering',
+    initializers = {
+        node = function(fonttable, value, features)
+            if value == 'auto' then -- resolve 'auto' (the default) to a boolean: on only if the font's script is 'arab'
+                features.amtra = fonttable.properties.script == 'arab'
+            end
+        end,
+    },
+    processors = {
+        position = normalize_index + 1, -- one slot behind the index found for 'normalize'
+        node = function(head, f)
+            return reorder_amtra(head, f)
+        end,
+    },
+}
diff --git a/src/luaotfload-features.lua b/src/luaotfload-features.lua
index f30531f1..4328c1e9 100644
--- a/src/luaotfload-features.lua
+++ b/src/luaotfload-features.lua
@@ -890,6 +890,7 @@ fonts.constructors.features.otf.register {
       end,
     },
 }
+require'luaotfload-arabic'
 
 -- mathsize feature for compatibility with older fontloader versions
 -- Not all that useful in most cases since it leads to messy font sizes,
diff --git a/testfiles/arab1.tlg b/testfiles/arab1.tlg
index da214f77..5f369ad5 100644
--- a/testfiles/arab1.tlg
+++ b/testfiles/arab1.tlg
@@ -1,7 +1,7 @@
 This is a generated file for the l3build validation system.
 Don't change this file in any respect.
 [9:?]<+TLT><+TRT>[0:3]<984487><1615><985909><1614><983379>[12:13]<1608><1614><986305><1616><986323><1614><983379><1585><1615><1607><1615><-TRT>[14:2][12:15]
-[9:?]<+TLT><+TRT>[0:3]<1571><987302><986386><1614><986456>[12:13]<983736><1614><983514><1616><983440><983709><1612>[12:13]<1573><983279><986443><1614><986456>[12:13]<984487><1615><985909><1614><983379>[12:13]<983440><1614><983782><983685><1615><983425><1615>[12:13]<983693><1616><983491><983608><1615>[12:13]<986305><1616><986323><1614><983379><1585><1614><1607><1615>[12:13]<983679><1616><984655><1614><984695>[13:0]<983639><1614><983488><1614><983608><1615>[12:13]<986425><1616><986462>[12:13]<983755><1614><983548><983724><1614><1577><1613>[12:13]<983534><1614><983741><1616><984655><984695><1614><1577><1613>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615><58>[12:13]<983755><1614><983796><1601><1614>[12:13]<1571><983277><983400><1616><983481><983514><1615><1607><1615>[12:13]<1573><983279><983679><1614><983481><983621><1614>[12:13]<986425><1616><986462>[12:13]<1575><984342><984346><1614><983766><1614><983383><1569><1616>[12:13]<1608><1614><1571><987302><1583><983535><1614><983393><1615>[12:13]<986675><1614><986695><1614>[12:13]<1571><983277><985326><985351><1614><1577><1611><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<984487><1615><985909><1614><983379><58>[12:13]<1571><987302><985890><1614><983379>[12:13]<1570><983755><1616><983524><1612>[12:13]<985713><1616><983514><1611><1617><1575>[12:13]<1571><987302><986383><1616><1617><986462>[12:13]<984349><1614><984354>[12:13]<1571><987302><983755><983494><1614><983782><1616><983481><983393><1615>[12:13]<1571><987302><1606>[12:13]<1571><983277><983575><1614><983547><1616><1617><983709><1614>[12:13]<986675><1614><986695><1614>[12:13]<1585><1614><983395><983488><1614><983494><1614><983621><1614>[12:13]<983535><1614><983379><984364><984369><1616><984385><1614><983379><1585><1615>[12:13]<983679><1614><985828><983744><1614>[12:13]<983613><1615><983491><1614><983379>[12:13]<1575><983679><983481><1614><983796><1605><1614><46>[12:13]<1608><1614><983534><1614><983488><98367
3><1614>[12:13]<1571><987302><1606>[12:13]<983440><1615><983984><1616><983998><1614><1617>[12:13]<984487><1615><985909><1614><983379>[12:13]<986009><1614><986085><1614><984916><983693><1614><983608><1615>[12:13]<984071><1614><984098><1614><1571><987302>[12:13]<1575><984364><984369><1616><984385><1614><983379><1585><1615>[12:13]<987640><1614><986646><986663><1614><983709><1615>[12:13]<986425><1616><986462>[12:13]<1573><983279><983736><983782><1614><983488><986225><1616><986229><1616><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615>[12:13]<983736><1614><983514><1616><983440><983547><1615><983608><1615><58>[12:13]<1573><983279><986383><1616><1617><986462>[12:13]<1571><987302><986326><986341><1614><983393><1615>[12:13]<986305><1616><986323><1614><983379><1585><1614><1603><1614>[12:13]<985880><1614><983379>[12:13]<984487><1615><985909><1614><983379>[12:13]<987640><1614><986646><986663><1614><983709><1615><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615>[12:13]<984487><1615><985909><1614><983379><58>[12:13]<983395><1614><984449>[13:0]<1616><985085><985130><1612>[12:13]<1571><987302><984434><984449><1615><1603><1614>[12:13]<985880><1614><983379>[12:13]<983736><1614><983514><1616><983440><983904><1616><983927>[12:13]<1571><987302><983453><1615><983741><1614><983514><1616><1617><1602><1615>[12:13]<1575><984364><984369><1616><984385><1614><983379><1585><1614>[12:13]<1608><1614><983453><1615><986184><1614><986208><1616><1617><984185><1614><984229><1616><984259><1567><-TRT>[14:2][12:15]
+[9:?]<+TLT><+TRT>[0:3]<1571><987302><986386><1614><986456>[12:13]<983736><1614><983514><1616><983440><983709><1612>[12:13]<1573><983279><986443><1614><986456>[12:13]<984487><1615><985909><1614><983379>[12:13]<983440><1614><983782><983685><1615><983425><1615>[12:13]<983693><1616><983491><983608><1615>[12:13]<986305><1616><986323><1614><983379><1585><1614><1607><1615>[12:13]<983679><1616><984655><1614><984695>[13:0]<983639><1614><983488><1614><983608><1615>[12:13]<986425><1616><986462>[12:13]<983755><1614><983548><983724><1614><1577><1613>[12:13]<983534><1614><983741><1616><984655><984695><1614><1577><1613>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615><58>[12:13]<983755><1614><983796><1601><1614>[12:13]<1571><983277><983400><1616><983481><983514><1615><1607><1615>[12:13]<1573><983279><983679><1614><983481><983621><1614>[12:13]<986425><1616><986462>[12:13]<1575><984342><984346><1614><983766><1614><983383><1569><1616>[12:13]<1608><1614><1571><987302><1583><983535><1614><983393><1615>[12:13]<986675><1614><986695><1614>[12:13]<1571><983277><985326><985351><1614><1577><1611><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<984487><1615><985909><1614><983379><58>[12:13]<1571><987302><985890><1614><983379>[12:13]<1570><983755><1616><983524><1612>[12:13]<985713><1616><983514><1617><983274><1575>[12:13]<1571><987302><986383><1617><1616><986462>[12:13]<984349><1614><984354>[12:13]<1571><987302><983755><983494><1614><983782><1616><983481><983393><1615>[12:13]<1571><987302><1606>[12:13]<1571><983277><983575><1614><983547><1617><1616><983709><1614>[12:13]<986675><1614><986695><1614>[12:13]<1585><1614><983395><983488><1614><983494><1614><983621><1614>[12:13]<983535><1614><983379><984364><984369><1616><984385><1614><983379><1585><1615>[12:13]<983679><1614><985828><983744><1614>[12:13]<983613><1615><983491><1614><983379>[12:13]<1575><983679><983481><1614><983796><1605><1614><46>[12:13]<1608><1614><983534><1614><983488><983
673><1614>[12:13]<1571><987302><1606>[12:13]<983440><1615><983984><1616><983998><1617><983275>[12:13]<984487><1615><985909><1614><983379>[12:13]<986009><1614><986085><1614><984916><983693><1614><983608><1615>[12:13]<984071><1614><984098><1614><1571><987302>[12:13]<1575><984364><984369><1616><984385><1614><983379><1585><1615>[12:13]<987640><1614><986646><986663><1614><983709><1615>[12:13]<986425><1616><986462>[12:13]<1573><983279><983736><983782><1614><983488><986225><1616><986229><1616><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615>[12:13]<983736><1614><983514><1616><983440><983547><1615><983608><1615><58>[12:13]<1573><983279><986383><1617><1616><986462>[12:13]<1571><987302><986326><986341><1614><983393><1615>[12:13]<986305><1616><986323><1614><983379><1585><1614><1603><1614>[12:13]<985880><1614><983379>[12:13]<984487><1615><985909><1614><983379>[12:13]<987640><1614><986646><986663><1614><983709><1615><46>[12:13]<983535><1614><983547><1614><983379><1604><1614>[12:13]<986219><1614><986229><1615>[12:13]<984487><1615><985909><1614><983379><58>[12:13]<983395><1614><984449>[13:0]<1616><985085><985130><1612>[12:13]<1571><987302><984434><984449><1615><1603><1614>[12:13]<985880><1614><983379>[12:13]<983736><1614><983514><1616><983440><983904><1616><983927>[12:13]<1571><987302><983453><1615><983741><1614><983514><1617><1616><1602><1615>[12:13]<1575><984364><984369><1616><984385><1614><983379><1585><1614>[12:13]<1608><1614><983453><1615><986184><1614><986208><1617><1616><984185><1614><984229><1616><984259><1567><-TRT>[14:2][12:15]
 [2:0]
 [0:2]
 [0:2]
diff --git a/testfiles/arab2.tlg b/testfiles/arab2.tlg
index 2d3739d2..c36cf0e2 100644
--- a/testfiles/arab2.tlg
+++ b/testfiles/arab2.tlg
@@ -1,8 +1,8 @@
 This is a generated file for the l3build validation system.
 Don't change this file in any respect.
-[9:?]<+TLT><+TRT>[0:3]<983693><1616><983698>[12:13]<983755><1615><983796><1585><1614><1577><1616>[12:13]<1575><983679><985838><1616><1617><983766><1614><983383><1569><1616><58><-TRT>[14:2][12:15]
+[9:?]<+TLT><+TRT>[0:3]<983693><1616><983698>[12:13]<983755><1615><983796><1585><1614><1577><1616>[12:13]<1575><983679><985838><1617><1616><983766><1614><983383><1569><1616><58><-TRT>[14:2][12:15]
 [9:?]<+TLT><+TRT>[0:3]<65021><-TRT>[14:2][12:15]
-[9:?]<+TLT><+TRT>[0:3]<985880><1648><983382><987302><986595><1615><1617><986663><1614><983379>[12:13]<1649><986219><1614><1617><986239><1616><984534><984562><1614>[12:13]<1570><983693><1614><983491><1615><983796><1575>[12:13]<984349><1614><984354>[12:13]<985592><1614><985384><1616><983673><1615><1617>[12:13]<983679><987313><1614><984575><1615><987310><1618>[12:13]<1571><987302><1606><1618>[12:13]<983845><1614><983873><1616><983456><1615><983796><1575>[12:13]<1649><983679><985838><1616><1617><983766><1614><983383><1569><1614>[12:13]<984815><1614><984842><1618><983613><1611><983379><1750>[12:13]<1608><1614><984349><1614><984354>[12:13]<983453><1614><983407><1618><983739><1615><983878><1615><983894><983613><1615><983698><1614><1617>[12:13]<983679><1616><983494><1614><983513><1618><983613><1614><983488><1615><983796><1575>[12:13]<985092><1616><984406><1614><983407><1618><983729><1616>[12:13]<983693><1614><983379>[12:13]<1570><985800><1614><985828><1618><983494><1615><983696><1615><983796><983613><1615><983698><1614><1617>[12:13]<1573><983279><984349><1614><1617><984354>[12:13]<1571><987302><1606>[12:13]<985880><1614><1617><983382><983278><983453><1616><985285><985312><1614>[12:13]<983447><1616><983548><1614><983379><983575><1616><983769><1614><983612><1613>[12:13]<983693><1615><1617><983488><1614><985828><1616><1617><983491><1614><983612><1613><1754>[12:13]<1608><1614><985707><1614><983379><985747><1616><985520><1615><1608><983613><1615><983698><1614><1617>[12:13]<985887><1616><983386><984342><1618><985942><1614><983407><1618><983724><1615><1608><1601><1616><1754>[12:13]<983535><1614><983378><983279><1606><1618>[12:13]<984815><1614><984842><1616><983613><1618><983494><1615><983696><1615><983796><983613><1615><983698><1614><1617>[12:13]<983535><1614><983407><1614><985755><1614><985530><1648>[12:13]<1571><987302><1606><1618>[12:13]<983453><1614><984702><1618><984729><1614><983613><1615><983796><1575>[12:13]<983758><1614><983481><1618><987275><1611><983379>[12:13]<1608>[1
3:0]<1614><1617><985592><1614><985388><1618><983407><1614><983673><1614>[12:13]<1649><983182><983183><1617><1648><983184><1615>[12:13]<983535><1616><983481><983608><1616>[12:13]<985708><1614><984655><1618><984695><1611><1575>[12:13]<983639><1614><983496><1616><984655><984695><1611><1575>[12:13]<1641><1633><1757><-TRT>[14:2][12:15]
+[9:?]<+TLT><+TRT>[0:3]<985880><1648><983382><987302><986595><1617><983277><986663><1614><983379>[12:13]<1649><986219><1617><983275><986239><1616><984534><984562><1614>[12:13]<1570><983693><1614><983491><1615><983796><1575>[12:13]<984349><1614><984354>[12:13]<985592><1614><985384><1616><983673><1617><983277>[12:13]<983679><987313><1614><984575><1615><987310><1618>[12:13]<1571><987302><1606><1618>[12:13]<983845><1614><983873><1616><983456><1615><983796><1575>[12:13]<1649><983679><985838><1617><1616><983766><1614><983383><1569><1614>[12:13]<984815><1614><984842><1618><983613><1611><983379><1750>[12:13]<1608><1614><984349><1614><984354>[12:13]<983453><1614><983407><1618><983739><1615><983878><1615><983894><983613><1615><983698><1614><1617>[12:13]<983679><1616><983494><1614><983513><1618><983613><1614><983488><1615><983796><1575>[12:13]<985092><1616><984406><1614><983407><1618><983729><1616>[12:13]<983693><1614><983379>[12:13]<1570><985800><1614><985828><1618><983494><1615><983696><1615><983796><983613><1615><983698><1614><1617>[12:13]<1573><983279><984349><1614><1617><984354>[12:13]<1571><987302><1606>[12:13]<985880><1614><1617><983382><983278><983453><1616><985285><985312><1614>[12:13]<983447><1616><983548><1614><983379><983575><1616><983769><1614><983612><1613>[12:13]<983693><1615><1617><983488><1614><985828><1616><1617><983491><1614><983612><1613><1754>[12:13]<1608><1614><985707><1614><983379><985747><1616><985520><1615><1608><983613><1615><983698><1614><1617>[12:13]<985887><1616><983386><984342><1618><985942><1614><983407><1618><983724><1615><1608><1601><1616><1754>[12:13]<983535><1614><983378><983279><1606><1618>[12:13]<984815><1614><984842><1616><983613><1618><983494><1615><983696><1615><983796><983613><1615><983698><1614><1617>[12:13]<983535><1614><983407><1614><985755><1614><985530><1648>[12:13]<1571><987302><1606><1618>[12:13]<983453><1614><984702><1618><984729><1614><983613><1615><983796><1575>[12:13]<983758><1614><983481><1618><987275><1611><983379>[12:13]<1
608>[13:0]<1614><1617><985592><1614><985388><1618><983407><1614><983673><1614>[12:13]<1649><983182><983183><1617><1648><983184><1615>[12:13]<983535><1616><983481><983608><1616>[12:13]<985708><1614><984655><1618><984695><1611><1575>[12:13]<983639><1614><983496><1616><984655><984695><1611><1575>[12:13]<1641><1633><1757><-TRT>[14:2][12:15]
 [2:0]
 [0:2]
 [0:2]





More information about the latex3-commits mailing list.