texlive[59846] Master/texmf-dist: lua-uni-algos (5jul21)

commits+karl at tug.org commits+karl at tug.org
Mon Jul 5 22:55:06 CEST 2021


Revision: 59846
          http://tug.org/svn/texlive?view=revision&revision=59846
Author:   karl
Date:     2021-07-05 22:55:05 +0200 (Mon, 05 Jul 2021)
Log Message:
-----------
lua-uni-algos (5jul21)

Modified Paths:
--------------
    trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md
    trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.pdf
    trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex
    trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua

Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md	2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md	2021-07-05 20:55:05 UTC (rev 59846)
@@ -1,8 +1,8 @@
 # The lua-uni-algos Package
 
-Version: v0.2
+Version: v0.3
 
-Date: 2020-05-17
+Date: 2021-07-05
 
 Author: Marcel Krüger
 

Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex
===================================================================
--- trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex	2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex	2021-07-05 20:55:05 UTC (rev 59846)
@@ -2,7 +2,7 @@
 \usepackage{doc, shortvrb, metalogo, hyperref, fontspec}
 % \setmainfont{Noto Serif}
 % \setmonofont{FreeMono}
-\title{Unicode algorithms for Lua\TeX}
+\title{Unicode algorithms for Lua\TeX\thanks{This document corresponds to \pkg{lua-uni-algos} v0.3.}}
 \author{Marcel Krüger\thanks{E-Mail: \href{mailto:tex at 2krueger.de}{\nolinkurl{tex at 2krueger.de}}}}
 \MakeShortVerb\|
 \newcommand\pkg{\texttt}
@@ -51,6 +51,12 @@
 (This example is shown in Latin Modern Mono which has the (for this purpose) very useful property of not handling combining character very well.
 In a well-behaving font, the `...C` and `...D` lines should look the same.)
 
+Additionally, for NFC, direct normalization of Lua\TeX\ node lists is supported.
+There are two functions |normalize.node.NFC| and |normalize.direct.NFC| taking up to four parameters: The first parameter is the head of the node list to be converted.
+The second parameter is the font id of the affected character nodes. Only non-protected glyph nodes of the specified font will be normalized. Pass |nil| for the font
+to normalize without respecting the font in the process. The third parameter is an optional table. If it is not |nil|, normalization is suppressed if it might add glyphs
+which map to |false| (or |nil|) in this table. If the fourth argument is |true|, normalization will never join two glyph nodes with different attributes.
+
 \section{Case folding}
 For case folding load the Lua module |lua-uni-case|.
 You can either load it directly with

Modified: trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua
===================================================================
--- trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua	2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua	2021-07-05 20:55:05 UTC (rev 59846)
@@ -169,6 +169,7 @@
   codepoints[new_pos] = first
   return ccc_reorder(codepoints, i, j, k == i and i or k-1)
 end
+
 local result_table = {}
 local function get_string()
   local result_table = result_table
@@ -269,11 +270,198 @@
   return to_nfc_generic(s, compatibility_mapping)
 end
 
+-- Normalize the character nodes of a node list to NFC in place and return the list head. `f` is handed to node.is_char to select the nodes; if `preserve_attr` is true, nodes whose `attr` fields differ are never composed.
+-- allowed_characters (optional table) only works reliably if it's closed under canonical decomposition mappings but it should fail in reasonable ways as long as it's at least closed under full canonical decompositions
+local function nodes_to_nfc(head, f, allowed_characters, preserve_attr)
+  if not head then return head end
+  local tmp_node = node.new'temp' -- sentinel node: linked into the list during passes 2 and 3 and freed before returning
+  -- This is more complicated since we want to ensure that nodes (including their attributes and properties) are preserved whenever possible
+  --
+  -- We use three passes:
+  -- 1. Decompose composition exclusions etc.
+  local n = head
+  while n do
+    local char = node.is_char(n, f)
+    if char then
+      local decomposed = decomposition_mapping[char]
+      if decomposed then
+        local compose_lookup = composition_mapping[decomposed[1]]
+        if not (compose_lookup and compose_lookup[decomposed[2]]) then -- the first two decomposed characters have no composition entry
+          local available = true
+          if allowed_characters then
+            -- This is probably buggy for weird fonts
+            for i=1, #decomposed do
+              if not allowed_characters[decomposed[i]] then
+                available = false
+                break
+              end
+            end
+          end
+          if available then
+            -- Here we never want to compose again, so we can decompose directly
+            n.char = decomposed[1]
+            for i=2, #decomposed do
+              local nn = node.copy(n) -- every further character gets a copy of the original node
+              nn.char = decomposed[i]
+              node.insert_after(head, n, nn)
+              n = nn
+            end
+          end
+        end
+      end
+    end
+    n = n.next
+  end
+  -- 2. Reorder marks
+  local last_ccc
+  n = head
+  local prev = head.prev
+  tmp_node.next, head.prev = head, tmp_node -- put the sentinel in front of head; the original head.prev is kept in prev
+  while n do
+    local char = node.is_char(n, f)
+    if char then
+      local this_ccc = ccc[char]
+      if last_ccc and this_ccc and last_ccc > this_ccc then
+        local nn = n
+        while nn ~= tmp_node do -- walk backwards to the node after which n has to be placed
+          nn = nn.prev
+          local nn_char = node.is_char(nn, f)
+          if not nn_char then break end
+          local nn_ccc = ccc[nn_char]
+          if not nn_ccc or nn_ccc <= this_ccc then break end
+        end
+        local before, after = n.prev, n.next
+        node.insert_after(head, nn, n)
+        before.next = after -- close the gap at the old position of n
+        if after then after.prev = before end
+        n = after
+      else
+        n = n.next
+        last_ccc = this_ccc
+      end
+    else
+      n = n.next
+      last_ccc = nil
+    end
+  end
+  head = tmp_node.next; head.prev = prev -- two statements: prev must be stored on the new head even if reordering moved another node to the front
+  -- 3. The rest: Maybe decompose and then compose again
+  local starter_n, starter, lookup
+  local starter_decomposition
+  local last_ccc
+  local i -- index into starter_decomposition
+  local i_ccc
+  n = head
+  node.insert_after(head, nil, tmp_node) -- NOTE(review): presumably appends the sentinel at the end of the list, confirm in the LuaTeX manual
+  repeat
+    local char = node.is_char(n, f)
+    local this_ccc = ccc[char] or 300 -- 300 stands for everything without a ccc entry, including non-character nodes
+    while i and i_ccc <= this_ccc do -- emit the pending decomposed characters which belong in front of n
+      local new_starter = lookup and lookup[starter_decomposition[i]]
+      if new_starter and (not allowed_characters or allowed_characters[new_starter]) then
+        starter = new_starter
+        starter_n.char = starter
+        lookup = composition_mapping[starter]
+      else
+        local nn = node.copy(starter_n)
+        nn.char = starter_decomposition[i]
+        node.insert_before(head, n, nn)
+        last_ccc = i_ccc
+      end
+      i = i + 1
+      local i_char = starter_decomposition[i]
+      if i_char then
+        i_ccc = ccc[starter_decomposition[i]] or 300
+      else
+        i = nil
+      end
+    end
+    if char then
+      if lookup and (this_ccc == 300) == (this_ccc == last_ccc) then
+        local new_starter = lookup[char]
+        if new_starter and (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+          local last = n.prev
+          node.remove(head, n)
+          node.free(n)
+          n = last
+          starter = new_starter
+          starter_n.char, char = starter, starter
+          lookup = composition_mapping[starter]
+        else
+          last_ccc = this_ccc
+        end
+       -- Now handle Hangul syllables. We never decompose them since we would just recompose them anyway and they are starters
+      elseif not lookup and this_ccc == 300 and last_ccc == 300 then
+        if starter >= 0x1100 and starter <= 0x1112 and char >= 0x1161 and char <= 0x1175 then -- L + V -> LV
+          local new_starter = ((starter - 0x1100) * 21 + char - 0x1161) * 28 + 0xAC00
+          if (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+            node.remove(head, n)
+            node.free(n)
+            starter = new_starter -- the composed LV syllable replaces L, as in the LV + T branch below
+            starter_n.char, char = starter, starter
+            lookup = composition_mapping[starter]
+            n = starter_n
+          end
+        elseif char >= 0x11A8 and char <= 0x11C2 and starter >= 0xAC00 and starter <= 0xD7A3 and (starter-0xAC00) % 28 == 0 then -- LV + T -> LVT
+          local new_starter = starter + char - 0x11A7
+          if (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+            node.remove(head, n)
+            node.free(n)
+            starter = new_starter
+            starter_n.char, char = starter, starter
+            lookup = composition_mapping[starter]
+            n = starter_n
+          end
+        end
+      else
+        last_ccc = this_ccc
+      end
+      if this_ccc == 300 then -- n becomes the current starter; queue its decomposition if every part is allowed
+        starter_n = n
+        starter_decomposition = decomposition_mapping[char]
+        if allowed_characters and starter_decomposition then
+          for i=1, #starter_decomposition do
+            if not allowed_characters[starter_decomposition[i]] then
+              starter_decomposition = nil
+              break
+            end
+          end
+        end
+        starter = starter_decomposition and starter_decomposition[1] or char
+        starter_n.char = starter
+        lookup = composition_mapping[starter]
+        if starter_decomposition then
+          i, i_ccc = 2, ccc[starter_decomposition[2]] or 300
+        else
+          i, i_ccc = nil
+        end
+      end
+    else
+      starter, lookup, last_ccc, starter_decomposition, i, i_ccc = nil -- not a character node: forget the current starter state
+    end
+    if n == tmp_node then -- the sentinel is reached: unlink it and stop
+      node.remove(head, tmp_node)
+      break
+    end
+    n = n.next
+  until false
+  node.free(tmp_node)
+  return head
+end
+
+local todirect, tonode = node.direct.todirect, node.direct.tonode -- used below to wrap nodes_to_nfc for the direct interface
+
 return {
   NFD = to_nfd,
   NFC = to_nfc,
   NFKD = to_nfkd,
   NFKC = to_nfkc,
+  node = {
+    NFC = nodes_to_nfc, -- NFC normalization of a node list, called with the list head itself
+  },
+  direct = {
+    NFC = function(head, ...) return todirect(nodes_to_nfc(tonode(head), ...)) end, -- same function; head goes through tonode and the result through todirect
+  },
 }
 -- print(require'inspect'{to_nfd{0x1E0A}, to_nfc{0x1E0A}})
 



More information about the tex-live-commits mailing list.