texlive[59846] Master/texmf-dist: lua-uni-algos (5jul21)
commits+karl at tug.org
Mon Jul 5 22:55:06 CEST 2021
Revision: 59846
http://tug.org/svn/texlive?view=revision&revision=59846
Author: karl
Date: 2021-07-05 22:55:05 +0200 (Mon, 05 Jul 2021)
Log Message:
-----------
lua-uni-algos (5jul21)
Modified Paths:
--------------
trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md
trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.pdf
trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex
trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua
Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md 2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/README.md 2021-07-05 20:55:05 UTC (rev 59846)
@@ -1,8 +1,8 @@
# The lua-uni-algos Package
-Version: v0.2
+Version: v0.3
-Date: 2020-05-17
+Date: 2021-07-05
Author: Marcel Krüger
Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex
===================================================================
--- trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex 2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/doc/luatex/lua-uni-algos/lua-uni-algos.tex 2021-07-05 20:55:05 UTC (rev 59846)
@@ -2,7 +2,7 @@
\usepackage{doc, shortvrb, metalogo, hyperref, fontspec}
% \setmainfont{Noto Serif}
% \setmonofont{FreeMono}
-\title{Unicode algorithms for Lua\TeX}
+\title{Unicode algorithms for Lua\TeX\thanks{This document corresponds to \pkg{lua-uni-algos} v0.3.}}
\author{Marcel Krüger\thanks{E-Mail: \href{mailto:tex at 2krueger.de}{\nolinkurl{tex at 2krueger.de}}}}
\MakeShortVerb\|
\newcommand\pkg{\texttt}
@@ -51,6 +51,12 @@
(This example is shown in Latin Modern Mono, which has the (for this purpose) very useful property of not handling combining characters very well.
In a well-behaving font, the `...C` and `...D` lines should look the same.)
+Additionally, for NFC, direct normalization of Lua\TeX\ node lists is supported.
+There are two functions, |normalize.node.NFC| and |normalize.direct.NFC|, taking up to four parameters: the first parameter is the head of the node list to be converted.
+The second parameter is the font id of the affected character nodes. Only non-protected glyph nodes of the specified font will be normalized. Pass |nil| for the font
+to normalize glyph nodes regardless of their font. The third parameter is an optional table. If it is not |nil|, normalization is suppressed if it might add glyphs
+which map to |false| (or |nil|) in this table. If the fourth argument is |true|, normalization will never join two glyph nodes with different attributes.
+
\section{Case folding}
For case folding load the Lua module |lua-uni-case|.
You can either load it directly with
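As a usage sketch for the node-list interface documented in the hunk above (not taken from the package documentation; the callback choice and the way the font id and allowed-character table are obtained are assumptions for illustration):

  -- Normalize to NFC before line breaking, restricted to the current font
  -- and to characters that font actually provides (illustrative sketch).
  local normalize = require 'lua-uni-normalize'

  local function nfc_current_font(head)
    local fid = font.current()                 -- font id to restrict to
    local allowed = {}
    for cp in pairs(font.getfont(fid).characters) do
      allowed[cp] = true                       -- codepoints present in the font
    end
    -- arguments: head, font id (or nil for any font),
    -- allowed-character table (or nil), preserve-attributes flag
    return normalize.node.NFC(head, fid, allowed, true)
  end

  luatexbase.add_to_callback('pre_linebreak_filter', nfc_current_font, 'nfc-example')

normalize.direct.NFC takes the same arguments but expects and returns a node.direct head, so it fits callbacks that already operate on direct nodes.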
Modified: trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua
===================================================================
--- trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua 2021-07-05 20:54:50 UTC (rev 59845)
+++ trunk/Master/texmf-dist/tex/luatex/lua-uni-algos/lua-uni-normalize.lua 2021-07-05 20:55:05 UTC (rev 59846)
@@ -169,6 +169,7 @@
codepoints[new_pos] = first
return ccc_reorder(codepoints, i, j, k == i and i or k-1)
end
+
local result_table = {}
local function get_string()
local result_table = result_table
@@ -269,11 +270,198 @@
return to_nfc_generic(s, compatibility_mapping)
end
+-- allowed_characters only works reliably if it's closed under canonical decomposition mappings
+-- but it should fail in reasonable ways as long as it's at least closed under full canonical decompositions
+local function nodes_to_nfc(head, f, allowed_characters, preserve_attr)
+ if not head then return head end
+ local tmp_node = node.new'temp'
+ -- This is more complicated since we want to ensure that nodes (including their attributes and properties) are preserved whenever possible
+ --
+ -- We use three passes:
+ -- 1. Decompose composition exclusions etc.
+ local n = head
+ while n do
+ local char = node.is_char(n, f)
+ if char then
+ local decomposed = decomposition_mapping[char]
+ if decomposed then
+ local compose_lookup = composition_mapping[decomposed[1]]
+ if not (compose_lookup and compose_lookup[decomposed[2]]) then
+ local available = true
+ if allowed_characters then
+ -- This is probably buggy for weird fonts
+ for i=1, #decomposed do
+ if not allowed_characters[decomposed[i]] then
+ available = false
+ break
+ end
+ end
+ end
+ if available then
+ -- Here we never want to compose again, so we can decompose directly
+ n.char = decomposed[1]
+ for i=2, #decomposed do
+ local nn = node.copy(n)
+ nn.char = decomposed[i]
+ node.insert_after(head, n, nn)
+ n = nn
+ end
+ end
+ end
+ end
+ end
+ n = n.next
+ end
+ -- 2. Reorder marks
+ local last_ccc
+ n = head
+ local prev = head.prev
+ tmp_node.next, head.prev = head, tmp_node
+ while n do
+ local char = node.is_char(n, f)
+ if char then
+ local this_ccc = ccc[char]
+ if last_ccc and this_ccc and last_ccc > this_ccc then
+ local nn = n
+ while nn ~= tmp_node do
+ nn = nn.prev
+ local nn_char = node.is_char(nn, f)
+ if not nn_char then break end
+ local nn_ccc = ccc[nn_char]
+ if not nn_ccc or nn_ccc <= this_ccc then break end
+ end
+ local before, after = n.prev, n.next
+ node.insert_after(head, nn, n)
+ before.next = after
+ if after then after.prev = before end
+ n = after
+ else
+ n = n.next
+ last_ccc = this_ccc
+ end
+ else
+ n = n.next
+ last_ccc = nil
+ end
+ end
+ head, head.prev = tmp_node.next, prev
+ -- 3. The rest: Maybe decompose and then compose again
+ local starter_n, starter, lookup
+ local starter_decomposition
+ local last_ccc
+ local i -- index into starter_decomposition
+ local i_ccc
+ n = head
+ node.insert_after(head, nil, tmp_node)
+ repeat
+ local char = node.is_char(n, f)
+ local this_ccc = ccc[char] or 300
+ while i and i_ccc <= this_ccc do
+ local new_starter = lookup and lookup[starter_decomposition[i]]
+ if new_starter and (not allowed_characters or allowed_characters[new_starter]) then
+ starter = new_starter
+ starter_n.char = starter
+ lookup = composition_mapping[starter]
+ else
+ local nn = node.copy(starter_n)
+ nn.char = starter_decomposition[i]
+ node.insert_before(head, n, nn)
+ last_ccc = i_ccc
+ end
+ i = i + 1
+ local i_char = starter_decomposition[i]
+ if i_char then
+ i_ccc = ccc[starter_decomposition[i]] or 300
+ else
+ i = nil
+ end
+ end
+ if char then
+ if lookup and (this_ccc == 300) == (this_ccc == last_ccc) then
+ local new_starter = lookup[char]
+ if new_starter and (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+ local last = n.prev
+ node.remove(head, n)
+ node.free(n)
+ n = last
+ starter = new_starter
+ starter_n.char, char = starter, starter
+ lookup = composition_mapping[starter]
+ else
+ last_ccc = this_ccc
+ end
+ -- Now handle Hangul syllables. We never decompose them since we would just recompose them anyway and they are starters
+ elseif not lookup and this_ccc == 300 and last_ccc == 300 then
+ if starter >= 0x1100 and starter <= 0x1112 and char >= 0x1161 and char <= 0x1175 then -- L + V -> LV
+ local new_starter = ((starter - 0x1100) * 21 + char - 0x1161) * 28 + 0xAC00
+ if (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+ node.remove(head, n)
+ node.free(n)
+ starter = new_starter
+ starter_n.char, char = starter, starter
+ lookup = composition_mapping[starter]
+ n = starter_n
+ end
+ elseif char >= 0x11A8 and char <= 0x11C2 and starter >= 0xAC00 and starter <= 0xD7A3 and (starter-0xAC00) % 28 == 0 then -- LV + T -> LVT
+ local new_starter = starter + char - 0x11A7
+ if (not allowed_characters or allowed_characters[new_starter]) and (not preserve_attr or starter_n.attr == n.attr) then
+ node.remove(head, n)
+ node.free(n)
+ starter = new_starter
+ starter_n.char, char = starter, starter
+ lookup = composition_mapping[starter]
+ n = starter_n
+ end
+ end
+ else
+ last_ccc = this_ccc
+ end
+ if this_ccc == 300 then
+ starter_n = n
+ starter_decomposition = decomposition_mapping[char]
+ if allowed_characters and starter_decomposition then
+ for i=1, #starter_decomposition do
+ if not allowed_characters[starter_decomposition[i]] then
+ starter_decomposition = nil
+ break
+ end
+ end
+ end
+ starter = starter_decomposition and starter_decomposition[1] or char
+ starter_n.char = starter
+ lookup = composition_mapping[starter]
+ if starter_decomposition then
+ i, i_ccc = 2, ccc[starter_decomposition[2]] or 300
+ else
+ i, i_ccc = nil
+ end
+ end
+ else
+ starter, lookup, last_ccc, starter_decomposition, i, i_ccc = nil
+ end
+ if n == tmp_node then
+ node.remove(head, tmp_node)
+ break
+ end
+ n = n.next
+ until false
+ node.free(tmp_node)
+ return head
+end
+
+local todirect, tonode = node.direct.todirect, node.direct.tonode
+
return {
NFD = to_nfd,
NFC = to_nfc,
NFKD = to_nfkd,
NFKC = to_nfkc,
+ node = {
+ NFC = nodes_to_nfc,
+ },
+ direct = {
+ NFC = function(head, ...) return todirect(nodes_to_nfc(tonode(head), ...)) end,
+ },
}
-- print(require'inspect'{to_nfd{0x1E0A}, to_nfc{0x1E0A}})
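The Hangul branch of nodes_to_nfc composes jamo arithmetically instead of going through the composition tables. A quick standalone check of the formulas used above (illustrative only, not part of the package):

  -- L + V -> LV: ((L - 0x1100) * 21 + V - 0x1161) * 28 + 0xAC00
  local L, V, T = 0x1100, 0x1161, 0x11A8    -- choseong kiyeok, jungseong a, jongseong kiyeok
  local LV = ((L - 0x1100) * 21 + V - 0x1161) * 28 + 0xAC00
  assert(LV == 0xAC00)                      -- U+AC00 HANGUL SYLLABLE GA
  -- LV + T -> LVT: the trailing consonant adds (T - 0x11A7)
  assert(LV + (T - 0x11A7) == 0xAC01)       -- U+AC01 HANGUL SYLLABLE GAG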