[latex3-commits] [git/LaTeX3-latex3-luaotfload] dev: New discretionary handling approach (441331b)
Marcel Fabian Krüger
tex at 2krueger.de
Fri Oct 30 01:11:42 CET 2020
Repository : https://github.com/latex3/luaotfload
On branch : dev
Link : https://github.com/latex3/luaotfload/commit/441331b4810a1ced1cce4d108e3c39d9be8105b1
>---------------------------------------------------------------
commit 441331b4810a1ced1cce4d108e3c39d9be8105b1
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date: Wed Oct 28 12:22:13 2020 +0100
New discretionary handling approach
>---------------------------------------------------------------
441331b4810a1ced1cce4d108e3c39d9be8105b1
src/luaotfload-harf-plug.lua | 394 +++++++++++++++++++++++--------------------
1 file changed, 212 insertions(+), 182 deletions(-)
diff --git a/src/luaotfload-harf-plug.lua b/src/luaotfload-harf-plug.lua
index e298be0..05dca48 100644
--- a/src/luaotfload-harf-plug.lua
+++ b/src/luaotfload-harf-plug.lua
@@ -43,6 +43,7 @@ local removenode = direct.remove
local copynodelist = direct.copy_list
local ischar = direct.is_char
local uses_font = direct.uses_font
+local length = direct.length
local getattrs = direct.getattributelist
local setattrs = direct.setattributelist
@@ -78,11 +79,11 @@ local setwidth = direct.setwidth
local setlist = direct.setlist
local is_char = direct.is_char
local tail = direct.tail
+local getboth = direct.getboth
+local setlink = direct.setlink
local properties = direct.get_properties_table()
-local imgnode = img.node
-
local hlist_t = node.id("hlist")
local disc_t = node.id("disc")
local glue_t = node.id("glue")
@@ -179,8 +180,12 @@ local function itemize(head, fontid, direction)
local currdir = direction or 0
local lastskip, lastdir = true
local lastrun = {}
+ local lastdisc
+ local in_disc
for n, id, subtype in traverse(head) do
+ if in_disc == n then in_disc = nil end
+ local disc
local code = 0xFFFC -- OBJECT REPLACEMENT CHARACTER
local skip = lastskip
local props = properties[n]
@@ -200,8 +205,27 @@ local function itemize(head, fontid, direction)
code = 0x0020 -- SPACE
elseif id == disc_t then
if uses_font(n, fontid) then
- code = 0x00AD -- SOFT HYPHEN
+ local _, _, rep, _, _, rep_tail = getdisc(n, true)
+ setfield(n, 'replace', nil)
+ local prev, next = getboth(n)
+ in_disc = next
+ disc = {
+ disc = n,
+ anchor_cluster = #codes - (lastrun.start or 0),
+ after_cluster = #codes - (lastrun.start or 0) + length(rep),
+ }
+ if rep then
+ setlink(prev, rep)
+ setlink(rep_tail, next)
+ setnext(n, rep) -- This one is just to keep the loop going
+ else
+ setlink(prev, next)
+ end
+ code = nil
skip = false
+ if not prev then
+ head = n
+ end
else
skip = true
end
@@ -221,26 +245,39 @@ local function itemize(head, fontid, direction)
currdir = getdirection(n)
end
- codes[#codes + 1] = code
+ local ncodes = #codes -- Necessary to count discs correctly
+ codes[ncodes + 1] = code
if lastdir ~= currdir or lastskip ~= skip then
+ if disc then
+ disc.after_cluster = disc.after_cluster - disc.anchor_cluster
+ disc.anchor_cluster = 0
+ end
lastrun.after = n
lastrun = {
- start = #codes,
- len = 1,
+ start = ncodes + 1,
+ len = code and 1 or 0,
font = fontid,
dir = currdir == 1 and dir_rtl or dir_ltr,
skip = skip,
codes = codes,
+ discs = disc,
}
runs[#runs + 1] = lastrun
- lastdir, lastskip = currdir, skip
- else
+ lastdir, lastskip, lastdisc = currdir, skip, disc
+ elseif code then
lastrun.len = lastrun.len + 1
+ elseif disc then
+ if lastdisc then
+ lastdisc.next = disc
+ lastdisc = disc
+ else
+ lastrun.discs, lastdisc = disc, disc
+ end
end
end
- return runs
+ return head, runs
end
@@ -261,7 +298,6 @@ local function makesub(run, codes, nodelist)
font = run.font,
dir = run.dir,
fordisc = true,
- node = nodelist,
codes = codes,
}
local glyphs
@@ -296,13 +332,12 @@ function shape(head, firstnode, run)
local node = firstnode
local codes = run.codes
local offset = run.start
- local nodeindex = offset
run.start = offset
local len = run.len
local fontid = run.font
local dir = run.dir
- local fordisc = run.fordisc
- local cluster = offset - 2
+ local cluster
+ local discs = (not run.fordisc) and run.discs
local fontdata = font.getfont(fontid)
local hbdata = fontdata.hb
@@ -372,182 +407,177 @@ function shape(head, firstnode, run)
local glyphs = buf:get_glyphs()
+ local break_glyph, break_cluster, break_node = 1, offset-1, node
+ local disc_glyph, disc_cluster, disc_node
+ local disc_cluster
+ -- local disc2_node, disc2_index -- TODO: Hopefully later
local i = 0
- while i < #glyphs do
- i = i + 1
- local glyph = glyphs[i]
-
- -- Calculate the Unicode code points of this glyph. If cluster did not
- -- change then this is a glyph inside a complex cluster and will be
- -- handled with the start of its cluster.
- if cluster ~= glyph.cluster then
- cluster = glyph.cluster
- for i = nodeindex, cluster do node = getnext(node) end
- nodeindex = cluster + 1
- local nextcluster
- for j = i+1, #glyphs do
- nextcluster = glyphs[j].cluster
- if cluster ~= nextcluster then
- glyph.nglyphs = j - i
- goto NEXTCLUSTERFOUND -- break
- end
- end -- else -- only executed if the loop reached the end without
- -- finding another cluster
- nextcluster = offset + len - 1
- glyph.nglyphs = #glyphs + 1 - i
- ::NEXTCLUSTERFOUND:: -- end
- glyph.nextcluster = nextcluster
- local disc, discindex
- do
- local hex = ""
- local str = ""
- local node = node
- for j = cluster+1,nextcluster do
- local char, id = is_char(node, fontid)
- if char then
- -- assert(char == codes[j])
- hex = hex .. to_utf16_hex(char)
- str = str .. utf8.char(char)
- elseif not discindex and id == disc_t then
- local props = properties[disc]
- if not (props and props.zwnj) then
- disc, discindex = node, j
- end
- end
+ local glyph
+ -- The following is a repeat {...} while glyph {...} loop.
+ while true do
+ repeat
+ i = i+1
+ glyph = glyphs[i]
+ until not glyph or glyph.cluster ~= cluster
+ do
+ local oldcluster = cluster
+ cluster = glyph and glyph.cluster or offset + len - 1
+ if oldcluster then
+ for _ = oldcluster+1, cluster do
node = getnext(node)
end
- glyph.tounicode = hex
- glyph.string = str
end
- if not fordisc and discindex then
- -- Discretionary found.
- local startindex, stopindex = nil, nil
- local startglyph, stopglyph = nil, nil
-
- -- Find the previous glyph that is safe to break at.
- local startglyph = i
- while startglyph > 1
- and codes[glyphs[startglyph - 1].cluster + 1] ~= 0x20
- and codes[glyphs[startglyph - 1].cluster + 1] ~= 0xFFFC
- and (unsafetobreak(glyphs[startglyph])
- or glyphs[startglyph].cluster == glyphs[startglyph-1].cluster) do
- startglyph = startglyph - 1
- end
- -- Get the corresponding character index.
- startindex = glyphs[startglyph].cluster + 1
-
- -- Find the next glyph that is safe to break at.
- stopglyph = i + 1
- local lastcluster = glyphs[i].cluster
- while stopglyph <= #glyphs
- and codes[glyphs[stopglyph].cluster + 1] ~= 0x20
- and codes[glyphs[stopglyph].cluster + 1] ~= 0xFFFC
- and (unsafetobreak(glyphs[stopglyph])
- or lastcluster == glyphs[stopglyph].cluster) do
- lastcluster = glyphs[stopglyph].cluster
- stopglyph = stopglyph + 1
- end
+ end
- stopindex = stopglyph <= #glyphs and glyphs[stopglyph].cluster + 1
- or offset + len
+ -- Is this a safe breakpoint?
+ if discs and ((not glyph) or codes[cluster+1] == 0x20 or codes[cluster+1] == 0xFFFC
+ or not unsafetobreak(glyph)) then
+ -- Should we change the discretionary state?
+ local anchor_cluster, after_cluster = offset + discs.anchor_cluster, offset + discs.after_cluster
+ while disc_cluster and after_cluster <= cluster
+ or not disc_cluster and anchor_cluster <= cluster do
+ if disc_cluster then
+
+ local rep_glyphs = table.move(glyphs, disc_glyph, i - 1, 1, {})
+ for j = 1, #rep_glyphs do
+ local glyph = rep_glyphs[j]
+ glyph.cluster = glyph.cluster - disc_cluster
+ glyph.nextcluster = glyph.nextcluster - disc_cluster
+ end
+ do
+ local cluster_offset = 1 + disc_cluster - cluster -- The offset the glyph indices will move
+ for j = i, #glyphs do
+ local glyph = glyphs[j]
+ glyph.cluster = glyph.cluster + cluster_offset
+ end
+ len = len + cluster_offset
+ table.move(glyphs, i, #glyphs + i - disc_glyph, disc_glyph + 1)
+
+ local discs = discs.next
+ while discs do
+ discs.anchor_cluster = discs.anchor_cluster + cluster_offset
+ discs.after_cluster = discs.after_cluster + cluster_offset
+ discs = discs.next
+ end
+ end
- local startnode, stopnode = node, node
- for j=nodeindex - 1, startindex, -1 do
- startnode = getprev(startnode)
- end
- for j=nodeindex + 1, stopindex do
- stopnode = getnext(stopnode)
- end
+ -- TODO: Remove nested discretionaries from discs
- glyphs[startglyph] = glyph
- glyph.cluster = startindex - 1
- glyph.nextcluster = startindex
- for j = stopglyph, #glyphs do
- local glyph = glyphs[j]
- glyph.cluster = glyph.cluster - (stopindex - startindex) + 1
- end
- len = len - (stopindex - startindex) + 1
- table.move(glyphs, stopglyph, #glyphs + stopglyph - startglyph - 1, startglyph + 1)
-
- local subcodes, subindex = {}
- do
- local node = startnode
- while node ~= stopnode do
- if node == disc then
- subindex = #subcodes
- startindex = startindex + 1
- node = getnext(node)
- elseif getid(node) == disc_t then
- local oldnode = node
- startnode, node = removenode(startnode, node)
- freenode(oldnode)
- tableremove(codes, startindex)
+ local pre, post, _, _, lastpost, _ = getdisc(discs.disc, true)
+ local precodes, postcodes = {}, {}
+ table.move(codes, disc_cluster + 1, anchor_cluster, 1, precodes)
+ for n in traverse(pre) do
+ precodes[#precodes + 1] = is_char(n, fontid) or 0xFFFC
+ end
+ for n in traverse(post) do
+ postcodes[#postcodes + 1] = is_char(n, fontid) or 0xFFFC
+ end
+ table.move(codes, after_cluster + 1, cluster, #postcodes + 1, postcodes)
+ table.move(codes, cluster + 1, #codes + cluster - disc_cluster - 1, disc_cluster + 2)
+ codes[disc_cluster + 1] = 0xFFFC
+
+ do
+ local iter = disc_node
+ for _ = disc_cluster, anchor_cluster-1 do iter = getnext(iter) end
+ if iter ~= disc_node then
+ local newpre = copynodelist(disc_node, iter)
+ setlink(tail(newpre), pre)
+ pre = newpre
+ end
+ for _ = anchor_cluster, after_cluster-1 do iter = getnext(iter) end
+ if post then
+ setlink(lastpost, copynodelist(iter, node))
else
- subcodes[#subcodes + 1] = tableremove(codes, startindex)
- node = getnext(node)
+ post = copynodelist(iter, node)
end
end
- end
-
- local pre, post, rep, lastpre, lastpost, lastrep = getdisc(disc, true)
- local precodes, postcodes, repcodes = {}, {}, {}
- table.move(subcodes, 1, subindex, 1, repcodes)
- for n, id, subtype in traverse(rep) do
- repcodes[#repcodes + 1] = getfont(n) == fontid and getchar(n) or 0xFFFC
- end
- table.move(subcodes, subindex + 1, #subcodes, #repcodes + 1, repcodes)
- table.move(subcodes, 1, subindex, 1, precodes)
- for n, id, subtype in traverse(pre) do
- precodes[#precodes + 1] = getfont(n) == fontid and getchar(n) or 0xFFFC
- end
- for n, id, subtype in traverse(post) do
- postcodes[#postcodes + 1] = getfont(n) == fontid and getchar(n) or 0xFFFC
- end
- table.move(subcodes, subindex + 1, #subcodes, #postcodes + 1, postcodes)
- if startnode ~= disc then
- local newpre = copynodelist(startnode, disc)
- setnext(tail(newpre), pre)
- pre = newpre
- end
- if post then
- setnext(lastpost, copynodelist(getnext(disc), stopnode))
- else
- post = copynodelist(getnext(disc), stopnode)
- end
- if startnode ~= disc then
- local predisc = getprev(disc)
- setnext(predisc, rep)
- setprev(rep, predisc)
- if firstnode == startnode then
- firstnode = disc
+ local prev = getprev(disc_node)
+ if disc_cluster ~= cluster then
+ setprev(disc_node, nil)
+ setnext(getprev(node), nil)
end
- if startnode == head then
- head = disc
- else
- local before = getprev(startnode)
- setnext(before, disc)
- setprev(disc, before)
+ setlink(prev, discs.disc, node)
+ if disc_node == firstnode then
+ firstnode = discs.disc
+ if head == disc_node then
+ head = firstnode
+ end
end
- setprev(startnode, nil)
- rep = startnode
- lastrep = lastrep or predisc
+ glyphs[disc_glyph] = {
+ replace = {
+ glyphs = rep_glyphs,
+ head = disc_node ~= node and disc_node or nil,
+ run = {
+ start = 1,
+ font = run.font,
+ dir = run.dir,
+ },
+ },
+ pre = makesub(run, precodes, pre),
+ post = makesub(run, postcodes, post),
+ cluster = disc_cluster,
+ nextcluster = disc_cluster + 1,
+ codepoint = 0xFFFC,
+ }
+ i = disc_glyph
+ node = discs.disc
+ cluster = disc_cluster
+
+ disc_cluster = nil
+ discs = discs.next
+ if not discs then break end
+ anchor_cluster, after_cluster = offset + discs.anchor_cluster, offset + discs.after_cluster
+ elseif anchor_cluster == cluster then
+ disc_glyph, disc_cluster, disc_node = i, cluster, node
+ else
+ disc_glyph, disc_cluster, disc_node = break_glyph, break_cluster, break_node
end
- if getnext(disc) ~= stopnode then
- setnext(getprev(stopnode), nil)
- setprev(stopnode, disc)
- setprev(getnext(disc), lastrep)
- setnext(lastrep, getnext(disc))
- rep = rep or getnext(disc)
- setnext(disc, stopnode)
+ end
+ break_glyph, break_cluster, break_node = i, cluster, node
+ end
+
+ if not glyph then break end
+
+ local nextcluster
+ for j = i+1, #glyphs do
+ nextcluster = glyphs[j].cluster
+ if cluster ~= nextcluster then
+ glyph.nglyphs = j - i
+ goto NEXTCLUSTERFOUND -- break
+ end
+ end -- else -- only executed if the loop reached the end without
+ -- finding another cluster
+ nextcluster = offset + len - 1
+ glyph.nglyphs = #glyphs + 1 - i
+ ::NEXTCLUSTERFOUND:: -- end
+ glyph.nextcluster = nextcluster
+
+ local disc, discindex
+ -- Calculate the Unicode code points of this glyph. If cluster did not
+ -- change then this is a glyph inside a complex cluster and will be
+ -- handled with the start of its cluster.
+ do
+ local hex = ""
+ local str = ""
+ local node = node
+ for j = cluster+1,nextcluster do
+ local char, id = is_char(node, fontid)
+ if char then
+ -- assert(char == codes[j])
+ hex = hex .. to_utf16_hex(char)
+ str = str .. utf8.char(char)
+ elseif not discindex and id == disc_t then
+ local props = properties[disc]
+ if not (props and props.zwnj) then
+ disc, discindex = node, j
+ end
end
- glyph.replace = makesub(run, repcodes, rep)
- glyph.pre = makesub(run, precodes, pre)
- glyph.post = makesub(run, postcodes, post)
- i = startglyph
- node = disc
- cluster = glyph.cluster
- nodeindex = cluster + 1
+ node = getnext(node)
end
+ glyph.tounicode = hex
+ glyph.string = str
+ end
+ if not fordisc and discindex then
end
end
return head, firstnode, glyphs, run.len - len
@@ -583,7 +613,7 @@ local function color_to_rgba(color)
end
end
--- Cache of color glyph PNG data for bookkeeping, only because I couldn’t
+-- Cache of color glyph PNG data for bookkeeping, only because I couldn't
-- figure how to make the engine load the image from the binary data directly.
local pngcache = {}
local pngcachefiles = {}
@@ -708,7 +738,7 @@ local function tonodes(head, node, run, glyphs)
character.depth = layerchar.depth
end
-- color_index has a special value, 0x10000, that mean use text
- -- color, we don’t check for it here explicitly since we will
+ -- color, we don't check for it here explicitly since we will
-- get nil anyway.
local color = palette[layer.color_index]
if not color or color.alpha ~= 0 then
@@ -778,7 +808,7 @@ local function tonodes(head, node, run, glyphs)
-- Color bitmap font with no glyph outlines (like Noto
-- Color Emoji) but has no bitmap for current glyph (most likely
-- `.notdef` glyph). The engine does not know how to embed such
- -- fonts, so we don’t want them to reach the backend as it will cause
+ -- fonts, so we don't want them to reach the backend as it will cause
-- a fatal error. We use `nullfont` instead. That is a hack, but I
-- think it is good enough for now. We could make the glyph virtual
-- with empty commands suh that LuaTeX ignores it, but we still want
@@ -815,7 +845,7 @@ local function tonodes(head, node, run, glyphs)
-- overfull messages, otherwise it will be trying to print our
-- invalid pseudo Unicode code points.
-- If the string is empty it means this glyph is part of a larger
- -- cluster and we don’t to print anything for it as the first glyph
+ -- cluster and we don't to print anything for it as the first glyph
-- in the cluster will have the string of the whole cluster.
local props = properties[node]
if not props then
@@ -828,7 +858,7 @@ local function tonodes(head, node, run, glyphs)
-- * Find how many characters in this cluster and how many glyphs,
-- * If there is more than 0 characters
-- * One glyph: one to one or one to many mapping, can be
- -- represented by font’s /ToUnicode
+ -- represented by font's /ToUnicode
-- * More than one: many to one or many to many mapping, can be
-- represented by /ActualText spans.
-- * If there are zero characters, then this glyph is part of complex
@@ -874,7 +904,7 @@ local function tonodes(head, node, run, glyphs)
end
elseif id == kern_t and getsubtype(node) == italiccorr_t then
-- If this is an italic correction node and the previous node is a
- -- glyph, update its kern value with the glyph’s italic correction.
+ -- glyph, update its kern value with the glyph's italic correction.
local prevchar, prevfontid = ischar(getprev(node))
if prevfontid == fontid and prevchar and prevchar > 0 then
local italic = characters[prevchar].italic
@@ -910,8 +940,8 @@ local function shape_run(head, current, run)
end
function process(head, font, _attr, direction)
- local newhead, current = head, head
- local runs = itemize(head, font, direction)
+ local newhead, runs = itemize(head, font, direction)
+ local current = newhead
local offset = 0
for i = 1,#runs do
More information about the latex3-commits
mailing list.