[latex3-commits] [git/LaTeX3-latex3-luaotfload] ignorable: Drop `Default_Ignorable_Code_Point`s (6e8ab66)
Marcel Fabian Krüger
tex at 2krueger.de
Sat Aug 17 14:07:06 CEST 2019
Repository : https://github.com/latex3/luaotfload
On branch : ignorable
Link : https://github.com/latex3/luaotfload/commit/6e8ab66598012e159253a2e2b422ce7ec6c2db1f
>---------------------------------------------------------------
commit 6e8ab66598012e159253a2e2b422ce7ec6c2db1f
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date: Sat Aug 17 13:55:52 2019 +0200
Drop `Default_Ignorable_Code_Point`s
>---------------------------------------------------------------
6e8ab66598012e159253a2e2b422ce7ec6c2db1f
src/luaotfload-notdef.lua | 97 ++++++++++++++++++++--
.../tex/latex-dev/luaotfload/luaotfload-notdef.lua | 97 ++++++++++++++++++++--
2 files changed, 176 insertions(+), 18 deletions(-)
diff --git a/src/luaotfload-notdef.lua b/src/luaotfload-notdef.lua
index dd24eb1..e8356bf 100644
--- a/src/luaotfload-notdef.lua
+++ b/src/luaotfload-notdef.lua
@@ -16,14 +16,57 @@ if luatexbase and luatexbase.provides_module then
luatexbase.provides_module (ProvidesLuaModule)
end
-local nodenew = node.direct.new
+local flush_node = node.direct.flush_node
local getfont = font.getfont
-local setfont = node.direct.setfont
+local getnext = node.direct.getnext
local getwhd = node.direct.getwhd
+local insert = table.insert
local insert_after = node.direct.insert_after
-local traverse_char = node.direct.traverse_char
+local kern_id = node.id'kern'
+local nodenew = node.direct.new
+local otfregister = fonts.constructors.features.otf.register
local protect_glyph = node.direct.protect_glyph
-local otffeatures = fonts.constructors.newfeatures "otf"
+local remove = node.direct.remove
+local setfont = node.direct.setfont
+local traverse_char = node.direct.traverse_char
+
+local ignorable_codepoints do -- set (code point -> true) filled from UnicodeData.txt and PropList.txt below
+ local sep = lpeg.P' '^0 * ';' * lpeg.P' '^0 -- a ';' with optional spaces on either side
+ local codepoint = lpeg.S'0123456789ABCDEF'^4/function(c)return tonumber(c, 16)end -- 4+ uppercase hex digits, converted to a number
+ local codepoint_range = codepoint * ('..' * codepoint + lpeg.Cc(false)) -- "first..last" or a single code point; yields first, last-or-false
+ local function multirawset(table, key1, key2, value) -- fold step: rawset every key in key1..key2 (only key1 when key2 is false)
+ for key = key1,(key2 or key1) do
+ rawset(table, key, value) -- a nil value removes the key
+ end
+ return table -- hand the accumulator back to lpeg.Cf
+ end
+ local entry = lpeg.Cg(codepoint * ';' * (1-lpeg.P';')^0 * ';Cf;' * lpeg.Cc(true))^-1 * (1-lpeg.P'\n')^0 * '\n' -- one line; captures (code point, true) only if its third field is `Cf`
+ local file = lpeg.Cf( -- fold over all lines
+ lpeg.Ct'' -- initial accumulator: an empty table
+ * entry^0
+ , rawset) -- each captured (code point, true) pair is stored with rawset
+ local f = io.open(kpse.find_file"UnicodeData.txt") -- NOTE(review): neither the kpse lookup nor io.open result is checked
+ ignorable_codepoints = file:match(f:read'*a') -- parse the whole file in one go
+ f:close()
+ entry = lpeg.Cg(codepoint_range * sep * ('Other_Default_Ignorable_Code_Point' * lpeg.Cc(true) -- PropList.txt line: these properties add the range ...
+ + 'Variation_Selector' * lpeg.Cc(true)
+ + 'White_Space' * lpeg.Cc(nil) -- ... while these produce nil, so multirawset removes the range
+ + 'Prepended_Concatenation_Mark' * lpeg.Cc(nil)
+ ) * lpeg.P' '^0 * '#')^-1 * (1-lpeg.P'\n')^0 * '\n' -- property name must be followed by optional spaces and '#'; other lines are skipped
+ file = lpeg.Cf( -- second fold, continuing with the table from the first pass
+ lpeg.Carg(1) -- initial accumulator: first extra argument given to match
+ * entry^0
+ , multirawset)
+ f = io.open(kpse.find_file"PropList.txt") -- NOTE(review): unchecked, same as for UnicodeData.txt
+ ignorable_codepoints = file:match(f:read'*a', 1, ignorable_codepoints) -- start at position 1, pass the existing table as extra argument
+ f:close()
+ for i = 0xFFF9,0xFFFB do -- explicitly take U+FFF9..U+FFFB out of the set
+ ignorable_codepoints[i] = nil
+ end
+ for i = 0x13430,0x13438 do -- explicitly take U+13430..U+13438 out of the set
+ ignorable_codepoints[i] = nil
+ end
+end
local function setnotdef(tfmdata, factor)
local desc = tfmdata.shared.rawdata.descriptions
@@ -35,7 +78,6 @@ local function setnotdef(tfmdata, factor)
tfmdata.notdefcode = 0xF0000
return
end
- print')'
-- If this didn't happen, it might be mapped to one of the
-- replacement characters:
for code = 0xFFFC,0xFFFF do
@@ -45,7 +87,6 @@ local function setnotdef(tfmdata, factor)
return
end
end
- print')))'
-- Oh no, we couldn't find it. Maybe we can find it by name?
local code = tfmdata.resources.unicodes[".notdef"]
-- Better safe than sorry
@@ -54,7 +95,6 @@ local function setnotdef(tfmdata, factor)
tfmdata.notdefcode = code
return
end
- print'))))'
-- So the font didn't do the obvious things and then it lied to us.
-- At this point we should think about sending an automated complain
-- to the font author, but we probably can't trust the contact
@@ -81,7 +121,7 @@ local function donotdef(head, font, _, _, _)
if not notdef then return end
for cur, cid, fid in traverse_char(head) do if fid == font then
local w, h, d = getwhd(cur)
- if w == 0 and h == 0 and d == 0 and not chars[cid] then
+ if w == 0 and h == 0 and d == 0 and not chars[cid] and not ignorable_codepoints[cid] then
local notdefnode = nodenew(glyph_id, 256)
setfont(notdefnode, font, notdef)
insert_after(cur, cur, notdefnode)
@@ -90,7 +130,7 @@ local function donotdef(head, font, _, _, _)
end end
end
-otffeatures.register {
+otfregister {
name = "notdef",
description = "Add notdef glyphs",
default = 1,
@@ -102,4 +142,43 @@ otffeatures.register {
}
}
+function fonts.handlers.otf.handlers.gsub_remove(head,char,dataset,sequence,replacement) -- handler for sequences of type "gsub_remove": deletes node `char` from the list; dataset, sequence and replacement are unused
+ local next -- receives the second result of remove()
+ head, next = remove(head, char) -- unlink `char` from the list starting at `head`
+ flush_node(char) -- free the unlinked node
+ if not head and not next then -- Avoid a double free if we were alone
+ head = nodenew(kern_id) -- return a fresh kern node instead of an empty list
+ end
+ return head, next, true, true -- NOTE(review): meaning of the two trailing `true` values is defined by the calling otf handler, not visible here
+end
+
+local sequence = { -- sequence description that invisibleinitialiser appends to a font's resources.sequences
+ features = {invisible = {["*"] = {["*"] = true}}}, -- presumably: belongs to feature `invisible` for any script/language - TODO confirm
+ flags = {false, false, false, false},
+ name = "invisible",
+ order = {"invisible"},
+ nofsteps = 1, -- matches the single entry in `steps`
+ steps = {{
+ coverage = ignorable_codepoints, -- the code point set built at the top of the file
+ index = 1,
+ }},
+ type = "gsub_remove", -- same name as the handler function defined above
+}
+local function invisibleinitialiser(tfmdata, value) -- initializer registered for the `invisible` feature; `value` is not used
+ local resources = tfmdata.resources
+ local sequences = resources and resources.sequences -- nil when tfmdata has no resources table
+ if sequences then -- do nothing if there is no sequence list to extend
+ -- Now we get to the interesting part: At which point should our new sequence be inserted? Let's do it at the end, so that the ignorable characters are still seen by all features.
+ insert(sequences, sequence) -- append the shared `sequence` table as the last element
+ end
+end
+otfregister { -- register the `invisible` feature with the otf feature constructors
+ name = 'invisible',
+ description = 'Remove invisible control characters',
+ default = false, -- presumably: feature stays off unless requested - confirm against the registry
+ initializers = {
+ node = invisibleinitialiser, -- only a `node` initializer is supplied
+ },
+}
+
--- vim:sw=2:ts=2:expandtab:tw=71
diff --git a/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua b/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
index dd24eb1..e8356bf 100644
--- a/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
+++ b/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
@@ -16,14 +16,57 @@ if luatexbase and luatexbase.provides_module then
luatexbase.provides_module (ProvidesLuaModule)
end
-local nodenew = node.direct.new
+local flush_node = node.direct.flush_node
local getfont = font.getfont
-local setfont = node.direct.setfont
+local getnext = node.direct.getnext
local getwhd = node.direct.getwhd
+local insert = table.insert
local insert_after = node.direct.insert_after
-local traverse_char = node.direct.traverse_char
+local kern_id = node.id'kern'
+local nodenew = node.direct.new
+local otfregister = fonts.constructors.features.otf.register
local protect_glyph = node.direct.protect_glyph
-local otffeatures = fonts.constructors.newfeatures "otf"
+local remove = node.direct.remove
+local setfont = node.direct.setfont
+local traverse_char = node.direct.traverse_char
+
+local ignorable_codepoints do -- set (code point -> true) filled from UnicodeData.txt and PropList.txt below
+ local sep = lpeg.P' '^0 * ';' * lpeg.P' '^0 -- a ';' with optional spaces on either side
+ local codepoint = lpeg.S'0123456789ABCDEF'^4/function(c)return tonumber(c, 16)end -- 4+ uppercase hex digits, converted to a number
+ local codepoint_range = codepoint * ('..' * codepoint + lpeg.Cc(false)) -- "first..last" or a single code point; yields first, last-or-false
+ local function multirawset(table, key1, key2, value) -- fold step: rawset every key in key1..key2 (only key1 when key2 is false)
+ for key = key1,(key2 or key1) do
+ rawset(table, key, value) -- a nil value removes the key
+ end
+ return table -- hand the accumulator back to lpeg.Cf
+ end
+ local entry = lpeg.Cg(codepoint * ';' * (1-lpeg.P';')^0 * ';Cf;' * lpeg.Cc(true))^-1 * (1-lpeg.P'\n')^0 * '\n' -- one line; captures (code point, true) only if its third field is `Cf`
+ local file = lpeg.Cf( -- fold over all lines
+ lpeg.Ct'' -- initial accumulator: an empty table
+ * entry^0
+ , rawset) -- each captured (code point, true) pair is stored with rawset
+ local f = io.open(kpse.find_file"UnicodeData.txt") -- NOTE(review): neither the kpse lookup nor io.open result is checked
+ ignorable_codepoints = file:match(f:read'*a') -- parse the whole file in one go
+ f:close()
+ entry = lpeg.Cg(codepoint_range * sep * ('Other_Default_Ignorable_Code_Point' * lpeg.Cc(true) -- PropList.txt line: these properties add the range ...
+ + 'Variation_Selector' * lpeg.Cc(true)
+ + 'White_Space' * lpeg.Cc(nil) -- ... while these produce nil, so multirawset removes the range
+ + 'Prepended_Concatenation_Mark' * lpeg.Cc(nil)
+ ) * lpeg.P' '^0 * '#')^-1 * (1-lpeg.P'\n')^0 * '\n' -- property name must be followed by optional spaces and '#'; other lines are skipped
+ file = lpeg.Cf( -- second fold, continuing with the table from the first pass
+ lpeg.Carg(1) -- initial accumulator: first extra argument given to match
+ * entry^0
+ , multirawset)
+ f = io.open(kpse.find_file"PropList.txt") -- NOTE(review): unchecked, same as for UnicodeData.txt
+ ignorable_codepoints = file:match(f:read'*a', 1, ignorable_codepoints) -- start at position 1, pass the existing table as extra argument
+ f:close()
+ for i = 0xFFF9,0xFFFB do -- explicitly take U+FFF9..U+FFFB out of the set
+ ignorable_codepoints[i] = nil
+ end
+ for i = 0x13430,0x13438 do -- explicitly take U+13430..U+13438 out of the set
+ ignorable_codepoints[i] = nil
+ end
+end
local function setnotdef(tfmdata, factor)
local desc = tfmdata.shared.rawdata.descriptions
@@ -35,7 +78,6 @@ local function setnotdef(tfmdata, factor)
tfmdata.notdefcode = 0xF0000
return
end
- print')'
-- If this didn't happen, it might be mapped to one of the
-- replacement characters:
for code = 0xFFFC,0xFFFF do
@@ -45,7 +87,6 @@ local function setnotdef(tfmdata, factor)
return
end
end
- print')))'
-- Oh no, we couldn't find it. Maybe we can find it by name?
local code = tfmdata.resources.unicodes[".notdef"]
-- Better safe than sorry
@@ -54,7 +95,6 @@ local function setnotdef(tfmdata, factor)
tfmdata.notdefcode = code
return
end
- print'))))'
-- So the font didn't do the obvious things and then it lied to us.
-- At this point we should think about sending an automated complain
-- to the font author, but we probably can't trust the contact
@@ -81,7 +121,7 @@ local function donotdef(head, font, _, _, _)
if not notdef then return end
for cur, cid, fid in traverse_char(head) do if fid == font then
local w, h, d = getwhd(cur)
- if w == 0 and h == 0 and d == 0 and not chars[cid] then
+ if w == 0 and h == 0 and d == 0 and not chars[cid] and not ignorable_codepoints[cid] then
local notdefnode = nodenew(glyph_id, 256)
setfont(notdefnode, font, notdef)
insert_after(cur, cur, notdefnode)
@@ -90,7 +130,7 @@ local function donotdef(head, font, _, _, _)
end end
end
-otffeatures.register {
+otfregister {
name = "notdef",
description = "Add notdef glyphs",
default = 1,
@@ -102,4 +142,43 @@ otffeatures.register {
}
}
+function fonts.handlers.otf.handlers.gsub_remove(head,char,dataset,sequence,replacement) -- handler for sequences of type "gsub_remove": deletes node `char` from the list; dataset, sequence and replacement are unused
+ local next -- receives the second result of remove()
+ head, next = remove(head, char) -- unlink `char` from the list starting at `head`
+ flush_node(char) -- free the unlinked node
+ if not head and not next then -- Avoid a double free if we were alone
+ head = nodenew(kern_id) -- return a fresh kern node instead of an empty list
+ end
+ return head, next, true, true -- NOTE(review): meaning of the two trailing `true` values is defined by the calling otf handler, not visible here
+end
+
+local sequence = { -- sequence description that invisibleinitialiser appends to a font's resources.sequences
+ features = {invisible = {["*"] = {["*"] = true}}}, -- presumably: belongs to feature `invisible` for any script/language - TODO confirm
+ flags = {false, false, false, false},
+ name = "invisible",
+ order = {"invisible"},
+ nofsteps = 1, -- matches the single entry in `steps`
+ steps = {{
+ coverage = ignorable_codepoints, -- the code point set built at the top of the file
+ index = 1,
+ }},
+ type = "gsub_remove", -- same name as the handler function defined above
+}
+local function invisibleinitialiser(tfmdata, value) -- initializer registered for the `invisible` feature; `value` is not used
+ local resources = tfmdata.resources
+ local sequences = resources and resources.sequences -- nil when tfmdata has no resources table
+ if sequences then -- do nothing if there is no sequence list to extend
+ -- Now we get to the interesting part: At which point should our new sequence be inserted? Let's do it at the end, so that the ignorable characters are still seen by all features.
+ insert(sequences, sequence) -- append the shared `sequence` table as the last element
+ end
+end
+otfregister { -- register the `invisible` feature with the otf feature constructors
+ name = 'invisible',
+ description = 'Remove invisible control characters',
+ default = false, -- presumably: feature stays off unless requested - confirm against the registry
+ initializers = {
+ node = invisibleinitialiser, -- only a `node` initializer is supplied
+ },
+}
+
--- vim:sw=2:ts=2:expandtab:tw=71
More information about the latex3-commits
mailing list