[latex3-commits] [git/LaTeX3-latex3-luaotfload] ignorable: Drop `Default_Ignorable_Code_Point`s (6e8ab66)

Marcel Fabian Krüger tex at 2krueger.de
Sat Aug 17 14:07:06 CEST 2019


Repository : https://github.com/latex3/luaotfload
On branch  : ignorable
Link       : https://github.com/latex3/luaotfload/commit/6e8ab66598012e159253a2e2b422ce7ec6c2db1f

>---------------------------------------------------------------

commit 6e8ab66598012e159253a2e2b422ce7ec6c2db1f
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date:   Sat Aug 17 13:55:52 2019 +0200

    Drop `Default_Ignorable_Code_Point`s


>---------------------------------------------------------------

6e8ab66598012e159253a2e2b422ce7ec6c2db1f
 src/luaotfload-notdef.lua                          | 97 ++++++++++++++++++++--
 .../tex/latex-dev/luaotfload/luaotfload-notdef.lua | 97 ++++++++++++++++++++--
 2 files changed, 176 insertions(+), 18 deletions(-)

diff --git a/src/luaotfload-notdef.lua b/src/luaotfload-notdef.lua
index dd24eb1..e8356bf 100644
--- a/src/luaotfload-notdef.lua
+++ b/src/luaotfload-notdef.lua
@@ -16,14 +16,57 @@ if luatexbase and luatexbase.provides_module then
   luatexbase.provides_module (ProvidesLuaModule)
 end  
 
-local nodenew            = node.direct.new
+local flush_node         = node.direct.flush_node
 local getfont            = font.getfont
-local setfont            = node.direct.setfont
+local getnext            = node.direct.getnext
 local getwhd             = node.direct.getwhd
+local insert             = table.insert
 local insert_after       = node.direct.insert_after
-local traverse_char      = node.direct.traverse_char
+local kern_id            = node.id'kern'
+local nodenew            = node.direct.new
+local otfregister        = fonts.constructors.features.otf.register
 local protect_glyph      = node.direct.protect_glyph
-local otffeatures        = fonts.constructors.newfeatures "otf"
+local remove             = node.direct.remove
+local setfont            = node.direct.setfont
+local traverse_char      = node.direct.traverse_char
+
+local ignorable_codepoints do -- table keyed by codepoint; filled once, when this file is loaded
+  local sep = lpeg.P' '^0 * ';' * lpeg.P' '^0 -- a ';' with optional spaces on either side
+  local codepoint = lpeg.S'0123456789ABCDEF'^4/function(c)return tonumber(c, 16)end -- four or more uppercase hex digits, converted to a number
+  local codepoint_range = codepoint * ('..' * codepoint + lpeg.Cc(false)) -- yields first, last; last is false for a single codepoint
+  local function multirawset(table, key1, key2, value) -- fold step: table[key1..key2] = value (the parameter shadows the global `table` in here)
+    for key = key1,(key2 or key1) do -- key2 == false means a one-element range
+      rawset(table, key, value) -- a nil value deletes the entry
+    end
+    return table -- hand the accumulator back to lpeg.Cf
+  end
+  local entry = lpeg.Cg(codepoint * ';' * (1-lpeg.P';')^0 * ';Cf;' * lpeg.Cc(true))^-1 * (1-lpeg.P'\n')^0 * '\n' -- consumes one line; yields (codepoint, true) only when the third field is Cf
+  local file = lpeg.Cf( -- fold every captured pair into one table
+      lpeg.Ct'' -- initial accumulator: a new empty table
+    * entry^0
+  , rawset) -- rawset returns the table, so it can serve directly as the fold function
+  local f = io.open(kpse.find_file"UnicodeData.txt") -- NOTE(review): no check that the file was found or could be opened
+  ignorable_codepoints = file:match(f:read'*a') -- first pass: every codepoint whose third field is Cf
+  f:close()
+  entry = lpeg.Cg(codepoint_range * sep * ('Other_Default_Ignorable_Code_Point' * lpeg.Cc(true) -- second pass: these two properties add entries
+                                               + 'Variation_Selector' * lpeg.Cc(true)
+                                               + 'White_Space' * lpeg.Cc(nil) -- nil value: these two properties delete an entry again
+                                               + 'Prepended_Concatenation_Mark' * lpeg.Cc(nil)
+                                          ) * lpeg.P' '^0 * '#')^-1 * (1-lpeg.P'\n')^0 * '\n' -- lines naming any other property are consumed without a capture
+  file = lpeg.Cf(
+      lpeg.Carg(1) -- initial accumulator: the extra argument handed to match below
+    * entry^0
+  , multirawset)
+  f = io.open(kpse.find_file"PropList.txt") -- NOTE(review): unchecked, same as the first open in this block
+  ignorable_codepoints = file:match(f:read'*a', 1, ignorable_codepoints) -- updates the first-pass table in place
+  f:close()
+  for i = 0xFFF9,0xFFFB do -- hard-coded exception: U+FFF9..U+FFFB are never treated as ignorable
+    ignorable_codepoints[i] = nil
+  end
+  for i = 0x13430,0x13438 do -- hard-coded exception: likewise for U+13430..U+13438
+    ignorable_codepoints[i] = nil
+  end
+end
 
 local function setnotdef(tfmdata, factor)
   local desc = tfmdata.shared.rawdata.descriptions
@@ -35,7 +78,6 @@ local function setnotdef(tfmdata, factor)
     tfmdata.notdefcode = 0xF0000
     return
   end
-  print')'
   -- If this didn't happen, it might be mapped to one of the
   -- replacement characters:
   for code = 0xFFFC,0xFFFF do
@@ -45,7 +87,6 @@ local function setnotdef(tfmdata, factor)
       return
     end
   end
-  print')))'
   -- Oh no, we couldn't find it. Maybe we can find it by name?
   local code = tfmdata.resources.unicodes[".notdef"]
   -- Better safe than sorry
@@ -54,7 +95,6 @@ local function setnotdef(tfmdata, factor)
     tfmdata.notdefcode = code
     return
   end
-  print'))))'
   -- So the font didn't do the obvious things and then it lied to us.
   -- At this point we should think about sending an automated complain
   -- to the font author, but we probably can't trust the contact
@@ -81,7 +121,7 @@ local function donotdef(head, font, _, _, _)
   if not notdef then return end
   for cur, cid, fid in traverse_char(head) do if fid == font then
     local w, h, d = getwhd(cur)
-    if w == 0 and h == 0 and d == 0 and not chars[cid] then
+    if w == 0 and h == 0 and d == 0 and not chars[cid] and not ignorable_codepoints[cid] then
       local notdefnode = nodenew(glyph_id, 256)
       setfont(notdefnode, font, notdef)
       insert_after(cur, cur, notdefnode)
@@ -90,7 +130,7 @@ local function donotdef(head, font, _, _, _)
   end end
 end
 
-otffeatures.register {
+otfregister {
   name        = "notdef",
   description = "Add notdef glyphs",
   default     = 1,
@@ -102,4 +142,43 @@ otffeatures.register {
   }
 }
 
+function fonts.handlers.otf.handlers.gsub_remove(head,char,dataset,sequence,replacement) -- remove `char` from the node list and free it; dataset, sequence and replacement are unused
+  local next -- NOTE: shadows the global `next` inside this function
+  head, next = remove(head, char) -- unlink char; second result is presumably the node that followed it (confirm against node.direct.remove)
+  flush_node(char) -- free the unlinked node
+  if not head and not next then -- Avoid a double free if we were alone
+    head = nodenew(kern_id) -- nothing is left: return a fresh kern node as head instead of nil
+  end
+  return head, next, true, true -- NOTE(review): the meaning of the two flags is defined by the caller, not visible here
+end
+
+local sequence = { -- sequence record that invisibleinitialiser appends to a font's resources.sequences
+  features = {invisible = {["*"] = {["*"] = true}}}, -- NOTE(review): presumably "feature invisible, any script, any language" -- confirm
+  flags = {false, false, false, false},
+  name = "invisible",
+  order = {"invisible"},
+  nofsteps = 1, -- matches the single entry in steps
+  steps = {{
+    coverage = ignorable_codepoints, -- the codepoint table built when this file is loaded (shared, not copied)
+    index = 1,
+  }},
+  type = "gsub_remove", -- same name as the gsub_remove handler function defined in this file
+}
+local function invisibleinitialiser(tfmdata, value) -- initializer registered for the 'invisible' feature; `value` is unused
+  local resources = tfmdata.resources
+  local sequences = resources and resources.sequences -- nil when tfmdata has no resources table
+  if sequences then -- fonts without a sequences list are left untouched
+    -- Where should the new sequence be inserted? We append it at the end, so that the characters are still seen by all other features.
+    insert(sequences, sequence) -- the same `sequence` table is appended on every call
+  end
+end
+otfregister { -- register the 'invisible' feature
+  name = 'invisible',
+  description = 'Remove invisible control characters',
+  default = false, -- not enabled by default
+  initializers = {
+    node = invisibleinitialiser, -- an initializer is given for the `node` key only
+  },
+}
+
 --- vim:sw=2:ts=2:expandtab:tw=71
diff --git a/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua b/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
index dd24eb1..e8356bf 100644
--- a/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
+++ b/texmf/tex/latex-dev/luaotfload/luaotfload-notdef.lua
@@ -16,14 +16,57 @@ if luatexbase and luatexbase.provides_module then
   luatexbase.provides_module (ProvidesLuaModule)
 end  
 
-local nodenew            = node.direct.new
+local flush_node         = node.direct.flush_node
 local getfont            = font.getfont
-local setfont            = node.direct.setfont
+local getnext            = node.direct.getnext
 local getwhd             = node.direct.getwhd
+local insert             = table.insert
 local insert_after       = node.direct.insert_after
-local traverse_char      = node.direct.traverse_char
+local kern_id            = node.id'kern'
+local nodenew            = node.direct.new
+local otfregister        = fonts.constructors.features.otf.register
 local protect_glyph      = node.direct.protect_glyph
-local otffeatures        = fonts.constructors.newfeatures "otf"
+local remove             = node.direct.remove
+local setfont            = node.direct.setfont
+local traverse_char      = node.direct.traverse_char
+
+local ignorable_codepoints do -- table keyed by codepoint; filled once, when this file is loaded
+  local sep = lpeg.P' '^0 * ';' * lpeg.P' '^0 -- a ';' with optional spaces on either side
+  local codepoint = lpeg.S'0123456789ABCDEF'^4/function(c)return tonumber(c, 16)end -- four or more uppercase hex digits, converted to a number
+  local codepoint_range = codepoint * ('..' * codepoint + lpeg.Cc(false)) -- yields first, last; last is false for a single codepoint
+  local function multirawset(table, key1, key2, value) -- fold step: table[key1..key2] = value (the parameter shadows the global `table` in here)
+    for key = key1,(key2 or key1) do -- key2 == false means a one-element range
+      rawset(table, key, value) -- a nil value deletes the entry
+    end
+    return table -- hand the accumulator back to lpeg.Cf
+  end
+  local entry = lpeg.Cg(codepoint * ';' * (1-lpeg.P';')^0 * ';Cf;' * lpeg.Cc(true))^-1 * (1-lpeg.P'\n')^0 * '\n' -- consumes one line; yields (codepoint, true) only when the third field is Cf
+  local file = lpeg.Cf( -- fold every captured pair into one table
+      lpeg.Ct'' -- initial accumulator: a new empty table
+    * entry^0
+  , rawset) -- rawset returns the table, so it can serve directly as the fold function
+  local f = io.open(kpse.find_file"UnicodeData.txt") -- NOTE(review): no check that the file was found or could be opened
+  ignorable_codepoints = file:match(f:read'*a') -- first pass: every codepoint whose third field is Cf
+  f:close()
+  entry = lpeg.Cg(codepoint_range * sep * ('Other_Default_Ignorable_Code_Point' * lpeg.Cc(true) -- second pass: these two properties add entries
+                                               + 'Variation_Selector' * lpeg.Cc(true)
+                                               + 'White_Space' * lpeg.Cc(nil) -- nil value: these two properties delete an entry again
+                                               + 'Prepended_Concatenation_Mark' * lpeg.Cc(nil)
+                                          ) * lpeg.P' '^0 * '#')^-1 * (1-lpeg.P'\n')^0 * '\n' -- lines naming any other property are consumed without a capture
+  file = lpeg.Cf(
+      lpeg.Carg(1) -- initial accumulator: the extra argument handed to match below
+    * entry^0
+  , multirawset)
+  f = io.open(kpse.find_file"PropList.txt") -- NOTE(review): unchecked, same as the first open in this block
+  ignorable_codepoints = file:match(f:read'*a', 1, ignorable_codepoints) -- updates the first-pass table in place
+  f:close()
+  for i = 0xFFF9,0xFFFB do -- hard-coded exception: U+FFF9..U+FFFB are never treated as ignorable
+    ignorable_codepoints[i] = nil
+  end
+  for i = 0x13430,0x13438 do -- hard-coded exception: likewise for U+13430..U+13438
+    ignorable_codepoints[i] = nil
+  end
+end
 
 local function setnotdef(tfmdata, factor)
   local desc = tfmdata.shared.rawdata.descriptions
@@ -35,7 +78,6 @@ local function setnotdef(tfmdata, factor)
     tfmdata.notdefcode = 0xF0000
     return
   end
-  print')'
   -- If this didn't happen, it might be mapped to one of the
   -- replacement characters:
   for code = 0xFFFC,0xFFFF do
@@ -45,7 +87,6 @@ local function setnotdef(tfmdata, factor)
       return
     end
   end
-  print')))'
   -- Oh no, we couldn't find it. Maybe we can find it by name?
   local code = tfmdata.resources.unicodes[".notdef"]
   -- Better safe than sorry
@@ -54,7 +95,6 @@ local function setnotdef(tfmdata, factor)
     tfmdata.notdefcode = code
     return
   end
-  print'))))'
   -- So the font didn't do the obvious things and then it lied to us.
   -- At this point we should think about sending an automated complain
   -- to the font author, but we probably can't trust the contact
@@ -81,7 +121,7 @@ local function donotdef(head, font, _, _, _)
   if not notdef then return end
   for cur, cid, fid in traverse_char(head) do if fid == font then
     local w, h, d = getwhd(cur)
-    if w == 0 and h == 0 and d == 0 and not chars[cid] then
+    if w == 0 and h == 0 and d == 0 and not chars[cid] and not ignorable_codepoints[cid] then
       local notdefnode = nodenew(glyph_id, 256)
       setfont(notdefnode, font, notdef)
       insert_after(cur, cur, notdefnode)
@@ -90,7 +130,7 @@ local function donotdef(head, font, _, _, _)
   end end
 end
 
-otffeatures.register {
+otfregister {
   name        = "notdef",
   description = "Add notdef glyphs",
   default     = 1,
@@ -102,4 +142,43 @@ otffeatures.register {
   }
 }
 
+function fonts.handlers.otf.handlers.gsub_remove(head,char,dataset,sequence,replacement) -- remove `char` from the node list and free it; dataset, sequence and replacement are unused
+  local next -- NOTE: shadows the global `next` inside this function
+  head, next = remove(head, char) -- unlink char; second result is presumably the node that followed it (confirm against node.direct.remove)
+  flush_node(char) -- free the unlinked node
+  if not head and not next then -- Avoid a double free if we were alone
+    head = nodenew(kern_id) -- nothing is left: return a fresh kern node as head instead of nil
+  end
+  return head, next, true, true -- NOTE(review): the meaning of the two flags is defined by the caller, not visible here
+end
+
+local sequence = { -- sequence record that invisibleinitialiser appends to a font's resources.sequences
+  features = {invisible = {["*"] = {["*"] = true}}}, -- NOTE(review): presumably "feature invisible, any script, any language" -- confirm
+  flags = {false, false, false, false},
+  name = "invisible",
+  order = {"invisible"},
+  nofsteps = 1, -- matches the single entry in steps
+  steps = {{
+    coverage = ignorable_codepoints, -- the codepoint table built when this file is loaded (shared, not copied)
+    index = 1,
+  }},
+  type = "gsub_remove", -- same name as the gsub_remove handler function defined in this file
+}
+local function invisibleinitialiser(tfmdata, value) -- initializer registered for the 'invisible' feature; `value` is unused
+  local resources = tfmdata.resources
+  local sequences = resources and resources.sequences -- nil when tfmdata has no resources table
+  if sequences then -- fonts without a sequences list are left untouched
+    -- Where should the new sequence be inserted? We append it at the end, so that the characters are still seen by all other features.
+    insert(sequences, sequence) -- the same `sequence` table is appended on every call
+  end
+end
+otfregister { -- register the 'invisible' feature
+  name = 'invisible',
+  description = 'Remove invisible control characters',
+  default = false, -- not enabled by default
+  initializers = {
+    node = invisibleinitialiser, -- an initializer is given for the `node` key only
+  },
+}
+
 --- vim:sw=2:ts=2:expandtab:tw=71





More information about the latex3-commits mailing list