[latex3-commits] [l3svn] 03/03: Add a simple font loader for LuaTeX in format mode

noreply at latex-project.org noreply at latex-project.org
Thu Feb 4 18:07:11 CET 2016


This is an automated email from the git hooks/post-receive script.

joseph pushed a commit to branch master
in repository l3svn.

commit b2a92893b3daffb2e2e90cdb434b9142ac6c5312
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Thu Feb 4 17:00:45 2016 +0000

    Add a simple font loader for LuaTeX in format mode
    
    This allows us to load OpenType fonts 'out of the box', at least
    if they are in the texmf tree. There's no font shaping here so it's
    only meant for European glyphs and even then limited. However, for
    testing this should be handy.
    
    Note that the code itself here is taken from Speeddata Publisher
    with permission (see notes in .dtx). We can re-license but at
    present I've gone with loading rather than copying so things are
    clear. We are unlikely to use such a simple loader long-term in any
    case. (The file fonts.lua from Speeddata Publisher shows how to do
    basic shaping as needed for European support.)
    
    Probably the next thing to do is deal with TeX ligatures and the
    like, which will need a bit of work. That's all quite doable, as is
    I think handling system fonts if we want to.
    
    The big picture idea here is that we can now test Unicode stuff,
    including hyphenation, in IniTeX mode.
    
    Note that the format-building process is still a bit 'rough': you
    need to put l3format.lua and fontloader.lua where LuaTeX can find
    them.
---
 l3kernel/fontloader.lua |  290 +++++++++++++++++++++++++++++++++++++++++++++++
 l3kernel/l3final.dtx    |   35 +++++-
 l3kernel/l3format.ins   |    3 +-
 l3kernel/l3luatex.dtx   |   64 +++++++++++
 4 files changed, 386 insertions(+), 6 deletions(-)

diff --git a/l3kernel/fontloader.lua b/l3kernel/fontloader.lua
new file mode 100644
index 0000000..acc9a4b
--- /dev/null
+++ b/l3kernel/fontloader.lua
@@ -0,0 +1,290 @@
+--- The fontloader uses the LuaTeX internal fontforge library (called
+--- fontloader) to inspect an OpenType, a TrueType or a Type1 font. It
+--- converts this font to a font structure  TeX uses internally.
+--
+--  fontloader.lua
+--  speedata publisher
+--
+--  Copyright 2010-2011 Patrick Gundlach.
+--  See file COPYING in the root directory for license info.
+
+
+module(...,package.seeall)
+
+--- Map the extension of `filename` (case-insensitively) to a font format:
+--- `.ttf` -> "truetype", `.otf` -> "opentype", `.pfb` -> "type1", else _nil_.
+function guess_fonttype( filename )
+    local lowered = filename:lower()
+    local format
+    if     lowered:match(".*%.ttf$") then format = "truetype"
+    elseif lowered:match(".*%.otf$") then format = "opentype"
+    elseif lowered:match(".*%.pfb$") then format = "type1"
+    end
+    return format
+end
+
+--- Tell whether the script list `tab` of a feature contains an entry for
+--- `script` that also lists `lang`. Both are compared case-insensitively
+--- and the result is `true` or `false`. The table is something like:
+---
+---     [1] = {
+---       ["langs"] = {
+---         [1] = "AZE "
+---         [2] = "CRT "
+---         [3] = "TRK "
+---       },
+---       ["script"] = "latn"
+---     },
+function features_scripts_matches( tab,script,lang )
+    local wanted_lang   = string.lower(lang)
+    local wanted_script = string.lower(script)
+    for entry_no = 1,#tab do
+        local candidate = tab[entry_no]
+        if string.lower(candidate.script) == wanted_script then
+            local langs = candidate.langs
+            for lang_no = 1,#langs do
+                if string.lower(langs[lang_no]) == wanted_lang then return true end
+            end
+        end
+    end
+    return false
+end
+
+--- Convert `codepoint` (an integer) to UTF-16, written as uppercase hex digits.
+--- A codepoint above U+FFFF gives a surrogate pair (eight hex digits).
+function to_utf16(codepoint)
+    assert(codepoint)
+    if codepoint < 65536 then return string.format("%04X",codepoint) end
+    -- Surrogates encode the 20-bit offset from U+10000, not the raw value.
+    local offset = codepoint - 0x10000
+    return string.format("%04X%04X",math.floor(offset / 1024) + 0xD800,offset % 1024 + 0xDC00)
+end
+
+--- Return the names of the GSUB subtables that implement the OpenType
+--- feature `featurename` (for example "smcp") in the font table `f`.
+--- Currently only lookups for script `latn` and language `dflt` are used,
+--- and of each matching lookup only the first subtable is taken.
+---
+--- The result is a list of strings. It is empty if nothing matches or if
+--- the font table has no `gsub` entry.
+function find_feature_string(f,featurename)
+    local ret = {}
+    for _,lookup in ipairs(f.gsub or {}) do
+        for _,feature in ipairs(lookup.features or {}) do
+            if feature.tag==featurename and features_scripts_matches(feature.scripts,"latn","dflt") then
+                -- A lookup without subtables is skipped rather than raising
+                -- an error on indexing a missing entry.
+                local first = lookup.subtables and lookup.subtables[1]
+                if first then
+                    ret[#ret + 1] = first.name
+                end
+            end
+        end
+    end
+    return ret
+end
+
+--- LuaTeX's fontloader (function `to_table()`) returns a rather complex table
+--- with all kinds of information. Loading this table is expensive (_todo:
+--- measure it_), so we  don't load it over and over again if the user
+--- requests the same font in a different size. We also cache  the `to_unicode` mapping.
+--- Only the size dependent values are computed.
+local lookup_fonttable_from_filename = {}
+
+
+--- For define_font() we extend our file list with every file that `dirtree`
+--- yields for the directories named in SP_FONT_PATH (separated by colons).
+local fontlist = {}
+local fp = os.getenv("SP_FONT_PATH")
+if fp and fp ~= "" then -- os.getenv() yields nil when the variable is unset
+  for _,dir in ipairs(string.explode(fp,":")) do
+    for i in dirtree(dir) do
+      local filename = i:gsub(".*/([^/]+)$","%1")
+      fontlist[filename] = i
+    end
+  end
+end
+
+
+
+--- Build a TeX usable font table for a font file. Returns `true` and the
+--- table, or `false` plus an error message (a string).
+--- The parameter `name` is the filename (without path), `size` is
+--- given in scaled points (a negative `size` is multiplied by -655.36),
+--- `extra_parameter` is an optional table such as:
+---     {
+---       ["space"] = "25"
+---       ["marginprotrusion"] = 100    -- only honoured if it is a number
+---       ["otfeatures"] = {
+---         ["smcp"] = "true"
+---       },
+---     },
+function define_font(name, size,extra_parameter)
+    local extra_parameter = extra_parameter or {}
+    local fonttable = lookup_fonttable_from_filename[name]
+
+    if not fonttable then
+        -- These are stored in the cached fonttable table
+        local lookup_codepoint_by_name   = {}
+        local lookup_codepoint_by_number = {}
+
+        local filename_with_path = kpse.filelist[name] or fontlist[name]
+        if not filename_with_path then return false, string.format("Fontfile '%s' not found.", name) end
+        local font, err = fontloader.open(filename_with_path)
+        if not font then
+            -- Never fall through to `fontloader.to_table()` without a font.
+            -- `err` may be a string or a list of messages; use the first one
+            -- if there is any, otherwise a generic message.
+            local msg = type(err) == "table" and err[1] or err
+            if type(msg) ~= "string" then
+                msg = string.format("Problem while loading font '%s'",tostring(filename_with_path))
+            end
+            return false, msg
+        end
+        fonttable = fontloader.to_table(font)
+        -- The Lua table is all we use from here on: release the font object.
+        fontloader.close(font)
+        if fonttable == nil then return false, string.format("Problem while loading font '%s'",tostring(filename_with_path))  end
+
+        fonttable.filename_with_path = filename_with_path
+        local is_unicode = (fonttable.pfminfo.unicoderanges ~= nil)
+
+        --- We require a mapping glyph number -> unicode codepoint. The problem is
+        --- that TTF/OTF fonts have a different encoding mechanism. TTF/OTF can be
+        --- accessed via the table `fonttable.map.backmap` (the key is the glyph
+        --- number, the value is used as the codepoint). For Type 1 fonts we use
+        --- `glyph.unicode` and `glyph.name` for the codepoint and the name.
+        ---
+        --- For kerning a mapping glyphname -> codepoint is needed.
+        for i = 1,#fonttable.glyphs do
+            local g = fonttable.glyphs[i]
+            local cp
+            if is_unicode then
+                cp = fonttable.map.backmap[i]   -- TTF/OTF, use map.backmap
+            else
+                cp = g.unicode                  -- Type1, use glyph.unicode
+            end
+            lookup_codepoint_by_name[g.name] = cp
+            lookup_codepoint_by_number[i]    = cp
+        end
+        fonttable.lookup_codepoint_by_name   = lookup_codepoint_by_name
+        fonttable.lookup_codepoint_by_number = lookup_codepoint_by_number
+
+        -- Cache the table only once it is completely set up, so that a
+        -- failure above cannot leave a half-initialised entry behind.
+        lookup_fonttable_from_filename[name]=fonttable
+    end
+
+    --- At this point we have taken the `fonttable` from memory or from `fontloader#to_table()`. The next
+    --- part is mostly size/features dependent.
+
+    if (size < 0) then size = (- 655.36) * size end
+    -- Some fonts have `units_per_em` set to 0. I am not sure if setting this to
+    -- 1000 in that case has any drawbacks.
+    if fonttable.units_per_em == 0 then fonttable.units_per_em = 1000 end
+    local mag = size / fonttable.units_per_em
+
+    --- The table `f` is the font structure that TeX can use, see chapter 7 of the LuaTeX manual for a detailed description. This is returned from
+    --- the function. It is safe to store additional data here.
+    local f = { }
+
+    -- The index of the characters table must match the glyphs in the
+    -- "document". It is wise to have everything in unicode, so we do keep that
+    -- in mind when filling the characters subtable.
+    f.characters    = { }
+    f.fontloader    = fonttable
+    local otfeatures = extra_parameter.otfeatures
+    if otfeatures and otfeatures.smcp then f.smcp = find_feature_string(fonttable,"smcp") end
+    if otfeatures and otfeatures.onum then f.onum = find_feature_string(fonttable,"onum") end
+
+    f.otfeatures    = otfeatures                             -- OpenType Features (smcp,...)
+    f.name          = fonttable.fontname
+    f.fullname      = fonttable.fontname
+    f.designsize    = size
+    f.size          = size
+    f.direction     = 0
+    f.filename      = fonttable.filename_with_path
+    f.type          = 'real'
+    f.encodingbytes = 2
+    f.tounicode     = 1
+    f.stretch       = 40
+    f.shrink        = 30
+    f.step          = 10
+    f.auto_expand   = true
+
+    f.parameters    = {
+        slant         = 0,
+        space         = ( extra_parameter.space or 25 ) / 100  * size,
+        space_stretch = 0.3  * size,
+        space_shrink  = 0.1  * size,
+        x_height      = 0.4  * size,
+        quad          = 1.0  * size,
+        extra_space   = 0
+    }
+
+    f.format = guess_fonttype(name)
+    if f.format==nil then return false,"Could not determine the type of the font '".. fonttable.filename_with_path .."'." end
+
+    f.embedding = "subset"
+    f.cidinfo = fonttable.cidinfo
+
+
+    for i=1,#fonttable.glyphs do
+        local glyph     = fonttable.glyphs[i]
+        local codepoint = fonttable.lookup_codepoint_by_number[i]
+
+        -- TeX uses U+002D HYPHEN-MINUS for hyphen, correct would be U+2010 HYPHEN.
+        -- Because font vendors all have different ideas of hyphen, we map every
+        -- glyph named `hyphen` (in any letter case) to 0x2D (decimal 45)
+        if glyph.name:lower():match("^hyphen$") then codepoint=45  end
+
+        -- A glyph without a codepoint cannot be stored in `f.characters` (a nil
+        -- table index raises an error), so such glyphs are skipped.
+        if codepoint then
+            local char = {
+                index = i,
+                width = glyph.width * mag,
+                name  = glyph.name,
+                expansion_factor = 1000,
+            }
+            f.characters[codepoint] = char
+
+            -- Height and depth of the glyph
+            if glyph.boundingbox[4] then char.height = glyph.boundingbox[4] * mag  end
+            if glyph.boundingbox[2] then char.depth = -glyph.boundingbox[2] * mag  end
+
+            --- We change the `tounicode` entry for entries with a period. Sometimes fonts
+            --- have entries like `a.sc` or `a.c2sc` for smallcaps letter a. We are
+            --- only interested in the part before the period.
+            --- _This solution might not be perfect_.
+            if glyph.name:match("%.") then
+                local destname = glyph.name:gsub("^([^%.]*)%..*$","%1")
+                local cp = fonttable.lookup_codepoint_by_name[destname]
+                if cp then
+                    char.tounicode=to_utf16(cp)
+                end
+            end
+
+            --- Margin protrusion is enabled in `spinit.lua`.
+            if (glyph.name=="hyphen" or glyph.name=="period" or glyph.name=="comma") and type(extra_parameter.marginprotrusion) == "number" then
+                char["right_protruding"] = glyph.width * extra_parameter.marginprotrusion / 100
+            end
+
+            --- We do kerning by default. In the future we could turn it off.
+            local kerns={}
+            if glyph.kerns then
+                for _,kern in pairs(glyph.kerns) do
+                    local dest = fonttable.lookup_codepoint_by_name[kern.char]
+                    if dest and dest > 0 then
+                        kerns[dest] = kern.off * mag
+                    end
+                end
+            end
+            char.kerns = kerns
+        end
+    end
+
+    return true,f
+end
+
+-- End of file
diff --git a/l3kernel/l3final.dtx b/l3kernel/l3final.dtx
index 97c283c..12641cd 100644
--- a/l3kernel/l3final.dtx
+++ b/l3kernel/l3final.dtx
@@ -260,16 +260,41 @@
 %
 % \subsection{Temporary hacks}
 %
-% \begin{macro}{\T1/lmr/m/n/10}
+% \begin{macro}{\T1/lmr/m/n/10, \TU/lmr/m/n/10}
 %   For \emph{testing only} provide some kind of output: for that we
 %   need a font. At present, select Latin Modern Roman at 10\,pt:
 %   entirely arbitrary but at least usable.
 %    \begin{macrocode}
-\tex_everyjob:D \exp_after:wN
+\sys_if_engine_luatex:T
   {
-    \exp_after:wN \tex_font:D \cs:w T1/lmr/m/n/10 \cs_end:
-      = ec-lmr10 \scan_stop:
-    \use:c { T1/lmr/m/n/10 }
+    \tex_everyjob:D \exp_after:wN
+      {
+        \tex_the:D \tex_everyjob:D
+        \lua_now_x:n { require("l3format.lua") }
+      }
+  }
+\bool_if:nTF
+  {
+    \sys_if_engine_luatex_p: ||
+    \sys_if_engine_xetex_p:
+  }
+  {
+    \tex_everyjob:D \exp_after:wN \exp_after:wN \exp_after:wN
+      {
+        \exp_after:wN \tex_the:D \exp_after:wN \tex_everyjob:D
+        \exp_after:wN \tex_font:D \cs:w TU/lmr/m/n/10 \cs_end:
+          = "[lmroman10-regular.otf]"~at~10pt \scan_stop:
+        \use:c { TU/lmr/m/n/10 }
+      }
+  }
+  {
+    \tex_everyjob:D \exp_after:wN \exp_after:wN \exp_after:wN
+      {
+        \exp_after:wN \tex_the:D \exp_after:wN \tex_everyjob:D
+        \exp_after:wN \tex_font:D \cs:w T1/lmr/m/n/10 \cs_end:
+          = ec-lmr10 \scan_stop:
+        \use:c { T1/lmr/m/n/10 }
+      }
   }
 %    \end{macrocode}
 % \end{macro}
diff --git a/l3kernel/l3format.ins b/l3kernel/l3format.ins
index 381cf7c..1dabe77 100644
--- a/l3kernel/l3format.ins
+++ b/l3kernel/l3format.ins
@@ -104,6 +104,7 @@ Do not distribute a modified version of this file.
 
 \endpreamble
 \nopostamble
-\generate{\file{expl3.lua}{\from{l3luatex.dtx}{package,lua}}}
+\generate{\file{expl3.lua}   {\from{l3luatex.dtx}{lua,package}}}
+\generate{\file{l3format.lua}{\from{l3luatex.dtx}{fontloader}}}
 
 \endbatchfile
diff --git a/l3kernel/l3luatex.dtx b/l3kernel/l3luatex.dtx
index 9721542..8e9b6a6 100644
--- a/l3kernel/l3luatex.dtx
+++ b/l3kernel/l3luatex.dtx
@@ -298,6 +298,70 @@ l3kernel.charcat = charcat
 %</initex|package>
 %    \end{macrocode}
 %
+% \subsection{Format mode code: font loader}
+%
+%    \begin{macrocode}
+%<*fontloader>
+%    \end{macrocode}
+%
+% In format mode, there needs to be a font loader available to let us
+% use OpenType fonts. For testing, this is provided by
+% \texttt{fontloader.lua} from the speedata Publisher system
+% (\url{https://github.com/speedata/publisher}). The code there is designed
+% to be self-contained and has a certain number of built-in assumptions,
+% so there is a small amount of compatibility required.
+% 
+% The code we load looks up \texttt{texmf} tree files using
+% \texttt{kpse.filelist}, which isn't part of the standard \texttt{kpse}
+% library. The interface is emulated using a metatable.
+%    \begin{macrocode}
+kpse.filelist = setmetatable({}, {
+  __index = function (t, key)
+    return kpse.lookup(key)
+  end
+})
+%    \end{macrocode}
+% There is a built-in assumption in \texttt{fontloader.lua} that various
+% environmental variables are set. We deal with that by intercepting the
+% relevant names and returning something sane.
+%    \begin{macrocode}
+local os_getenv = os.getenv
+function os.getenv (var)
+  if var == "SP_FONT_PATH" then return "" end
+  return os_getenv(var)
+end
+%    \end{macrocode}
+% As detailed in
+% \url{https://github.com/speedata/publisher/blob/develop/COPYING}, the current
+% license for speedata Publisher is \textsc{AGPLv3}. We therefore only
+% load the file and use its public interfaces rather than copying/modifying
+% the code itself. Note though that we do have permission to use
+% \texttt{fontloader.lua} as a public domain work
+% (\url{http://chat.stackexchange.com/transcript/message/27273687#27273687}):
+% if we want to develop a richer loader we may want to take advantage of that
+% (which also applies to the simple shaper in the related \texttt{fonts.lua}
+% file).
+%    \begin{macrocode}
+local fontloader = require("fontloader.lua")
+%    \end{macrocode}
+% That done, register a callback which at present simply passes everything
+% through. There's no attempt to pick up font settings (which presumably
+% will be needed). Syntax is coerced to the same as for \XeTeX{}.
+%    \begin{macrocode}
+callback.register("define_font", 
+  function (name, size, id)
+    if string.match(name, "^%[") and string.match(name, "%]$") then
+      name = string.sub(name, 2, -2)
+    end
+    return select(2, fontloader.define_font(name, size))
+  end
+)
+%    \end{macrocode}
+%
+%    \begin{macrocode}
+%</fontloader>
+%    \end{macrocode}
+%
 %\end{implementation}
 %
 %\PrintIndex

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the latex3-commits mailing list