texlive[48477] trunk: make4ht (24aug18)

commits+karl at tug.org commits+karl at tug.org
Sat Aug 25 00:19:06 CEST 2018


Revision: 48477
          http://tug.org/svn/texlive?view=revision&revision=48477
Author:   karl
Date:     2018-08-25 00:19:06 +0200 (Sat, 25 Aug 2018)
Log Message:
-----------
make4ht (24aug18)

Modified Paths:
--------------
    trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht
    trunk/Master/texmf-dist/doc/support/make4ht/README
    trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex
    trunk/Master/texmf-dist/doc/support/make4ht/make4ht-doc.pdf
    trunk/Master/texmf-dist/doc/support/make4ht/readme.tex
    trunk/Master/texmf-dist/scripts/make4ht/formats/odt.lua
    trunk/Master/texmf-dist/scripts/make4ht/make4ht
    trunk/Master/texmf-dist/scripts/make4ht/mkparams.lua
    trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua

Added Paths:
-----------
    trunk/Master/texmf-dist/scripts/make4ht/extensions/dvisvgm_hashes.lua
    trunk/Master/texmf-dist/scripts/make4ht/filters/make4ht-entities-to-unicode.lua
    trunk/Master/texmf-dist/scripts/make4ht/make4ht-dvireader.lua

Modified: trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht	2018-08-24 22:19:06 UTC (rev 48477)
@@ -27,7 +27,7 @@
 
 -- set version number. the template should be replaced by the
 -- actual version number by the build script
-local version = "v0.2b"
+local version = "v0.2c"
 mkparams.version_number = version
 
 local args = mkparams.get_args()
@@ -92,6 +92,11 @@
 if #extensions > 0 then
   make = mkutils.extensions_modify_build(extensions, make)
 end
+
+-- allow output formats to modify the build process at the end
+make = formatter.modify_build(make) or make
+
+
 make:match("tmp$", function() return false,"tmp file" end)
 make:match(".*",function(filename,par)
 	local outdir =  '' --par["outdir"] and par["outdir"] .."/" or ''

Modified: trunk/Master/texmf-dist/doc/support/make4ht/README
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/README	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/doc/support/make4ht/README	2018-08-24 22:19:06 UTC (rev 48477)
@@ -119,6 +119,11 @@
 
 :    clean the `HTML` files using the `tidy` command.
 
+dvisvgm_hashes
+
+:    efficient generation of SVG pictures using Dvisvgm. It can utilize
+multiple processor cores and generates only changed images.
+
 common\_filters
 
 :    clean the output HTML files using filters.
@@ -777,7 +782,21 @@
       }
     }
 
+### The `dvisvgm_hashes` extension
 
+options
+
+:  command line options for Dvisvgm. The default value is `-n --exact -c 1.15,1.15`.
+
+cpu_cnt
+
+:  number of processor cores used for conversion. The extension tries to detect the available cores automatically by default.
+
+parallel_size
+
+:  number of pages used in each Dvisvgm call. The extension detects changed pages in the DVI file and constructs multiple calls to Dvisvgm with only the changed pages.
+
+
 # Configuration file {#configfile}
 
 It is possible to globally modify the build settings using the configuration
@@ -883,6 +902,14 @@
 
 The former way is preferable, though.
 
+## Filenames containing spaces
+
+`tex4ht` cannot handle filenames containing spaces. `make4ht` thus replaces spaces in input file names with underscores, so generated XML files use underscores instead of spaces as well.
+
+## Filenames containing non-ASCII characters
+
+The `odt` output doesn't support accented filenames, so it is best to stick to ASCII characters in filenames.
+
 # License
 
 Permission is granted to copy, distribute and/or modify this software

Modified: trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex	2018-08-24 22:19:06 UTC (rev 48477)
@@ -3,6 +3,135 @@
 
 \begin{itemize}
 \item
+  2018/08/23
+
+  \begin{itemize}
+  \tightlist
+  \item
+    released version 0.2c
+  \end{itemize}
+\item
+  2018/08/21
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added processor core detection on Windows
+  \item
+    make processor number configurable
+  \item
+    updated the documentation.
+  \end{itemize}
+\item
+  2018/08/20
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added \texttt{dvisvgm\_hashes} extension
+  \end{itemize}
+\item
+  2018/07/03
+
+  \begin{itemize}
+  \tightlist
+  \item
+    create the \texttt{mimetype} file to achieve the ODT file validity
+  \end{itemize}
+\item
+  2018/07/02
+
+  \begin{itemize}
+  \tightlist
+  \item
+    disabled conversion of XML entities for \&, \textless{} and
+    \textgreater{} characters back to Unicode, because it breaks XML
+    validity
+  \end{itemize}
+\item
+  2018/06/27
+
+  \begin{itemize}
+  \tightlist
+  \item
+    fixed root dir detection
+  \end{itemize}
+\item
+  2018/06/26
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added code for detection of TeX distribution root for Miktex and TL
+  \end{itemize}
+\item
+  2018/06/25
+
+  \begin{itemize}
+  \tightlist
+  \item
+    moved call to \texttt{xtpipes} from \texttt{t4ht} to the
+    \texttt{ODT} format driver. This should fix issues with path
+    expansion in \texttt{tex4ht.env} in TeX distributions.
+  \end{itemize}
+\item
+  2018/06/22
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added \texttt{mkutils.find\_zip} function. It detects \texttt{zip}
+    or \texttt{miktex-zip} executables
+  \end{itemize}
+\item
+  2018/06/19
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added new filter: \texttt{entities-to-unicode}. It converts XML
+    entities for Unicode characters back to Unicode.
+  \item
+    execute \texttt{entities-to-unicode} filter on text and math files
+    in the ODT output.
+  \end{itemize}
+\item
+  2018/06/12
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added support for direct \texttt{ODT} file packing
+  \end{itemize}
+\item
+  2018/06/11
+
+  \begin{itemize}
+  \tightlist
+  \item
+    new function available for formats, \texttt{format.modify\_build}
+  \item
+    function \texttt{mkutils.delete\_dir} for directory removal
+  \item
+    function \texttt{mkutils.mv} for file moving
+  \item
+    started on packing of the \texttt{ODT} files directly by the format,
+    instead of \texttt{t4ht}
+  \end{itemize}
+\item
+  2018/06/08
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added support for filenames containing spaces
+  \item
+    added support for filenames containing non-ascii characters
+  \item
+    don't require sudo for the installation, let the user install
+    symbolic links to \texttt{\$PATH}
+  \end{itemize}
+\item
   2018/05/03
 
   \begin{itemize}

Modified: trunk/Master/texmf-dist/doc/support/make4ht/make4ht-doc.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/support/make4ht/readme.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/readme.tex	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/doc/support/make4ht/readme.tex	2018-08-24 22:19:06 UTC (rev 48477)
@@ -155,6 +155,9 @@
 use \texttt{Latexmk} for \LaTeX~compilation.
 \item[tidy]
 clean the \texttt{HTML} files using the \texttt{tidy} command.
+\item[dvisvgm\_hashes]
+efficient generation of SVG pictures using Dvisvgm. It can utilize
+multiple processor cores and generates only changed images.
 \item[common\_filters]
 clean the output HTML files using filters.
 \item[common\_domfilters]
@@ -863,6 +866,23 @@
 }
 \end{verbatim}
 
+\hypertarget{the-dvisvgm_hashes-extension}{%
+\subsubsection{\texorpdfstring{The \texttt{dvisvgm\_hashes}
+extension}{The dvisvgm\_hashes extension}}\label{the-dvisvgm_hashes-extension}}
+
+\begin{description}
+\item[options]
+command line options for Dvisvgm. The default value is
+\texttt{-n\ -\/-exact\ -c\ 1.15,1.15}.
+\item[cpu\_cnt]
+number of processor cores used for conversion. The extension tries to
+detect the available cores automatically by default.
+\item[parallel\_size]
+number of pages used in each Dvisvgm call. The extension detects changed
+pages in the DVI file and constructs multiple calls to Dvisvgm with only
+changed pages.
+\end{description}
+
 \hypertarget{configfile}{%
 \section{Configuration file}\label{configfile}}
 
@@ -999,6 +1019,22 @@
 
 The former way is preferable, though.
 
+\hypertarget{filenames-containing-spaces}{%
+\subsection{Filenames containing
+spaces}\label{filenames-containing-spaces}}
+
+\texttt{tex4ht} cannot handle filenames containing spaces.
+\texttt{make4ht} thus replaces spaces in input file names with
+underscores, so generated XML files use underscores instead of spaces as
+well.
+
+\hypertarget{filenames-containing-non-ascii-characters}{%
+\subsection{Filenames containing non-ASCII
+characters}\label{filenames-containing-non-ascii-characters}}
+
+The \texttt{odt} output doesn't support accented filenames, so it is best
+to stick to ASCII characters in filenames.
+
 \hypertarget{license}{%
 \section{License}\label{license}}
 

Added: trunk/Master/texmf-dist/scripts/make4ht/extensions/dvisvgm_hashes.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/extensions/dvisvgm_hashes.lua	                        (rev 0)
+++ trunk/Master/texmf-dist/scripts/make4ht/extensions/dvisvgm_hashes.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -0,0 +1,254 @@
+local dvireader = require "make4ht-dvireader"
+local mkutils = require "mkutils"
+local filter = require "make4ht-filter"
+
+
+local M = {}
+-- mapping between tex4ht image names and hashed image names
+local output_map = {}
+local dvisvgm_options = "-n --exact -c 1.15,1.15"
+local parallel_size = 64
+-- local parallel_size = 3
+
+local function make_hashed_name(base, hash)
+  return base .. "-" ..hash..".svg"
+end
+
+-- detect the number of available processors
+local cpu_cnt = 3  -- reasonable default, used whenever the detection is not possible
+
+if os.name == 'linux' then
+  -- count the "processor" entries; an unreadable /proc/cpuinfo keeps the default
+  local cpuinfo = io.open('/proc/cpuinfo', 'r')
+  if cpuinfo then
+    local found = 0
+    for line in cpuinfo:lines() do
+      if line:match('^processor') then found = found + 1 end
+    end
+    cpuinfo:close()
+    -- use a single thread if no CPU core has been found
+    cpu_cnt = math.max(found, 1)
+  end
+elseif os.name == 'cygwin' or os.type == 'windows' then
+  -- windows has NUMBER_OF_PROCESSORS environmental value
+  local nop = tonumber(os.getenv('NUMBER_OF_PROCESSORS'))
+  -- store the count as a number, and only when it is a usable value
+  if nop and nop >= 1 then cpu_cnt = math.floor(nop) end
+end
+
+
+
+-- process output of dvisvgm and find output page numbers and corresponding files
+local function get_generated_pages(output, pages)
+  pages = pages or {} -- optional page -> file table to extend; it is also returned
+  local pos, finish, page = string.find(output, "processing page (%d+)")
+  while pos do
+    local found, stop, file = string.find(output, "output written to ([^\n]+)", finish)
+    if not found then break end -- no file for this page; a nil offset would rescan from the start forever
+    pages[tonumber(page)] = file
+    pos, finish, page = string.find(output, "processing page (%d+)", stop)
+  end
+  return pages
+end
+
+local function make_ranges(pages)
+  local newpages = {}
+  local start, stop
+  for i=1,#pages do
+    local current = pages[i]
+    local next_el = pages[i+1] or current + 100 -- just select a big number
+    local diff = next_el - current
+    if diff == 1 then
+      if not start then start = current end
+    else
+      local element
+      if start then
+        element = start .. "-" .. current
+      else
+        element = current
+      end
+      newpages[#newpages+1] = element
+      start = nil
+    end
+  end
+  return newpages
+end
+
+local function read_log(dvisvgmlog)
+  local f = io.open(dvisvgmlog, "r")
+  if not f then return nil, "Cannot read dvisvgm log" end
+  local output = f:read("*all")
+  f:close()
+  return output
+end
+
+-- test the existence of GNU Make, which can execute tasks in parallel
+local function test_make()
+  local make = io.popen("make -v", "r")
+  if not make then return false end
+  local content = make:read("*all") or ""
+  make:close()
+  return content:find("GNU Make", 1, true) ~= nil -- a handle alone doesn't prove that make exists
+end
+
+local function save_file(filename, text) -- write text to filename; returns nil, message on failure
+  local f, msg = io.open(filename, "w")
+  if not f then return nil, msg end -- report the failure instead of indexing nil
+  f:write(text)
+  return f:close()
+end
+
+
+local function make_makefile_command(idvfile, page_sequences)
+  local logs = {}
+  local all = {} -- list of targets in the "all:" makefile target
+  local targets = {}
+  local basename = idvfile:gsub(".idv$", "")
+  local makefilename = basename .. "-images" .. ".mk"
+  -- build make targets
+  for i, ranges in ipairs(page_sequences) do
+    local target = basename .. "-" .. i
+    local logfile = target .. ".dlog"
+    logs[#logs + 1] = logfile
+    all[#all+1] = target
+    local chunk = target .. ":\n\tdvisvgm -v4 " .. dvisvgm_options .. " -p " .. ranges  .. " " .. idvfile .. " 2> " .. logfile .. "\n"
+    targets[#targets + 1] = chunk
+  end
+  -- construct makefile and save it
+  local makefile = "all: " .. table.concat(all, " ") .. "\n\n" .. table.concat(targets, "\n")
+  save_file(makefilename, makefile)
+  local command = "make -j" .. cpu_cnt .." -f " .. makefilename
+  return command, logs
+end
+
+local function prepare_command(idvfile, pages)
+  local logs = {}
+  if #pages > parallel_size and test_make() then 
+    local page_sequences = {}
+    for i=1, #pages, parallel_size do
+      local current_pages = {}
+      for x = i, i+parallel_size -1 do
+        current_pages[#current_pages + 1] = pages[x]
+      end
+      table.insert(page_sequences,table.concat(make_ranges(current_pages), ","))
+    end
+    return make_makefile_command(idvfile, page_sequences)
+  end
+  -- else
+    local pagesequence = table.concat(make_ranges(pages), ",")
+    -- the stderr from dvisvgm must be redirected and postprocessed
+    local dvisvgmlog = idvfile:gsub("idv$", "dlog")
+    -- local dvisvgm = io.popen("dvisvgm -v4 -n --exact -c 1.15,1.15 -p " .. pagesequence .. " " .. idvfile, "r")
+    local command = "dvisvgm -v4 " .. dvisvgm_options .. " -p " .. pagesequence .. " " .. idvfile .. " 2> " .. dvisvgmlog
+    return command, {dvisvgmlog}
+  -- end
+end
+
+local function execute_dvisvgm(idvfile, pages) -- run dvisvgm for pages; returns page -> file table
+  if #pages < 1 then return nil, "No pages to convert" end
+  local command, logs = prepare_command(idvfile, pages)
+  print(command)
+  os.execute(command)
+  local generated_pages = {}
+  for _, dvisvgmlog in ipairs(logs) do
+    local output = read_log(dvisvgmlog) -- nil when the log file cannot be read
+    if output then generated_pages = get_generated_pages(output, generated_pages) end
+  end
+  return generated_pages
+end
+
+local function get_dvi_pages(arg)
+  -- list of pages to convert in this run (pages whose hashed image file doesn't exist yet)
+  local to_convert = {}
+  local idv_file = arg.input .. ".idv"
+  -- set extension options; arg values win over filter settings, which win over the current values
+  local extoptions = mkutils.get_filter_settings "dvisvgm_hashes" or {}
+  dvisvgm_options = arg.options or extoptions.options or dvisvgm_options
+  parallel_size = arg.parallel_size or extoptions.parallel_size or parallel_size
+  cpu_cnt = arg.cpu_cnt or extoptions.cpu_cnt or cpu_cnt
+  local f = io.open(idv_file, "r")
+  if not f then return nil, "Cannot open idv file: " .. idv_file end
+  local content = f:read("*all")
+  f:close()
+  local dvi_pages = dvireader.get_pages(content)
+  -- we must find page numbers and output names for the generated images
+  local lg = mkutils.parse_lg(arg.input ..".lg")
+  for _, name in ipairs(lg.images) do
+    local page = tonumber(name.page)
+    local hash = dvi_pages[page]
+    local tex4ht_name = name.output
+    local output_name = make_hashed_name(arg.input, hash)
+    output_map[tex4ht_name] = output_name
+    if not mkutils.file_exists(output_name) then
+      print(output_name)
+      to_convert[#to_convert+1] = page
+    end
+  end
+  local generated_files, msg = execute_dvisvgm(idv_file, to_convert)
+  if not generated_files then
+    return nil, msg
+  end
+
+  -- rename the generated files (page -> file, as returned by execute_dvisvgm) to the hashed filenames
+  for page, file in pairs(generated_files) do
+    os.rename(file, make_hashed_name(arg.input, dvi_pages[page]))
+  end
+
+end
+
+function M.test(format)
+  -- ODT format doesn't support SVG
+  if format == "odt" then return false end
+  return true
+end
+
+function M.modify_build(make)
+  -- this must be used in the .mk4 file as
+  -- Make:dvisvgm_hashes {}
+  make:add("dvisvgm_hashes", function(arg)
+    get_dvi_pages(arg)
+  end, 
+  {
+  })
+
+  -- insert dvisvgm_hashes command at the end of the build sequence -- it needs to be called after t4ht
+  make:dvisvgm_hashes {}
+
+  -- replace original image names with hashed names
+  local executed = false
+  make:match(".*", function(arg)
+    if not executed then
+      executed = true
+      local lgfiles = make.lgfile.files
+      for i, filename in ipairs(lgfiles) do
+        local replace = output_map[filename]
+        if replace then
+          lgfiles[i] = replace
+        end
+      end
+    end
+  end)
+
+  -- fix src attributes
+  local process = filter {
+    function(str)
+      return str:gsub('src="([^"]+)', function(filename)
+        local newname = output_map[filename] or filename
+        print("newname", newname)
+        return 'src="'.. newname 
+      end)
+    end
+  }
+
+  make:match("htm.?$", process)
+
+  -- disable the image processing
+  for _,v in ipairs(make.build_seq) do
+    if v.name == "t4ht" then
+      v.params.t4ht_par = v.params.t4ht_par .. " -p"
+    end
+  end
+  make:image(".", function() return "" end)
+  return make
+end
+
+return M


Property changes on: trunk/Master/texmf-dist/scripts/make4ht/extensions/dvisvgm_hashes.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: trunk/Master/texmf-dist/scripts/make4ht/filters/make4ht-entities-to-unicode.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/filters/make4ht-entities-to-unicode.lua	                        (rev 0)
+++ trunk/Master/texmf-dist/scripts/make4ht/filters/make4ht-entities-to-unicode.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -0,0 +1,13 @@
+-- convert Unicode characters encoded as XML entities back to Unicode
+
+-- characters which must stay escaped, mapped to the named entity emitted instead
+local disabled = { ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;" }
+local utfchar = unicode.utf8.char
+return  function(content)
+  return content:gsub("%&%#x([A-Fa-f0-9]+);", function(entity)
+    -- convert hexadecimal entity to Unicode
+    local newchar =  utfchar(tonumber(entity, 16))
+    -- we don't want to break XML validity with forbidden characters
+    return disabled[newchar] or newchar
+  end)
+end


Property changes on: trunk/Master/texmf-dist/scripts/make4ht/filters/make4ht-entities-to-unicode.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/scripts/make4ht/formats/odt.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/formats/odt.lua	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/scripts/make4ht/formats/odt.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -1,11 +1,266 @@
 local M = {}
 local mkutils = require "mkutils"
+local lfs     = require "lfs"
+local os      = require "os"
+local kpse    = require "kpse"
+local filter  = require "make4ht-filter"
 
+
 function M.prepare_parameters(settings, extensions)
   settings.tex4ht_sty_par = settings.tex4ht_sty_par ..",ooffice"
   settings.tex4ht_par = settings.tex4ht_par .. " ooffice/! -cmozhtf"
-  settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes -coo "
+  -- settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes -coo "
+  -- settings.t4ht_par = settings.t4ht_par .. " -cooxtpipes "
   settings = mkutils.extensions_prepare_parameters(extensions, settings)
   return settings
 end
+
+-- object for working with the ODT file
+local Odtfile = {}
+Odtfile.__index = Odtfile
+
+Odtfile.new = function(archivename)
+  local self = setmetatable({}, Odtfile)
+  -- create temporary directory
+  local tmpname = os.tmpname()
+  tmpname = tmpname:match("([a-zA-Z0-9_%-]+)$")
+  local status, msg = lfs.mkdir(tmpname)
+  if not status then return nil, msg end
+  -- make picture dir
+  lfs.mkdir(tmpname .. "/Pictures")
+  self.archivelocation = tmpname
+  self.name = archivename
+  return self
+end
+
+function Odtfile:copy(src, dest)
+  mkutils.cp(src, self.archivelocation .. "/" .. dest)
+end
+
+function Odtfile:move(src, dest)
+  mkutils.mv(src, self.archivelocation .. "/" .. dest)
+end
+
+function Odtfile:create_dir(dir)
+  local currentdir = lfs.currentdir()
+  lfs.chdir(self.archivelocation)
+  lfs.mkdir(dir)
+  lfs.chdir(currentdir)
+end
+  
+function Odtfile:make_mimetype()
+  self.mimetypename = "mimetype"
+  local m = io.open(self.mimetypename, "w")
+  m:write("application/vnd.oasis.opendocument.text")
+  m:close()
+end
+
+function Odtfile:remove_mimetype()
+  os.remove(self.mimetypename)
+end
+
+
+function Odtfile:pack()
+  local currentdir = lfs.currentdir()
+  local zip_command = mkutils.find_zip()
+  lfs.chdir(self.archivelocation)
+  -- make temporary mime type file
+  self:make_mimetype()
+  os.execute(zip_command .. " -q0X " .. self.name .. " " .. self.mimetypename)
+  -- remove it, so the next command doesn't overwrite it
+  self:remove_mimetype()
+  os.execute(zip_command .." -r " .. self.name .. " *")
+  lfs.chdir(currentdir)
+  mkutils.cp(self.archivelocation .. "/" .. self.name, self.name)
+  mkutils.delete_dir(self.archivelocation)
+end
+
+-- find if tex4ht.jar exists in a path
+local function find_tex4ht_jar(path)
+  local jar_file = path .. "/tex4ht/bin/tex4ht.jar"
+  return mkutils.file_exists(jar_file)
+end
+
+-- return value of TEXMFROOT variable if it exists and if tex4ht.jar can be located inside
+local function get_texmfroot()
+  -- user can set TEXMFROOT environmental variable as the last resort
+  local root_directories = {kpse.var_value("TEXMFROOT"), kpse.var_value("TEXMFDIST"), os.getenv("TEXMFROOT")}
+  for _, root in ipairs(root_directories) do
+    if root then
+      if find_tex4ht_jar(root) then return root end
+      -- TeX live locates files in texmf-dist subdirectory, but Miktex doesn't
+      local path = root .. "/texmf-dist"
+      if find_tex4ht_jar(path) then return path end
+    end
+  end
+end
+
+-- Miktex doesn't seem to set TeX variables such as TEXMFROOT
+-- we will try to find the TeX root using trick with locating package in TeX root
+-- there is a danger that this file is located in TEXMFHOME, the location will fail then
+local function find_texmfroot()
+  local tex4ht_path = kpse.find_file("tex4ht.sty")
+  if tex4ht_path then
+    local path = tex4ht_path:gsub("/tex/generic/tex4ht/tex4ht.sty$","")
+    if find_tex4ht_jar(path) then return path end
+  end
+  return nil
+end
+
+-- call xtpipes from Lua: registers matches which run xtpipes on the 4oo and 4om files
+local function call_xtpipes(make)
+  -- we must find root of the TeX distribution
+  local selfautoparent = get_texmfroot() or find_texmfroot()
+  if selfautoparent then
+    -- make command pattern using TeX distro path; ${outputfile} and ${filename} are filled in for each file
+    local pattern = string.format("java -classpath %s/tex4ht/bin/tex4ht.jar xtpipes -i %s/tex4ht/xtpipes/ -o ${outputfile} ${filename}", selfautoparent, selfautoparent)
+    -- call xtpipes on a temporary file
+    local matchfunction =  function(filename)
+      -- move the matched file to a temporary file, xtpipes will write it back to the original file
+      local basename = mkutils.remove_extension(filename)
+      local tmpfile = basename ..".tmp"
+      mkutils.mv(filename, tmpfile)
+      local command = pattern % {filename = tmpfile, outputfile = filename}
+      print(command)
+      local status = os.execute(command)
+      if status > 0 then
+        -- xtpipes failed, the file may not be well-formed XML (NOTE(review): assumes os.execute returns a number -- confirm)
+        -- we can try to make it well-formed using Tidy
+        local tidy_command = "tidy -utf8 -xml -asxml -q -o ${filename} ${tmpfile}" % {tmpfile = tmpfile, filename = filename}
+        print("xtpipes failed trying tidy")
+        print(tidy_command)
+        local status = os.execute(tidy_command)
+        if status > 0 then
+          -- if tidy failed as well, just use the original file
+          -- it will probably produce a corrupted ODT file though
+          print("Tidy failed as well")
+          mkutils.mv(tmpfile, filename)
+        end
+      end
+    end
+    make:match("4oo", matchfunction)
+    make:match("4om", matchfunction)
+    -- it is necessary to execute the above matches as the first ones in the build file
+    local matches = make.matches
+    -- move the last match to the first place
+    local function move_matches()
+      local last = matches[#matches]
+      table.insert(matches, 1, last)
+      matches[#matches] = nil
+    end
+    -- we need to move last two matches, for 4oo and 4om files
+    move_matches()
+    move_matches()
+  else
+    print "Cannot locate xtpipes. Try to set TEXMFROOT variable to a root directory of your TeX distribution"
+  end
+end
+
+-- sort output files according to their extensions
+local function prepare_output_files(lgfiles)
+  local groups = {}
+  for _, name in ipairs(lgfiles) do
+    local basename, extension = name:match("(.-)%.([^%.]+)$")
+    local group = groups[extension] or {}
+    table.insert(group, basename)
+    groups[extension] = group
+    print(basename, extension)
+  end
+  return groups
+end
+
+-- execute function on all files in the group
+-- function fn takes current filename and table with various attributes
+local function exec_group(groups, name, fn)
+  for _, basename in ipairs(groups[name] or {}) do
+    fn{basename = basename, extension=name, filename = basename .. "." .. name}
+  end
+end
+
+-- Extend the build for the ODT output: run xtpipes and the entity filter on the
+-- 4oo/4om files and pack all generated files into the .odt archive. Returns `make`.
+function M.modify_build(make)
+  local executed = false
+  -- execute xtpipes from the build file, instead of t4ht. this fixes issues with wrong paths
+  -- expanded in tex4ht.env in Miktex or Debian
+  call_xtpipes(make)
+  -- convert XML entities for Unicode characters produced by Xtpipes to characters
+  local fixentities = filter {"entities-to-unicode"}
+  make:match("4oo", fixentities)
+  make:match("4om", fixentities)
+  -- build the ODT file. The packing must be executed as the last match, so this
+  -- catch-all match only runs once, for the first matched file, to find the last
+  -- filename in the lgfile and to register the packing match for it
+  make:match(".*", function()
+    if not executed then -- execute it only once
+      local lgfiles = make.lgfile.files -- list of processed files
+      -- find the last one; escape punctuation, so characters like "-" or "." in the
+      -- filename are matched literally instead of being interpreted as pattern magic
+      local lastfile = lgfiles[#lgfiles]:gsub("%p", "%%%0") .."$"
+      -- odt packing will be done in the match for the last file
+      make:match(lastfile, function()
+        local groups = prepare_output_files(make.lgfile.files)
+        local odtname = groups.odt[1] .. ".odt"
+        local odt,msg = Odtfile.new(odtname)
+        if not odt then
+          print("Cannot create ODT file: " .. msg)
+          return -- without the archive object every following step would fail
+        end
+        -- helper function for simple file moving
+        local function move_file(group, dest)
+          exec_group(groups, group, function(par)
+            odt:move("${filename}" % par, dest)
+          end)
+        end
+
+        -- the document text
+        exec_group(groups, "4oo", function(par)
+          odt:move("${filename}" % par, "content.xml")
+          odt:create_dir("Pictures")
+        end)
+
+        -- manifest
+        exec_group(groups, "4of", function(par)
+          odt:create_dir("META-INF")
+          odt:move("${filename}" % par, "META-INF/manifest.xml")
+        end)
+
+        -- math
+        exec_group(groups, "4om", function(par)
+          odt:create_dir(par.basename)
+          odt:move("${filename}" % par, "${basename}/content.xml" % par)
+          -- copy the settings file to math subdir
+          local settings = groups["4os"][1]
+          odt:copy(settings .. ".4os", "${basename}/settings.xml" % par)
+        end)
+
+        -- these files are created only once, so it doesn't matter that they are
+        -- copied to one file
+        move_file("4os", "settings.xml")
+        move_file("4ot", "meta.xml")
+        move_file("4oy", "styles.xml")
+
+        -- pictures
+        exec_group(groups, "4og", function(par)
+          -- add support for images in the TEXMF tree
+          if not mkutils.file_exists(par.basename) then
+            par.basename = kpse.find_file(par.basename, "graphic/figure")
+            if not par.basename then return nil, "Cannot find picture" end
+          end
+          -- the Pictures dir is flat, without subdirs
+          odt:copy("${basename}" % par, "Pictures")
+        end)
+
+        -- remove some spurious file
+        exec_group(groups, "4od", function(par)
+          os.remove(par.filename)
+        end)
+
+        odt:pack()
+      end)
+    end
+    executed = true
+  end)
+  return make
+end
 return M

Modified: trunk/Master/texmf-dist/scripts/make4ht/make4ht
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/make4ht	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/scripts/make4ht/make4ht	2018-08-24 22:19:06 UTC (rev 48477)
@@ -27,7 +27,7 @@
 
 -- set version number. the template should be replaced by the
 -- actual version number by the build script
-local version = "v0.2b"
+local version = "v0.2c"
 mkparams.version_number = version
 
 local args = mkparams.get_args()
@@ -92,6 +92,11 @@
 if #extensions > 0 then
   make = mkutils.extensions_modify_build(extensions, make)
 end
+
+-- allow output formats to modify the build process at the end
+make = formatter.modify_build(make) or make
+
+
 make:match("tmp$", function() return false,"tmp file" end)
 make:match(".*",function(filename,par)
 	local outdir =  '' --par["outdir"] and par["outdir"] .."/" or ''

Added: trunk/Master/texmf-dist/scripts/make4ht/make4ht-dvireader.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/make4ht-dvireader.lua	                        (rev 0)
+++ trunk/Master/texmf-dist/scripts/make4ht/make4ht-dvireader.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -0,0 +1,182 @@
+-- This is not actually full DVI reader. It just calculates hash for each page,
+-- so it can be detected if it changed between compilations and needs to be
+-- converted to image using Dvisvgm or Dvipng
+--
+-- information about DVI format is from here: https://web.archive.org/web/20070403030353/http://www.math.umd.edu/~asnowden/comp-cont/dvi.html
+--
+local M -- NOTE(review): never assigned or read in the visible code; the module returns a fresh table
+
+-- the file after post_post is filled with bytes 223
+local endfill = 223
+
+-- numbers of bytes for each data type in DVI file
+local int = 4 -- these sizes are also the keys of the matching functions in `readers`
+local byte = 1
+local sixteen = 2
+
+local function read_char(str, pos) -- the single character at 1-based index pos
+  if pos and pos > string.len(str) then return nil end -- nil when pos is past the end
+  return string.sub(str, pos, pos) -- exactly one character, not the pair pos..pos+1
+end
+
+local function read_byte(str, pos) -- numeric code of the byte at pos
+  local ch = read_char(str, pos); return string.byte(ch)
+end
+
+-- DVI file format uses signed big endian integers. This code doesn't take into account
+-- the sign, so it will return incorrect result for negative numbers. It doesn't matter
+-- for the original purpose of this library, but it should be fixed for general use.
+local function read_integer(str, pos) -- unsigned value of the 4 bytes starting at pos
+  local first = read_byte(str, pos) -- most significant byte
+  local num = first * (256 ^ 3)
+  num = read_byte(str, pos + 1) * (256 ^ 2) + num
+  num = read_byte(str, pos + 2) * 256  + num
+  num = read_byte(str, pos + 3) + num -- least significant byte
+  return num
+end
+
+local function read_sixteen(str, pos) -- unsigned 16-bit big endian value at pos
+  -- the high byte comes first, the low byte follows it
+  local value = read_byte(str, pos) * 256 + read_byte(str, pos + 1)
+  return value
+end
+
+-- select reader function with number of bytes of an argument
+local readers = {
+  [byte] = read_byte, -- 1 byte
+  [int] = read_integer, -- 4 bytes
+  [sixteen] = read_sixteen -- 2 bytes
+}
+
+
+local opcodes = { -- per command: its opcode byte and its arguments in the order they are stored
+  post_post = {
+    opcode = 249, args = {
+      {name="q", type = int}, -- postamble address
+      {name="i", type = byte} -- presumably the DVI format id byte; verify against the DVI spec
+    }
+  },
+  post = {
+    opcode = 248,
+    args = {
+      {name="p", type = int}, -- address of the last page
+      {name="num", type = int}, -- num, den and mag are not used by this module
+      {name="den", type = int},
+      {name="mag", type = int},
+      {name="l", type = int}, -- l, u, s and t are not used by this module either
+      {name="u", type = int},
+      {name="s", type = sixteen},
+      {name="t", type = sixteen}, -- presumably the total page count; verify against the DVI spec
+    }
+  },
+  bop = {
+    opcode = 139,
+    args = {
+      {name="c0", type=int}, -- read_page reports this value as the page number
+      {name="c1", type=int},
+      {name="c2", type=int},
+      {name="c3", type=int},
+      {name="c4", type=int},
+      {name="c5", type=int},
+      {name="c6", type=int},
+      {name="c7", type=int},
+      {name="c8", type=int},
+      {name="c9", type=int},
+      {name="p", type=int}, -- previous page
+    }
+  }
+}
+
+local function read_arguments(str, pos, args) -- decode args into a name -> value table
+  local values = {}
+  for i = 1, #args do
+    local spec = args[i]
+    local reader = readers[spec.type]
+    values[spec.name] = reader(str, pos)
+    pos = pos + spec.type -- the type doubles as the field width in bytes
+  end
+  return values
+end
+
+local function read_opcode(opcode, str, pos) -- returns the argument table, or nil and a message
+  local format = opcodes[opcode]
+  if not format then return nil, "Cannot find opcode format: " .. opcode end
+  -- check that opcode byte in the current position is the same as required opcode
+  local op = read_byte(str, pos)
+  if op ~= format.opcode then return nil, "Wrong opcode " .. tostring(op) .. " at position " .. pos end
+  return read_arguments(str, pos+1, format.args)
+end
+
+-- find the postamble address
+local function get_postamble_addr(dvicontent)
+  -- walk back from the last byte over the endfill padding; the loop stops on
+  -- the final byte of post_post
+  local cursor = dvicontent:len()
+  while read_byte(dvicontent, cursor) == endfill do
+    cursor = cursor - 1
+  end
+  -- post_post is the opcode byte followed by its arguments q (4 bytes) and
+  -- i (1 byte), so the opcode byte sits five positions before the last byte
+  local args, err = read_opcode("post_post", dvicontent, cursor-5)
+  if not args then return nil, err end
+  -- q is the address stored in the file; one is added to it so that the
+  -- result can be used as a Lua string index
+  local address = args.q + 1
+  return address
+end
+
+local function read_page(str, start, stop) -- returns page object, previous page address, start
+  -- search backwards from pos for the eop opcode (140), but never before the start of
+  -- this page: a damaged file without eop yields nil instead of searching forever
+  local function get_end_of_page(pos)
+    while pos > start do
+      if read_byte(str, pos) == 140 then return pos end
+      pos = pos - 1
+    end
+  end
+  -- a pointer of -1 (no previous page) reads as 2^32-1, because read_integer is unsigned
+  if start == 2^32-1 then return nil end
+  local current_page = read_opcode("bop", str,  start + 1)
+  if not current_page then return nil end
+  local endofpage = get_end_of_page(stop)
+  if not endofpage then return nil end
+  -- get the page contents, but skip all parameters, because they can change
+  -- (especially pointer to the previous page)
+  local page = str:sub(start + 46, endofpage)
+  local page_obj = { number = current_page.c0, hash = md5.sumhexa(page) } -- c0 is the page number
+  return page_obj, current_page.p, start
+end
+
+local function get_pages(dvicontent) -- returns a table: page number -> hash of the page contents
+  local pages = {}
+  -- fail with the reader's own message when the postamble cannot be located or
+  -- parsed, instead of an unrelated nil error a few lines later
+  local postamble_pos = assert(get_postamble_addr(dvicontent))
+  local postamble = assert(read_opcode("post", dvicontent, postamble_pos))
+  -- pages are chained backwards: start at the last one and follow the pointers;
+  -- each page ends somewhere before the start of the one read previously
+  local next_page_pos, previous_page = postamble.p, postamble_pos
+
+  while next_page_pos do
+    local page
+    page, next_page_pos, previous_page = read_page(dvicontent, next_page_pos, previous_page)
+    -- read_page returns nil once the chain is exhausted, nothing is stored then
+    if page then pages[page.number] = page.hash end
+  end
+
+  return pages
+end
+
+-- if arg[1] then
+--   local f = io.open(arg[1], "r")
+--   local dvicontent = f:read("*all")
+--   f:close()
+--   local pages = get_pages(dvicontent)
+--   for k,v in pairs(pages) do 
+--     print(k,v)
+--   end
+-- end
+
+return {
+  get_pages = get_pages
+}


Property changes on: trunk/Master/texmf-dist/scripts/make4ht/make4ht-dvireader.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: trunk/Master/texmf-dist/scripts/make4ht/mkparams.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/mkparams.lua	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/scripts/make4ht/mkparams.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -118,6 +118,8 @@
   if not latex_cli_params:match("%-jobname") then
     -- we must strip out directories from jobname when full path to document is given
     input = input:match("([^%/^%\\]+)$")
+    -- input also cannot contain spaces, replace them with underscores
+    input = input:gsub("%s", "_")
     table.insert(latex_params,"-jobname="..input)
   else
     -- when user specifies -jobname, we must change name of the input file,

Modified: trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua	2018-08-24 22:18:02 UTC (rev 48476)
+++ trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua	2018-08-24 22:19:06 UTC (rev 48477)
@@ -118,6 +118,20 @@
 	os.execute(command)
 end
 
+function mv(src, dest) -- move src to dest using the system shell command
+  local is_unix = os.type == "unix"
+  local command = string.format('%s "%s" "%s"', is_unix and "mv" or "move", src, dest)
+  -- fix windows paths (applied whenever the platform is not unix)
+  if not is_unix then command = command:gsub("/",'\\') end
+  print("Move: ".. command)
+  os.execute(command)
+end
+
+function delete_dir(path) -- recursively delete the directory `path` with the system shell
+  local cmd = os.type == "unix" and "rm -rd " or "rd /s/q "
+  os.execute(string.format('%s"%s"', cmd, path)) -- quoted, so paths with spaces survive
+end
+
 local used_dir = {}
 
 function prepare_path(path)
@@ -209,15 +223,26 @@
 	return true
 end
 
+-- find the zip command: the first of "zip" and "miktex-zip" that runs successfully
+function find_zip()
+  for _, name in ipairs({"zip", "miktex-zip"}) do
+    local handle = io.popen(name .. " -v", "r") -- nil when the pipe cannot be opened
+    -- drain the output first, so the probed command is not killed by a closed pipe
+    if handle and handle:read("*a") and handle:close() then return name end
+  end
+  -- we cannot find the zip command
+  return "zip"
+end
+
 -- Config loading
 local function run(untrusted_code, env)
-	if untrusted_code:byte(1) == 27 then return nil, "binary bytecode prohibited" end
-	local untrusted_function = nil
+  if untrusted_code:byte(1) == 27 then return nil, "binary bytecode prohibited" end
+  local untrusted_function = nil
   untrusted_function, message = load(untrusted_code, nil, "t",env)
-	if not untrusted_function then return nil, message end
-	if not setfenv then setfenv = function(a,b) return true end end
-	setfenv(untrusted_function, env)
-	return pcall(untrusted_function)
+  if not untrusted_function then return nil, message end
+  if not setfenv then setfenv = function(a,b) return true end end
+  setfenv(untrusted_function, env)
+  return pcall(untrusted_function)
 end
 
 local main_settings = {}
@@ -283,9 +308,9 @@
   end
 end
 env.Font   = function(s)
-	local font_name = s["name"]
-	if not font_name then return nil, "Cannot find font name" end
-	env.settings.fonts[font_name] = s
+  local font_name = s["name"]
+  if not font_name then return nil, "Cannot find font name" end
+  env.settings.fonts[font_name] = s
 end
 
 env.Make   = make4ht.Make
@@ -294,22 +319,22 @@
 
 -- this function reads the LaTeX log file and tries to detect fatal errors in the compilation
 local function testlogfile(par)
-	local logfile = par.input .. ".log"
-	local f = io.open(logfile,"r")
-	if not f then
-		print("Make4ht: cannot open log file "..logfile)
-		return 1
-	end
-	local len = f:seek("end")
+  local logfile = par.input .. ".log"
+  local f = io.open(logfile,"r")
+  if not f then
+    print("Make4ht: cannot open log file "..logfile)
+    return 1
+  end
+  local len = f:seek("end")
   -- test only the end of the log file, no need to run search functions on everything
   local newlen = len - 1256
   -- but the value to seek must be greater than 0
   newlen = (newlen > 0) and newlen or 0
-	f:seek("set", newlen)
-	local text = f:read("*a")
-	f:close()
-	if text:match("No pages of output") or text:match("TeX capacity exceeded, sorry") then return 1 end
-	return 0
+  f:seek("set", newlen)
+  local text = f:read("*a")
+  f:close()
+  if text:match("No pages of output") or text:match("TeX capacity exceeded, sorry") then return 1 end
+  return 0
 end
 
 
@@ -327,7 +352,7 @@
 "\\HCode\\expandafter\\def\\csname tex4ht\\endcsname{#1,html}\\def"..
 "\\HCode####1{\\documentstyle[tex4ht,}\\@ifnextchar[{\\HCode}{"..
 "\\documentstyle[tex4ht]}}}\\makeatother\\HCode ${tex4ht_sty_par}.a.b.c."..
-"\\input ${tex_file}'"
+"\\input \"\\detokenize{${tex_file}}\"'"
 
 env.Make:add("htlatex",function(par)
   local command = Make.latex_command
@@ -402,6 +427,7 @@
     local format = assert(require(format_library))
     if format then
       format.prepare_extensions = format.prepare_extensions or function(extensions) return extensions end
+      format.modify_build = format.modify_build or function(make) return make end
     end
     return format
   end



More information about the tex-live-commits mailing list