texlive[60077] trunk: make4ht (26jul21)

commits+karl at tug.org commits+karl at tug.org
Mon Jul 26 22:29:50 CEST 2021


Revision: 60077
          http://tug.org/svn/texlive?view=revision&revision=60077
Author:   karl
Date:     2021-07-26 22:29:50 +0200 (Mon, 26 Jul 2021)
Log Message:
-----------
make4ht (26jul21)

Modified Paths:
--------------
    trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht
    trunk/Build/source/texk/texlive/linked_scripts/texlive/fmtutil.pl
    trunk/Master/texmf-dist/doc/support/make4ht/README
    trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex
    trunk/Master/texmf-dist/doc/support/make4ht/make4ht-doc.pdf
    trunk/Master/texmf-dist/doc/support/make4ht/readme.tex
    trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-collapsetoc.lua
    trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-joincolors.lua
    trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-tablerows.lua
    trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-common_domfilters.lua
    trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-staticsite.lua
    trunk/Master/texmf-dist/scripts/make4ht/make4ht
    trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua

Added Paths:
-----------
    trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-itemparagraphs.lua
    trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-sectionid.lua

Removed Paths:
-------------
    trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-build_changed.lua
    trunk/Master/texmf-dist/scripts/make4ht/make4ht-odtfilter.lua

Modified: trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Build/source/texk/texlive/linked_scripts/make4ht/make4ht	2021-07-26 20:29:50 UTC (rev 60077)
@@ -29,7 +29,7 @@
 
 -- set version number. the template should be replaced by the
 -- actual version number by the build script
-local version = "v0.3g"
+local version = "v0.3h"
 mkparams.version_number = version
 
 local args = mkparams.get_args()
@@ -76,6 +76,9 @@
 if make:length() < 1 then
 	if mode == "draft" then
 		make:htlatex()
+  elseif mode == "clean" then
+    make:clean()
+    make.no_dvi_process = true
 	else
 		make:htlatex()
 		make:htlatex()
@@ -84,7 +87,7 @@
 end
 
 
-if not args["no-tex4ht"] then
+if not args["no-tex4ht"] and not make.no_dvi_process then
   make:tex4ht()
 end
 
@@ -92,7 +95,11 @@
 if #make.image_patterns > 0 then
   make.params.t4ht_par = make.params.t4ht_par .. " -p"
 end
-make:t4ht {ext = ext}
+
+if not make.no_dvi_process then
+  make:t4ht {ext = ext}
+end
+
 -- run extensions which modify the build sequence
 if #extensions > 0 then
   make = mkutils.extensions_modify_build(extensions, make)

Modified: trunk/Build/source/texk/texlive/linked_scripts/texlive/fmtutil.pl
===================================================================
--- trunk/Build/source/texk/texlive/linked_scripts/texlive/fmtutil.pl	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Build/source/texk/texlive/linked_scripts/texlive/fmtutil.pl	2021-07-26 20:29:50 UTC (rev 60077)
@@ -1,5 +1,5 @@
 #!/usr/bin/env perl
-# $Id: fmtutil.pl 59983 2021-07-18 22:18:08Z karl $
+# $Id: fmtutil.pl 60057 2021-07-25 18:09:03Z karl $
 # fmtutil - utility to maintain format files.
 # (Maintained in TeX Live:Master/texmf-dist/scripts/texlive.)
 # 
@@ -24,11 +24,11 @@
   TeX::Update->import();
 }
 
-my $svnid = '$Id: fmtutil.pl 59983 2021-07-18 22:18:08Z karl $';
-my $lastchdate = '$Date: 2021-07-19 00:18:08 +0200 (Mon, 19 Jul 2021) $';
+my $svnid = '$Id: fmtutil.pl 60057 2021-07-25 18:09:03Z karl $';
+my $lastchdate = '$Date: 2021-07-25 20:09:03 +0200 (Sun, 25 Jul 2021) $';
 $lastchdate =~ s/^\$Date:\s*//;
 $lastchdate =~ s/ \(.*$//;
-my $svnrev = '$Revision: 59983 $';
+my $svnrev = '$Revision: 60057 $';
 $svnrev =~ s/^\$Revision:\s*//;
 $svnrev =~ s/\s*\$$//;
 my $version = "r$svnrev ($lastchdate)";
@@ -432,12 +432,14 @@
   #
   # for formats that load other formats (e.g., jadetex loads latex.fmt),
   # add the current directory to TEXFORMATS, too.  Currently unnecessary
-  # for MFBASES and MPMEMS.
+  # for MFBASES.
   $ENV{'TEXFORMATS'} ||= "";
   $ENV{'TEXFORMATS'} = "$tmpdir$sep$ENV{TEXFORMATS}";
 
-  # switch to temporary directory for format generation
-  $opts{"dry-run"} || chdir($tmpdir)
+  # switch to temporary directory for format generation; on the other hand,
+  # for -n, the tmpdir won't exist, but we don't want to find a spurious
+  # tex.fmt in the cwd. Probably won't be such things in /.
+  chdir($opts{"dry-run"} ? "/" : $tmpdir)
   || die "Cannot change to directory $tmpdir: $!";
   
   # we rebuild formats in two rounds:
@@ -772,63 +774,72 @@
                   . "$prgswitch $texargs";
   print_verbose("running \`$cmdline' ...\n");
 
-  {
-    my $texpool = $ENV{'TEXPOOL'};
-    if ($localpool) {
-      $ENV{'TEXPOOL'} = cwd() . $sep . ($texpool ? $texpool : "");
-    }
+  my $texpool = $ENV{'TEXPOOL'};
+  if ($localpool) {
+    $ENV{'TEXPOOL'} = cwd() . $sep . ($texpool ? $texpool : "");
+  }
 
-    # in mktexfmtMode we must redirect *all* output to stderr
-    $cmdline .= " >&2" if $mktexfmtMode;
-    $cmdline .= " <$nul";
-    my $retval = system("$DRYRUN$cmdline");
-    
-    # report error if it failed.
-    if ($retval != 0) {
-      $retval /= 256 if ($retval > 0);
-      print_deferred_error("running \`$cmdline' return status: $retval\n");
-    }
+  # in mktexfmtMode we must redirect *all* output to stderr
+  $cmdline .= " >&2" if $mktexfmtMode;
+  $cmdline .= " <$nul";
+  my $retval = system("$DRYRUN$cmdline");
 
-    # Copy the log file after the program is run, so that the log file
-    # is available to inspect even on failure. So we need the dest dir tree.
-    TeXLive::TLUtils::mkdirhier($destdir) if ! $opts{"dry-run"};
-    #
+  # report error if it failed.
+  if ($retval != 0) {
+    $retval /= 256 if ($retval > 0);
+    print_deferred_error("running \`$cmdline' return status: $retval\n");
+  }
+
+  # Copy the log file after the program is run, so that the log file
+  # is available to inspect even on failure. So we need the dest dir tree.
+  TeXLive::TLUtils::mkdirhier($destdir) if ! $opts{"dry-run"};
+  #
+  if ($opts{"dry-run"}) {
+    print_info("would copy log file to: $destdir/$logfile\n");
+  } else {
     # Here and in the following we use copy instead of move
     # to make sure that in SElinux enabled cases the rules of
     # the destination directory are applied.
     # See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=900580
+    # 
     if (TeXLive::TLUtils::copy("-f", $logfile, "$destdir/$logfile")) {
       print_info("log file copied to: $destdir/$logfile\n");
     } else {
-      print_deferred_error("cannot copy log $logfile to: $destdir\n")
-        unless $opts{"dry-run"};
+      print_deferred_error("failed to copy log $logfile to: $destdir\n");
     }
+  }
 
-    # original shell script did *not* check the return value
-    # we keep this behavior, but add an option --strict that
-    # errors out on all failures.
-    if ($retval != 0 && $opts{'strict'}) {
-      print_deferred_error("returning error due to option --strict\n");
-      return $FMT_FAILURE;
-    }
+  # original shell script did *not* check the return value
+  # we keep this behavior, but add an option --strict that
+  # errors out on all failures.
+  if ($retval != 0 && $opts{'strict'}) {
+    print_deferred_error("returning error due to option --strict\n");
+    return $FMT_FAILURE;
+  }
 
-    if ($localpool) {
-      if ($texpool) {
-        $ENV{'TEXPOOL'} = $texpool;
-      } else {
-        delete $ENV{'TEXPOOL'};
-      }
+  if ($localpool) {
+    if ($texpool) {
+      $ENV{'TEXPOOL'} = $texpool;
+    } else {
+      delete $ENV{'TEXPOOL'};
     }
   }
 
+  # if this was a dry run, we don't expect anything to have been
+  # created, so there's nothing to inspect or copy. Call it good.
+  if ($opts{"dry-run"}) {
+    print_info("dry run, so returning success: $fmtfile\n");
+    return $FMT_SUCCESS;
+  }
+
   # check and install of fmt and log files
-  if (! -f $fmtfile) {
-    print_deferred_error("\`$cmdline' failed (no $fmtfile)\n");
+  if (! -s $fmtfile) {
+    print_deferred_error("no (or empty) $fmtfile made by: $cmdline\n");
     return $FMT_FAILURE;
   }
 
   if (! -f $logfile) {
-    print_deferred_error("no log file generated for $fmt/$eng, strange\n");
+    print_deferred_error("no log file generated for: $fmt/$eng\n");
     return $FMT_FAILURE;
   }
 
@@ -1243,7 +1254,7 @@
 # returns 1 if actually saved due to changes
 sub save_fmtutil {
   my $fn = shift;
-  return if $opts{'dry-run'};
+  return 0 if $opts{'dry-run'};
   my %fmtf = %{$alldata->{'fmtutil'}{$fn}};
   if ($fmtf{'changed'}) {
     TeXLive::TLUtils::mkdirhier(dirname($fn));

Modified: trunk/Master/texmf-dist/doc/support/make4ht/README
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/README	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/doc/support/make4ht/README	2021-07-26 20:29:50 UTC (rev 60077)
@@ -175,6 +175,19 @@
 
     $ make4ht -um draft filename.tex
 
+Another built-in mode is `clean`. It executes the `Make:clean()` command to
+remove all generated and temporary files from the current directory. 
+No \LaTeX\ compilation happens in this mode. 
+
+It should be used in this way:
+    
+    # copy generated files to a directory
+    $ make4ht -d outdir filename.tex 
+    # remove all generated files in the current dir
+    # the -a info option will print files that are removed
+    $ make4ht -m clean -a info filename.tex
+    
+
 More information about the build files can be found in section \ref{sec:buildfiles}.
 
 ## Handling of the generated files
@@ -248,7 +261,7 @@
 
 :    clean the HTML file using DOM filters. It is more powerful than
 `common_filters`. Used DOM filters are `fixinlines`, `idcolons`,
-`joincharacters`, and `tablerows`.
+`joincharacters`, `sectionid` and `tablerows`.
 
 detect\_engine
 
@@ -416,6 +429,12 @@
 
 :    One call to the TeX engine with special configuration for loading of the `tex4ht.sty` package.
 
+`Make:clean`
+
+:    This command removes all generated files, including images, HTML files and
+     various auxiliary files, from the current directory. It keeps files whose
+     file names don't match the input file name.
+
 `Make:httex`
 
 :    Variant of `Make:htlatex` suitable for Plain \TeX.
@@ -653,6 +672,12 @@
 
 :  fix common issues for MathML.
 
+sectionid
+
+:  create `id` attribute for HTML sectioning elements derived from the section
+   title. It also updates links to these sections. Use the `notoc` command line
+   option to prevent that.
+
 t4htlinks
 
 :  fix hyperlinks in the ODT format.
@@ -935,7 +960,7 @@
 
 `toc_query` 
 
-:  CSS selector for selecting the table of contents container. 
+:  CSS selector for selection of element that contains the table of contents. 
 
 `title_query`
 
@@ -945,12 +970,33 @@
 
 :  table containing a hierarchy of classes used in TOC
 
+`max_depth`
+
+:  set depth of displayed child TOC levels
+
 Default values:
 
     filter_settings "collapsetoc" {
       toc_query = ".tableofcontents",
-      title_query = ".partHead a, .chapterHead a, .sectionHead a, .subsectionHead a",
-      toc_levels = {"partToc", "chapterToc", "sectionToc", "subsectionToc", "subsubsectionToc"}
+      title_query = "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a",
+      max_depth = 1,
+      toc_levels = {
+        tocpart = 1,
+        toclikepart = 1,
+        tocappendix = 2,
+        toclikechapter = 2,
+        tocchapter = 2,
+        tocsection = 3,
+        toclikesection = 3,
+        tocsubsection = 4,
+        toclikesubsection = 4,
+        tocsubsubsection = 5,
+        toclikesubsubsection = 5,
+        tocparagraph = 6,
+        toclikeparagraph = 6,
+        tocsubparagraph = 7,
+        toclikesubparagraph = 7,
+      }
     }
 
 ## The `fixinlines` dom filter 
@@ -1034,15 +1080,22 @@
 table value is a function, it is executed with current parameters and HTML page
 DOM object as arguments.
 
+remove\_maketitle
+
+:  the `staticsite` extension removes text produced by the `\maketitle` command by default. Set this 
+option to `false` to disable the removal.
+
 Example:
 
 
+    -- set the environmental variable 'blog_root' with path to 
+    -- the directory that should hold the generated HTML files
     local outdir = os.getenv "blog_root" 
     
     filter_settings "staticsite" {
       site_root = outdir, 
       map = {
-        [".css$"] = "../css/"
+        [".css$"] = "/css/"
       },
       header = {
          layout="post",
@@ -1262,6 +1315,20 @@
 
 The former way is preferable, though.
 
+## Table of Contents points to a wrong destination
+
+The `sectionid` DOM filter creates better link destinations for sectioning commands.
+In some cases, for example if you use Pandoc, the document may already contain the
+link destination with the same name. In such cases the original destination is preserved 
+in the file. In this case links to the section will point to that place, instead of
+correct destination in the section. This may happen for example if you use Pandoc for
+the Markdown to \LaTeX\ conversion. It creates `\hypertarget` commands that are placed 
+just before the section. The links point to that place, instead of the actual section. 
+
+In this case you don't want to update links. Use the `notoc` option to prevent that.
+
+
+
 ## Filenames containing spaces
 
 `tex4ht` command cannot handle filenames containing spaces. to fix this issue, `make4ht` 

Modified: trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/doc/support/make4ht/changelog.tex	2021-07-26 20:29:50 UTC (rev 60077)
@@ -3,6 +3,140 @@
 
 \begin{itemize}
 \item
+  2021/07/25
+
+  \begin{itemize}
+  \tightlist
+  \item
+    version \texttt{0.3h} released.
+  \end{itemize}
+\item
+  2021/07/25
+
+  \begin{itemize}
+  \tightlist
+  \item
+    use current directory as default output dir in \texttt{staticsite}
+    extension.
+  \end{itemize}
+\item
+  2021/07/23
+
+  \begin{itemize}
+  \tightlist
+  \item
+    fixed detection of single paragraphs inside
+    \texttt{\textless{}li\textgreater{}} in the \texttt{itemparagraphs}
+    DOM filter.
+  \end{itemize}
+\item
+  2021/07/18
+
+  \begin{itemize}
+  \tightlist
+  \item
+    remove elements produced by \texttt{\textbackslash{}maketitle} in
+    the \texttt{staticsite} extension.
+  \end{itemize}
+\item
+  2021/07/05
+
+  \begin{itemize}
+  \tightlist
+  \item
+    sort colors alphabetically in the \texttt{joincolors} DOM filter to
+    enable reproducible builds.
+  \end{itemize}
+\item
+  2021/06/26
+
+  \begin{itemize}
+  \tightlist
+  \item
+    rewrote the \texttt{collapsetoc} DOM filter.
+  \end{itemize}
+\item
+  2021/06/20
+
+  \begin{itemize}
+  \tightlist
+  \item
+    test for the \texttt{svg} picture mode in the \texttt{tex4ht}
+    command. Use the \texttt{-g.svg} option if it is detected. This is
+    necessary for correct support of pictorial characters.
+  \end{itemize}
+\item
+  2021/06/16
+
+  \begin{itemize}
+  \tightlist
+  \item
+    better handling of duplicate ID attributes in \texttt{sectionid} DOM
+    filter.
+  \item
+    support \texttt{notoc} option in \texttt{sectionid}.
+  \end{itemize}
+\item
+  2021/06/13
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added \texttt{itemparagraphs} DOM filter. It removes unnecessary
+    paragraphs from \texttt{\textless{}li\textgreater{}} elements.
+  \end{itemize}
+\item
+  2021/05/06
+
+  \begin{itemize}
+  \tightlist
+  \item
+    remove \texttt{\textless{}hr\textgreater{}} elements in
+    \texttt{.hline} rows in \texttt{tablerows} DOM filter.
+  \end{itemize}
+\item
+  2021/05/01
+
+  \begin{itemize}
+  \tightlist
+  \item
+    added function \texttt{mkutils.isModuleAvailable}. It checks if Lua
+    library is available.
+  \item
+    check for \texttt{char-def} library in \texttt{sectionid} DOM
+    filter.
+  \end{itemize}
+\item
+  2021/04/08
+
+  \begin{itemize}
+  \tightlist
+  \item
+    removed \texttt{build\_changed}. New script,
+    \href{https://github.com/michal-h21/siterebuild}{siterebuild},
+    should be used instead.
+  \item
+    new DOM filter, \texttt{sectionid}. It uses sanitized titles instead
+    of automatically generated numbers as section IDs.
+  \item
+    added \texttt{sectionid} to \texttt{common\_domfilters}.
+  \item
+    use \texttt{context} in the Docker file, because it contains the
+    \texttt{char-def.lua} file.
+  \end{itemize}
+\item
+  2021/03/20
+
+  \begin{itemize}
+  \tightlist
+  \item
+    use \texttt{kpse} library when files are copied to the output
+    directory.
+  \item
+    added \texttt{clean} mode. It removes all generated, temporary and
+    auxiliary files.
+  \end{itemize}
+\item
   2021/03/19
 
   \begin{itemize}

Modified: trunk/Master/texmf-dist/doc/support/make4ht/make4ht-doc.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/doc/support/make4ht/readme.tex
===================================================================
--- trunk/Master/texmf-dist/doc/support/make4ht/readme.tex	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/doc/support/make4ht/readme.tex	2021-07-26 20:29:50 UTC (rev 60077)
@@ -232,6 +232,21 @@
 $ make4ht -um draft filename.tex
 \end{verbatim}
 
+Another built-in mode is \texttt{clean}. It executes the
+\texttt{Make:clean()} command to remove all generated and temporary
+files from the current directory. No \LaTeX~compilation happens in this
+mode.
+
+It should be used in this way:
+
+\begin{verbatim}
+# copy generated files to a directory
+$ make4ht -d outdir filename.tex 
+# remove all generated files in the current dir
+# the -a info option will print files that are removed
+$ make4ht -m clean -a info filename.tex
+\end{verbatim}
+
 More information about the build files can be found in section
 \ref{sec:buildfiles}.
 
@@ -328,7 +343,8 @@
 \item[common\_domfilters]
 clean the HTML file using DOM filters. It is more powerful than
 \texttt{common\_filters}. Used DOM filters are \texttt{fixinlines},
-\texttt{idcolons}, \texttt{joincharacters}, and \texttt{tablerows}.
+\texttt{idcolons}, \texttt{joincharacters}, \texttt{sectionid} and
+\texttt{tablerows}.
 \item[detect\_engine]
 detect engine and format necessary for the document compilation from the
 magic comments supported by \LaTeX~editors such as TeXShop or TeXWorks.
@@ -512,6 +528,10 @@
 \item[\texttt{Make:htlatex}]
 One call to the TeX engine with special configuration for loading of the
 \texttt{tex4ht.sty} package.
+\item[\texttt{Make:clean}]
+This command removes all generated files, including images, HTML files
+and various auxiliary files, from the current directory. It keeps files
+whose file names don't match the input file name.
 \item[\texttt{Make:httex}]
 Variant of \texttt{Make:htlatex} suitable for Plain \TeX.
 \item[\texttt{Make:latexmk}]
@@ -723,6 +743,10 @@
 remove spurious rows from HTML tables.
 \item[mathmlfixes]
 fix common issues for MathML.
+\item[sectionid]
+create \texttt{id} attribute for HTML sectioning elements derived from
+the section title. It also updates links to these sections. Use the
+\texttt{notoc} command line option to prevent that.
 \item[t4htlinks]
 fix hyperlinks in the ODT format.
 \end{description}
@@ -1023,12 +1047,15 @@
 
 \begin{description}
 \item[\texttt{toc\_query}]
-CSS selector for selecting the table of contents container.
+CSS selector for selection of element that contains the table of
+contents.
 \item[\texttt{title\_query}]
 CSS selector for selecting all elements that contain the section ID
 attribute.
 \item[\texttt{toc\_levels}]
 table containing a hierarchy of classes used in TOC
+\item[\texttt{max\_depth}]
+set depth of displayed child TOC levels
 \end{description}
 
 Default values:
@@ -1036,8 +1063,25 @@
 \begin{verbatim}
 filter_settings "collapsetoc" {
   toc_query = ".tableofcontents",
-  title_query = ".partHead a, .chapterHead a, .sectionHead a, .subsectionHead a",
-  toc_levels = {"partToc", "chapterToc", "sectionToc", "subsectionToc", "subsubsectionToc"}
+  title_query = "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a",
+  max_depth = 1,
+  toc_levels = {
+    tocpart = 1,
+    toclikepart = 1,
+    tocappendix = 2,
+    toclikechapter = 2,
+    tocchapter = 2,
+    tocsection = 3,
+    toclikesection = 3,
+    tocsubsection = 4,
+    toclikesubsection = 4,
+    tocsubsubsection = 5,
+    toclikesubsubsection = 5,
+    tocparagraph = 6,
+    toclikeparagraph = 6,
+    tocsubparagraph = 7,
+    toclikesubparagraph = 7,
+  }
 }
 \end{verbatim}
 
@@ -1145,17 +1189,23 @@
 table with variables to be set in the YAML header in HTML files. If the
 table value is a function, it is executed with current parameters and
 HTML page DOM object as arguments.
+\item[remove\_maketitle]
+the \texttt{staticsite} extension removes text produced by the
+\texttt{\textbackslash{}maketitle} command by default. Set this option
+to \texttt{false} to disable the removal.
 \end{description}
 
 Example:
 
 \begin{verbatim}
+-- set the environmental variable 'blog_root' with path to 
+-- the directory that should hold the generated HTML files
 local outdir = os.getenv "blog_root" 
 
 filter_settings "staticsite" {
   site_root = outdir, 
   map = {
-    [".css$"] = "../css/"
+    [".css$"] = "/css/"
   },
   header = {
      layout="post",
@@ -1405,6 +1455,24 @@
 
 The former way is preferable, though.
 
+\hypertarget{table-of-contents-points-to-a-wrong-destination}{%
+\subsection{Table of Contents points to a wrong
+destination}\label{table-of-contents-points-to-a-wrong-destination}}
+
+The \texttt{sectionid} DOM filter creates better link destinations for
+sectioning commands. In some cases, for example if you use Pandoc, the
+document may already contain the link destination with the same name. In
+such cases the original destination is preserved in the file. In this
+case links to the section will point to that place, instead of correct
+destination in the section. This may happen for example if you use
+Pandoc for the Markdown to \LaTeX~conversion. It creates
+\texttt{\textbackslash{}hypertarget} commands that are placed just
+before the section. The links point to that place, instead of the actual
+section.
+
+In this case you don't want to update links. Use the \texttt{notoc}
+option to prevent that.
+
 \hypertarget{filenames-containing-spaces}{%
 \subsection{Filenames containing
 spaces}\label{filenames-containing-spaces}}

Modified: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-collapsetoc.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-collapsetoc.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-collapsetoc.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -1,198 +1,249 @@
+-- mini TOC support for make4ht
 local domobject = require "luaxml-domobject"
 
 local filter = require "make4ht-filter"
 local log = logging.new "collapsetoc"
+local mktuils = require "mkutils"
 
 
-local toc_levels = {"partToc", "chapterToc", "sectionToc", "subsectionToc", "subsubsectionToc"}
+-- assign levels to entries in the .4tc file
+local toc_levels = {
+  tocpart = 1,
+  toclikepart = 1,
+  tocappendix = 2,
+  toclikechapter = 2,
+  tocchapter = 2,
+  tocsection = 3,
+  toclikesection = 3,
+  tocsubsection = 4,
+  toclikesubsection = 4,
+  tocsubsubsection = 5,
+  toclikesubsubsection = 5,
+  tocparagraph = 6,
+  toclikeparagraph = 6,
+  tocsubparagraph = 7,
+  toclikesubparagraph = 7,
+}
 
-local debug_print = function(...) log:debug(...) end
+-- number of child levels to be kept
+-- the depth of 1 ensures that only direct children of the current sectioning command 
+-- will be kept in TOC
+local max_depth = 1
 
 
--- return toc element type and it's id
-local function get_id(el)
-  local name =  el:get_attribute "class"
-  local id
-  local a = el:query_selector "a" or {}
-  local first = a[1]
-  if first then
-    local href = first:get_attribute "href"
-    id = href:match("#(.+)$")
+-- debugging function to test correct structure of the TOC tree
+local function print_tree(tree, level) 
+  local level = level or 0
+  log:debug(string.rep(" ", level) .. (tree.type or "root"), tree.id)
+  for k, v in pairs(tree.children) do
+    print_tree(v, level + 2)
   end
-  return name, id
 end
 
-local function remove_sections(part_elements, currentpart)
-  -- we need to remove toc entries from the previous part if the
-  -- current document isn't part of it
-  if currentpart == false then
-    for _, part in ipairs(part_elements) do
-      part:remove_node()
+-- convert the parsed toc entries to a tree structure
+local function make_toc_tree(tocentries, lowestlevel, position, tree)
+  local position = position or 1
+  local tree = tree or {
+    level = lowestlevel - 1,
+    children = {}
+  }
+  local stack = {tree}
+  if position > #tocentries then return tree, position end
+  -- loop over TOC entries and make a tree
+  for i = 1, #tocentries do
+    -- initialize new child
+    local element = tocentries[i]
+    element.children = element.children or {}
+    local parent = stack[#stack]
+    local level_diff = element.level - parent.level
+    if level_diff == 0 then -- entry is sibling of parent
+      -- current parent is sibling of the current elemetn, true parent is 
+      -- sibling's parent
+      parent = parent.parent
+      -- we must replace sibling element with the current element in stact
+      -- so the child elements get correct parent
+      table.remove(stack)
+      table.insert(stack, element)
+    elseif level_diff > 0 then -- entry is child of parent
+      for x = 1, level_diff do
+        table.insert(stack, element)
+      end
+    else
+      -- we must remove levels from the stack to get the correct parent
+      for x =1 , level_diff, -1 do
+        if #stack > 0 then
+          parent = table.remove(stack)
+        end
+      end
+      -- we must reinsert parent back to stack, place the current element to stact too
+      table.insert(stack, parent)
+      table.insert(stack, element)
     end
+    table.insert(parent.children, element)
+    element.parent = parent
   end
+  print_tree(tree)
+  return tree
 end
 
-local function make_toc_selector(toc_levels)
-  local level_classes = {}
-  for _, l in ipairs(toc_levels) do
-    level_classes[#level_classes+1] = "." .. l
+-- find first sectioning element in the current page
+local function find_headers(dom, header_levels)
+  -- we need to find id attributes in <a> elements that are children of sectioning elements
+  local ids = {}
+  for _, header in ipairs(dom:query_selector(header_levels)) do
+    local id = header:get_attribute "id"
+    if id then ids[#ids+1] = id end
   end
-  return table.concat(level_classes, ", ")
+  return ids
 end
 
-local function find_toc_levels(toc)
-  -- find toc levels used in the document
-  -- it ecpects that sectioning levels appears in the TOC in the descending order
-  local levels, used = {}, {}
-  local level = 1 
-  -- we still expect the standard class names
-  local toc_selector = make_toc_selector(toc_levels)
-  for _, el in ipairs(toc:query_selector(toc_selector)) do
-    local class = el:get_attribute("class")
-    if not used[class] then
-      table.insert(levels, class)
-      used[class] = level
-      level = level + 1
+
+-- process list of ids and find those that should be kept:
+-- siblings, children, parents and top level
+local function find_toc_entries_to_keep(ids, tree)
+  local tree = tree or {}
+  -- all id in TOC tree that we want to kepp are saved in this table
+  local ids_to_keep = {}
+  -- find current id in the TOC tree
+  local function find_id(id, tree)
+    if tree.id == id then return tree end
+    if not tree.children or #tree.children == 0 then return false end
+    for k,v in pairs(tree.children) do
+      local found_id = find_id(id, v)
+      if found_id then return found_id end
     end
+    return false
   end
-  return levels, used
+  -- always keep top level of the hiearchy
+  local function keep_toplevel(tree)
+    for _, el in ipairs(tree.children) do
+      ids_to_keep[el.id] = true
+    end
+  end
+  -- we want to keep all children in TOC hiearchy
+  local function keep_children(element, depth)
+    local depth = depth or 1
+    local max_depth = max_depth or 1
+    -- stop processing when there are no children
+    for _, el in pairs(element.children or {}) do
+      if el.id then ids_to_keep[el.id] = true end
+      -- by default, we keep just direct children of the current sectioning element
+      if depth < max_depth then
+        keep_children(el, depth + 1)
+      end
+    end
+  end
+  -- also keep all siblings
+  local function keep_siblings(element)
+    local parent = element.parent
+    for k, v in pairs(parent.children or {}) do
+      ids_to_keep[v.id] = true
+    end
+  end
+  -- and of course, keep all parents
+  local function keep_parents(element)
+    local parent = element.parent
+    if parent and parent.id then
+      ids_to_keep[parent.id] = true
+      -- we should keep siblings of all parents as well
+      keep_siblings(parent)
+      keep_parents(parent)
+    end
+  end
+  -- always keep the top-level TOC hiearchy, even if we cannot find any sectioning element on the page
+  keep_toplevel(tree)
+  for _, id in ipairs(ids) do
+    -- keep the current id
+    ids_to_keep[id] = true
+    local found_element = find_id(id, tree)
+    if found_element then
+      keep_children(found_element)
+      keep_siblings(found_element)
+      keep_parents(found_element)
+    end
+  end
+  return ids_to_keep
 end
 
+-- process the .4tc file and convert entries to a tree structure
+-- based on the sectioning level
+local function parse_4tc(parameters, toc_levels)
+  local tcfilename = parameters.input .. ".4tc"
+  if not mkutils.file_exists(tcfilename) then 
+    log:warning("Cannot find TOC: " .. tcfilename)
+    return {}
+  end
+  local tocentries = {}
+  local f = io.open(tcfilename, "r")
+  -- we need to find the lowest level used in the TOC
+  local lowestlevel = 999
+  for line in f:lines() do
+    -- entries looks like: \doTocEntry\tocsubsection{1.2.2}{\csname a:TocLink\endcsname{5}{x5-60001.2.2}{QQ2-5-6}{aaaa}}{7}\relax 
+    -- we want do extract tocsubsection and x5-60001.2.2
+    local toctype, id = line:match("\\doTocEntry\\(.-){.-}{.-{.-}{(.-)}")
+    if toctype then
+      local level = toc_levels[toctype]
+      if not level then 
+        log:warning("Cannot find TOC level for: " .. toctype)
+      else
+        lowestlevel = level < lowestlevel and level or lowestlevel
+        table.insert(tocentries, {type = toctype, id = id, level = level})
+      end
+    end
+  end
+  f:close()
+  local toc =  make_toc_tree(tocentries, lowestlevel)
+  return toc
+end
+
 local function remove_levels(toc, matched_ids)
-  -- sort the matched sections according to their levels
-  local levels, level_numbers = find_toc_levels(toc)
-  debug_print("remove levels", #levels)
-  -- for _, level in ipairs(levels) do
-  --   print(level, level_numbers[level], matched_ids[level])
-  -- end
-  local keep_branch = false
-  local matched_levels = {}
-  local toc_selector = make_toc_selector(toc_levels)
-  for _, el in ipairs(toc:query_selector(toc_selector)) do
-    local name, id = get_id(el)
-    -- get the current toc hiearchy level
-    local level = level_numbers[name]
-    -- get the matched id for the current level
-    local level_id = matched_ids[name]
-    local matched = level_id == id
-    local remove = true
-    -- we will use this for toc elements at lower hiearchy than is the top sectioning level on the page
-    if matched then keep_branch = true end
-    -- find the parent level to the current section level
-    local parent_level = toc_levels[level - 1]
-    local parent_matched = matched_levels[parent_level]
-    if matched then 
-      debug_print("match",name, id, level_id, level, #levels)
-      keep_branch = true
-      remove = false
-    elseif level==1 then 
-      -- don't remove the top level 
-      debug_print("part",name, id, level_id, level)
-      remove = false
-      matched_levels = {}
-      if not matched then keep_branch = false end
-    elseif keep_branch then 
-      -- if level >= (#levels - 1) then
-        if level > matched_ids._levels then
-          debug_print("level",name, id, level_id, level, parent_level, parent_matched)
-          remove = false
-        elseif matched_ids.ids[id] then
-          debug_print("matched id",name, id, level_id, level, parent_level, parent_matched)
-          remove = false
-        elseif parent_matched  then
-          debug_print("parent_matched",name, id, level_id, level, parent_level, parent_matched)
-          keep_branch = false
-          remove = false
-        end
-      -- else
-        -- print("remove", name, id, level_id, level, #matched_ids)
-      -- end
-    elseif parent_matched then
-      debug_print("parent_matched alternative",name, id, level_id, level, parent_level, parent_matched)
-      remove = false
-    else
-      debug_print("else",name, id, level_id, level, keep_branch)
-      keep_branch = false
+  -- remove TOC links whose target id is not a key of matched_ids (the entries to keep)
+  for _, link in ipairs(toc:query_selector("a")) do
+    local href = link:get_attribute("href")
+    -- find id in the href; <a> elements without href are skipped instead of raising
+    local id = href and href:match("#(.+)")
+    if id and not matched_ids[id] then
+      -- toc links are in <span> elements that can contain the section number
+      -- we must remove them too
+      local parent = link:get_parent()
+      if parent:get_element_name() == "span" then
+        parent:remove_node()
+      else
+        -- if the parent node isn't <span>, remove at least the link itself
+        link:remove_node()
+      end
     end
-    matched_levels[name] = matched
-    if remove then
-      el:remove_node()
-      --print(name,id, level_id,  matched)
-    end
   end
-  
 end
 
 
-
-
-
--- local process = filter{ function(s)
-  -- local dom = domobject.parse(s)
-local function collapse_toc(dom, par)
+local function collapsetoc(dom, parameters)
   -- set options
+  local par = parameters
   local options = get_filter_settings "collapsetoc"
+  -- query to find the TOC element in DOM
   local toc_query = par.toc_query or options.toc_query or ".tableofcontents"
-  local title_query = par.title_query or options.title_query or ".partHead a, .chapterHead a, .sectionHead a, .subsectionHead a"
-  toc_levels = par.toc_levels or options.toc_levels or toc_levels
-  -- keep track of current id of each sectioning level
-  local current_ids, matched_ids = {}, {_levels = 0, ids = {}}
-  -- search sectioning elements
-  local titles = dom:query_selector(title_query)
-  local section_ids = {}
-  for _, x in ipairs(titles) do
-    -- get their id attributes and save them in a table
-    section_ids[#section_ids+1] = x:get_attribute("id")
+  -- query to select sectioning elements with id's
+  local title_query = par.title_query or options.title_query or "h1 a, h2 a, h3 a, h4 a, h5 a, h6 a" 
+  -- level of child levels to be kept in TOC
+  max_depth = par.max_depth or options.max_depth or max_depth
+  -- set level numbers for particular TOC entry types
+  local user_toc_levels = par.toc_levels or options.toc_levels or {}
+  -- join user's levels with default
+  for k,v in pairs(user_toc_levels) do toc_levels[k] = v end
+  -- parse the .4tc file to get TOC tree
+  toc = toc or parse_4tc(parameters, toc_levels)
+  -- find sections in the current html file
+  local ids = find_headers(dom, title_query)
+  log:debug("Ids", table.concat(ids, ","))
+  local ids_to_keep = find_toc_entries_to_keep(ids, toc)
+  local toc_dom = dom:query_selector(toc_query)[1]
+  if toc_dom then
+    remove_levels(toc_dom, ids_to_keep)
+  else
+    log:warning("Cannot find TOC element using query: " .. toc_query)
   end
-
-  -- we need to retrieve the first table of contents
-  local toctables = dom:query_selector(toc_query) or {}
-  -- process only when we got a TOC
-  debug_print("toc query", toc_query, #toctables)
-  if #toctables > 0 then
-    local tableofcontents = toctables[1]
-    -- all toc entries are in span elements
-    local toc = tableofcontents:query_selector("span")
-    local currentpart = false
-    local part_elements = {}
-    for _, el in ipairs(toc) do
-      -- get sectioning level and id of the current TOC entry
-      local name, id = get_id(el)
-      -- set the id of the current sectioning level
-      current_ids[name] = id
-      for _, sectid in ipairs(section_ids) do
-        -- detect if the current TOC entry match some sectioning element in the current document
-        if id == sectid then
-          currentpart = true
-          -- save the current id as a matched id
-          matched_ids.ids[id] = true
-          -- copy the TOC hiearchy for the current toc level
-          for i, level in ipairs(toc_levels) do 
-            -- print("xxx",i, level, current_ids[level])
-            matched_ids[level] = current_ids[level]
-            -- set the maximum matched level
-            if i > matched_ids._levels then matched_ids._levels = i end
-            if level == name then break end
-          end
-          debug_print("match", id)
-        end
-      end
-    end
-    remove_levels(tableofcontents, matched_ids)
-
-    -- remove sections from the last part
-    -- remove_sections(part_elements,currentpart)
-    -- remove unneeded br elements
-    local br = tableofcontents:query_selector("br")
-    for _, el in ipairs(br) do el:remove_node() end
-    -- remove unneded whitespace
-    for _, el in ipairs(tableofcontents:get_children()) do
-      if el:is_text() then el:remove_node() end
-    end
-  end
   return dom
-end 
+end
 
-return collapse_toc
-
--- Make:match("html$", process)
+return collapsetoc

Added: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-itemparagraphs.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-itemparagraphs.lua	                        (rev 0)
+++ trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-itemparagraphs.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -0,0 +1,26 @@
+-- TeX4ht puts the contents of all \item commands into paragraphs. We are not
+-- able to detect whether an item contains only one paragraph or more. If just one,
+-- we can remove the paragraph and put its contents directly into the <li> element.
+return function(dom)
+  for _, li in ipairs(dom:query_selector("li")) do
+    -- count the elements and collect the paragraphs that are direct children
+    -- of <li>; the paragraph is removed only if it is the only child element
+    -- (text nodes are not counted)
+    local el_count = 0
+    local par = {}
+    for _, el in ipairs(li._children) do
+      if el:is_element() then
+        el_count = el_count + 1
+        if el:get_element_name() == "p" then
+          par[#par+1] = el
+        end
+      end
+    end
+    if #par == 1 and el_count == 1 then
+      -- place paragraph children as direct children of <li>, this
+      -- effectively removes <p>
+      li._children = par[1]._children
+    end
+  end
+  return dom
+end


Property changes on: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-itemparagraphs.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-joincolors.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-joincolors.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-joincolors.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -25,6 +25,7 @@
   for class, color in pairs(used_colors) do
     t[#t+1] = string.format(".%s{color:%s;}", class, color)
   end
+  table.sort(t)
   return csscontent .. table.concat(t, "\n")
 end
 

Added: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-sectionid.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-sectionid.lua	                        (rev 0)
+++ trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-sectionid.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -0,0 +1,156 @@
+local mkutils   = require "mkutils"
+local log = logging.new("tocid")
+-- Unicode data distributed with ConTeXt
+-- defines "characters" table
+if not mkutils.isModuleAvailable("char-def") then
+  log:warning("char-def module not found")
+  log:warning("cannot fix section id's")
+  return function(dom) return dom end
+end
+require "char-def"
+local chardata = characters.data or {}
+
+
+local toc = nil
+
+local function is_letter(info)
+  -- test if character is letter
+  local category = info.category or ""
+  return category:match("^l") 
+end
+
+local function is_space(info)
+  local category = info.category or ""
+  return category == "zs"
+end
+
+local uchar = utf8.char
+local function normalize_letter(char, result)
+  local info = chardata[char] or {}
+  -- first get the lower case code of the letter (fall back to the letter itself)
+  local lowercase = info.lccode or char
+  -- remove accents. the base letter is in the shcode field
+  local lowerinfo = chardata[lowercase] or {}
+  -- when no shcode, use the current lowercase char
+  local shcode = lowerinfo.shcode or lowercase
+  -- shcode can be a table if it contains multiple characters;
+  -- normalize it to a table, so we can append all letters to
+  -- the result table
+  if type(shcode) ~= "table" then shcode = {shcode} end
+  for _, x in ipairs(shcode) do
+    result[#result+1] = uchar(x)
+  end
+end
+
+local escape_name = function(name)
+  -- keep only letters (normalized by normalize_letter) and spaces; drop the rest
+  local result = {}
+  for _,char in utf8.codes(name) do
+    local info = chardata[char] or {}
+    if is_space(info) then
+      result[#result+1] = " "
+    elseif is_letter(info) then
+      normalize_letter(char, result)
+    end
+  end
+  -- join the normalized characters and replace each run of whitespace with
+  -- a hyphen; the parentheses drop the substitution count returned by gsub
+  return (table.concat(result):gsub("%s+", "-"))
+end
+
+local function parse_toc_line(line)
+  -- the section ids and titles are saved in the following format:
+  -- \csname a:TocLink\endcsname{1}{x1-20001}{QQ2-1-2}{Nazdar světe}
+  -- ............................... id ................. title ...
+  local id, name = line:match("a:TocLink.-{.-}{(.-)}{.-}{(.-)}")
+  if id then
+    return id, escape_name(name)
+  end
+end
+
+local used = {}
+
+local function parse_toc(filename)
+  local toc = {}
+  if not mkutils.file_exists(filename) then return nil, "Cannot open TOC file "  .. filename end
+  for line in io.lines(filename) do
+    local id, name = parse_toc_line(line)
+    local orig_name = name
+    -- not all lines in the .4tc file contains TOC entries
+    if id then
+      -- test if the same name was used already. user should be notified
+      if used[name] then
+        -- update 
+        name = name .. used[name]
+        log:debug("Duplicate id found: ".. orig_name .. ". New id: " .. name)
+      end
+      used[orig_name] = (used[orig_name] or 0) + 1
+      toc[id] = name
+    end
+  end
+  return toc
+end
+
+-- we don't want to change the original id, as there may be links to it from the outside
+-- so we will set it to the parent element (which should be h[1-6])
+local function set_id(el, id)
+  local section = el:get_parent()
+  local section_id = section:get_attribute("id")
+  if section_id and section_id~=id then -- if it already has id, we don't override it, but create dummy child instead
+    local new = section:create_element("span", {id=id})
+    section:add_child_node(new,1)
+  else
+    section:set_attribute("id", id)
+  end
+
+end
+
+    
+
+return  function(dom, par)
+    local msg -- parse the TOC only once; "toc or parse_toc(...)" would drop the error message
+    if not toc then toc, msg = parse_toc(par.input .. ".4tc") end
+    msg = msg or "Cannot load TOC"
+    -- don't do anything if toc cannot be found
+    if not toc then
+      log:warning(msg)
+      return dom
+    end
+    -- if user selects the "notoc" option on the command line, we
+    -- will not update href links
+    local notoc = false
+    if par["tex4ht_sty_par"]:match("notoc") then notoc = true end
+    -- the HTML file can already contain ID that we want to assign
+    -- we will not set duplicate id from TOC in that case
+    local toc_ids = {}
+    for _, el in ipairs(dom:query_selector("[id]")) do
+      local id = el:get_attribute("id")
+      toc_ids[id] = true
+    end
+    -- process all elements with id attribute or <a href>
+    for _, el in ipairs(dom:query_selector "[id],a[href]") do
+      local id, href = el:get_attribute("id"), el:get_attribute("href")
+      if id then
+        local name = toc[id]
+        -- replace id with new section id
+        if name and not toc_ids[name] then
+          set_id(el, name)
+        else
+          if name then
+            log:debug("Document already contains id: " .. name)
+          end
+        end
+      end
+      if href and notoc == false then
+        -- replace links to sections with new id
+        local base, anchor = href:match("^(.*)%#(.+)")
+        local name = toc[anchor]
+        if name then
+          el:set_attribute("href", base .. "#" .. name)
+        end
+      end
+    end
+    return dom
+  end
+
+


Property changes on: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-sectionid.lua
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-tablerows.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-tablerows.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/domfilters/make4ht-tablerows.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -40,6 +40,12 @@
     -- that matches this pattern, so we should keep the row if we match them too)
     return not css:match(search_term)
   end
+  local hline_hr = function(row)
+    -- remove <hr> elements from "hline" rows
+    for _, hr in ipairs(row:query_selector(".hline hr")) do
+      hr:remove_node()
+    end
+  end
   local load_css_files = function()
     -- the empty rows can be styled using CSS, for example configuration for 
     -- Booktabs does that. We shouldn't remove such rows.
@@ -62,6 +68,7 @@
     -- find the empty rows
     for _, row in ipairs(tbl:query_selector("tr")) do
       if is_empty_row(row) and is_not_styled(row, css) then row:remove_node() end
+      hline_hr(row)
     end
 
   end

Deleted: trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-build_changed.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-build_changed.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-build_changed.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -1,68 +0,0 @@
--- this make4ht build file tries to recompile modified blog article sources
---
-
--- disable any compilation
-Make.build_seq = {}
-Make:add("tex4ht", "")
-Make:add("t4ht", "")
-
-local log = logging.new "compile newest"
-
-
-
--- construct the name of the generated HTML file from the .published file
-local function get_generated_html_name(published_file, directory, file_pattern)
-  local f = io.open(directory .. "/" .. published_file, "r")
-  local content = f:read("*all")
-  f:close()
-  local timestamp = tonumber(content)
-  local basename = mkutils.remove_extension(published_file)
-  local tex_file = basename .. ".tex"
-  -- expand fillename in the file_pattern
-  local expanded = file_pattern % {input = basename}
-  -- expand date in the file_pattern 
-  expanded = os.date(expanded, timestamp)
-  log:status("found source files :", directory, basename, expanded)
-  return {directory = directory, tex_file = tex_file, generated = expanded .. ".html"}
-end
-
--- process subdirectories of the basedir and look for the filename.published files
-local function find_published(basedir, file_pattern)
-  local published = {}
-  for f in lfs.dir(basedir) do
-    local fullname = basedir .. "/" .. f
-    local attributes = lfs.attributes(fullname)
-    -- process directories, but ignore . and ..
-    if attributes.mode == "directory" and f ~= "." and f~= ".." then
-      for name in lfs.dir(fullname) do
-        if name:match("published$") then
-          published[#published + 1]  =  get_generated_html_name(name, fullname, file_pattern)
-        end
-
-      end
-    end
-  end
-  return published
-end
-
--- find tex files that were modified later than the generated HTML files
-local function find_changed(published, site_root)
-  local keep = {}
-  for _, entry in ipairs(published) do
-    local source_attributes = lfs.attributes(entry.directory .. "/" .. entry.tex_file)
-    local dest_attributes = lfs.attributes(site_root .. "/" .. entry.generated)
-    -- 
-    print(entry.tex_file, entry.generated,  source_attributes.change < dest_attributes.change)
-  end
-end
-
-
-Make:add("rebuild", function(par)
-  local config = get_filter_settings "staticsite"
-  -- how the generated HTML files are named
-  local file_pattern = config.file_pattern or "%Y-%m-%d-${input}"
-  local published = find_published(par.tex_dir, file_pattern)
-  local changed = find_changed(published, config.site_root)
-end)
-
-Make:rebuild{tex_dir = "posts"}

Modified: trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-common_domfilters.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-common_domfilters.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-common_domfilters.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -26,7 +26,7 @@
     make:match("4om$", process, {charclasses= charclasses})
     count = 2
   else
-    local process = filter {"fixinlines", "idcolons", "joincharacters", "mathmlfixes", "tablerows","booktabs"}
+    local process = filter {"fixinlines", "idcolons", "joincharacters", "mathmlfixes", "tablerows","booktabs", "sectionid", "itemparagraphs"}
     make:match("html?$", process)
     count = 1
   end

Modified: trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-staticsite.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-staticsite.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/extensions/make4ht-ext-staticsite.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -16,7 +16,7 @@
     local f = io.open(published_name, "r")
     local readtime  = f:read("*line")
     time = tonumber(readtime)
-    log:info("Already pubslished", slug)
+    log:info("Already pubslished", os.date("%Y-%m-%d %H:%M", time))
     f:close()
   else
     -- escape 
@@ -76,7 +76,23 @@
   })
 end
 
+local function remove_maketitle(make)
+  -- use DOM filter to remove \maketitle block
+  local domfilter = require "make4ht-domfilter"
+  local process = domfilter {
+    function(dom)
+      local maketitles = dom:query_selector(".maketitle")
+      for _, el in ipairs(maketitles) do
+        log:debug("removing maketitle")
+        el:remove_node()
+      end
+      return dom
+    end
+  }
+  make:match("html$", process)
+end
 
+
 local function copy_files(filename, par)
   local function prepare_path(dir, subdir)
     local path = dir .. "/" .. subdir .. "/" .. filename
@@ -84,7 +100,7 @@
   end
   -- get extension settings
   local site_settings = get_filter_settings "staticsite"
-  local site_root = site_settings.site_root
+  local site_root = site_settings.site_root or "./"
   local map = site_settings.map or {}
   -- default path without subdir, will be used if the file is not matched
   -- by any pattern in the map
@@ -107,6 +123,14 @@
   local process = filter {
     "staticsite"
   }
+
+  -- detect if we should remove maketitle
+  local site_settings = get_filter_settings "staticsite"
+  -- \maketitle is removed by default, set `remove_maketitle=false` setting to disable that
+  if site_settings.remove_maketitle ~= false then
+    remove_maketitle(make)
+  end
+
   local settings = make.params
   -- get the published file name
   local slug = get_slug(settings)

Modified: trunk/Master/texmf-dist/scripts/make4ht/make4ht
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/make4ht	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/make4ht	2021-07-26 20:29:50 UTC (rev 60077)
@@ -29,7 +29,7 @@
 
 -- set version number. the template should be replaced by the
 -- actual version number by the build script
-local version = "v0.3g"
+local version = "v0.3h"
 mkparams.version_number = version
 
 local args = mkparams.get_args()
@@ -76,6 +76,9 @@
 if make:length() < 1 then
 	if mode == "draft" then
 		make:htlatex()
+  elseif mode == "clean" then
+    make:clean()
+    make.no_dvi_process = true
 	else
 		make:htlatex()
 		make:htlatex()
@@ -84,7 +87,7 @@
 end
 
 
-if not args["no-tex4ht"] then
+if not args["no-tex4ht"] and not make.no_dvi_process then
   make:tex4ht()
 end
 
@@ -92,7 +95,11 @@
 if #make.image_patterns > 0 then
   make.params.t4ht_par = make.params.t4ht_par .. " -p"
 end
-make:t4ht {ext = ext}
+
+if not make.no_dvi_process then
+  make:t4ht {ext = ext}
+end
+
 -- run extensions which modify the build sequence
 if #extensions > 0 then
   make = mkutils.extensions_modify_build(extensions, make)

Deleted: trunk/Master/texmf-dist/scripts/make4ht/make4ht-odtfilter.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/make4ht-odtfilter.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/make4ht-odtfilter.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -1,33 +0,0 @@
-local mkutils = require "mkutils"
-local zip = require "zip"
-
-
--- use function to change contents of the ODT file
-local function update_odt(odtfilename, file_path, fn)
-  -- get name of the odt file
-  local odtname = mkutils.remove_extension(odtfilename) .. ".odt"
-  -- open and read contents of the requested file inside ODT file
-  local odtfile = zip.open(odtname)
-  local local_file = odtfile:open(file_path)
-  local content = local_file:read("*all")
-  local_file:close()
-  odtfile:close()
-  -- update the content using user function
-  content = fn(content)
-  -- write the updated file
-  local local_file_file  = io.open(file_path,"w")
-  local_file_file:write(content)
-  local_file_file:close()
-  os.execute("zip " .. odtname .. " " .. file_path)
-  os.remove(file_path)
-end
-
-Make:match("tmp$", function(name, par)
-  update_odt(name, "content.xml", function(content)
-    return content:gsub("%&%#x([A-Fa-f0-9]+);", function(entity)
-      -- convert hexadecimal entity to Unicode
-      print(entity,utfchar(tonumber(entity, 16)))
-      return utfchar(tonumber(entity, 16))
-    end)
-  end)
-end)

Modified: trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua
===================================================================
--- trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua	2021-07-26 20:29:23 UTC (rev 60076)
+++ trunk/Master/texmf-dist/scripts/make4ht/mkutils.lua	2021-07-26 20:29:50 UTC (rev 60077)
@@ -55,7 +55,7 @@
 end
 
 -- 
-
+-- check if file exists
 function file_exists(file)
 	local f = io.open(file, "rb")
 	if f then f:close() end
@@ -62,6 +62,24 @@
 	return f ~= nil
 end
 
+-- check if Lua module exists
+-- source: https://stackoverflow.com/a/15434737/2467963
+function isModuleAvailable(name)
+  if package.loaded[name] then
+    return true
+  else
+    for _, searcher in ipairs(package.searchers or package.loaders) do
+      local loader = searcher(name)
+      if type(loader) == 'function' then
+        package.preload[name] = loader
+        return true
+      end
+    end
+    return false
+  end
+end
+
+
 -- searching for converted images
 function parse_lg(filename)
   log:info("Parse LG")
@@ -115,6 +133,10 @@
 -- in reality it isn't.
 -- local cp_func = os.type == "unix" and "mv" or "move"
 function cp(src,dest)
+  if not file_exists(src) then
+    -- try to find file using kpse library if it cannot be found
+    src = kpse.find_file(src) or src
+  end
 	local command = string.format('%s "%s" "%s"', cp_func, src, dest)
 	if cp_func == "copy" then command = command:gsub("/",'\\') end
 	log:info("Copy: "..command)
@@ -362,9 +384,77 @@
 
 
 
-env.Make:add("tex4ht","tex4ht ${tex4ht_par} \"${input}.${dvi}\"", nil, 1)
+-- env.Make:add("tex4ht","tex4ht ${tex4ht_par} \"${input}.${dvi}\"", nil, 1)
+env.Make:add("tex4ht",function(par)
+  -- detect if svg output is used
+  -- if yes, we need to pass the -g.svg option to tex4ht command
+  -- to support svg images for character pictures
+  local logfile = par.input .. ".log"
+  if file_exists(logfile) then
+    for line in io.lines(logfile) do
+      local options = line:match("TeX4ht package options:(.+)")
+      if options then
+        log:info(options)
+        if options:match("svg") then
+          par.tex4ht_par = (par.tex4ht_par or "") .. " -g.svg"
+        end
+        break
+      end
+    end
+  end
+  local command = "tex4ht ${tex4ht_par} \"${input}.${dvi}\"" % par
+  log:info("executing: " .. command)
+  return execute(command)
+end
+ , nil, 1)
 env.Make:add("t4ht","t4ht ${t4ht_par} \"${input}.${ext}\"",{ext="dvi"},1)
 
+env.Make:add("clean", function(par)
+  -- remove all functions that process produced files
+  -- we will provide only functions that remove these files
+  Make.matches = {}
+  local main_name = par.input
+  local remove_file = function(filename)
+    if file_exists(filename) then
+      log:info("removing file: " .. filename)
+      os.remove(filename)
+    end
+  end
+  -- try to find if the last converted file was in the ODT format
+  local lg_file = parse_lg(main_name .. ".lg") 
+  local is_odt = false
+  if lg_file and lg_file.files then
+    for _, x in ipairs(lg_file.files) do
+      is_odt = x:match("odt$") or is_odt
+    end
+  end
+  if is_odt then
+    Make:match("4om$",function(filename)
+      -- math temporary file
+      local to_remove = filename:gsub("4om$", "tmp")
+      remove_file(to_remove)
+      return false
+    end)
+    Make:match("4og$", remove_file)
+  end
+  Make:match("tmp$", function()
+    -- remove temporary and auxiliary files
+    for _,ext in ipairs {"aux", "xref", "tmp", "4tc", "4ct", "idv", "lg","dvi", "log"} do
+      remove_file(main_name .. "." .. ext)
+    end
+  end)
+  Make:match(".*", function(filename, par)
+    -- remove only files whose name contains the input file basename (plain
+    -- substring search). this should prevent removing of images. this also means
+    -- that images shouldn't be named as <filename>-hello.png for example
+    if filename:find(main_name, 1,true) then
+      -- log:info("Matched file", filename)
+      remove_file(filename)
+    end
+  end)
+
+end)
+
 -- enable extension in the config file
 -- the following two functions must be here and not in make4ht-lib.lua
 -- because of the access to env.settings



More information about the tex-live-commits mailing list.