texlive[75022] Master/texmf-dist: sec-slugname option, tex4ht r1666;

commits+karl at tug.org commits+karl at tug.org
Fri Apr 25 23:53:18 CEST 2025


Revision: 75022
          https://tug.org/svn/texlive?view=revision&revision=75022
Author:   karl
Date:     2025-04-25 23:53:18 +0200 (Fri, 25 Apr 2025)
Log Message:
-----------
sec-slugname option, tex4ht r1666; fulltoc option, tex4ht r1667

Revision Links:
--------------
    https://tug.org/svn/texlive?view=revision&revision=1666
    https://tug.org/svn/texlive?view=revision&revision=1667

Modified Paths:
--------------
    trunk/Master/texmf-dist/source/generic/tex4ht/ChangeLog
    trunk/Master/texmf-dist/source/generic/tex4ht/tex4ht-html4.tex
    trunk/Master/texmf-dist/tex/generic/tex4ht/html4.4ht

Modified: trunk/Master/texmf-dist/source/generic/tex4ht/ChangeLog
===================================================================
--- trunk/Master/texmf-dist/source/generic/tex4ht/ChangeLog	2025-04-25 21:16:54 UTC (rev 75021)
+++ trunk/Master/texmf-dist/source/generic/tex4ht/ChangeLog	2025-04-25 21:53:18 UTC (rev 75022)
@@ -1,3 +1,12 @@
+2025-04-25  Michal Hoftich  <michal.h21 at gmail.com>
+
+	* tex4ht-html4.tex (html4.4ht): added the "fulltoc" option. It will
+	add the full table of contents to every generated HTML page. It is
+	intended for use with the "collapsetoc" DOM filter from make4ht.
+
+	* tex4ht-html4.tex (html4.4ht): added the "sec-slugname" option. It
+	will sanitize filenames for cut sections.
+
 2025-04-10  Michal Hoftich  <michal.h21 at gmail.com>
 
 	* tex4ht-html4.tex (html4.4ht): fixed paragraph handling in

Modified: trunk/Master/texmf-dist/source/generic/tex4ht/tex4ht-html4.tex
===================================================================
--- trunk/Master/texmf-dist/source/generic/tex4ht/tex4ht-html4.tex	2025-04-25 21:16:54 UTC (rev 75021)
+++ trunk/Master/texmf-dist/source/generic/tex4ht/tex4ht-html4.tex	2025-04-25 21:53:18 UTC (rev 75022)
@@ -1,4 +1,4 @@
-% $Id: tex4ht-html4.tex 1664 2025-04-10 08:14:31Z michal_h21 $
+% $Id: tex4ht-html4.tex 1667 2025-04-25 14:39:28Z michal_h21 $
 % Compile 4 times: latex tex4ht-html4
 % Copy html4.4ht into the work directory before all but the last compilation.
 %
@@ -1159,6 +1159,13 @@
           \Log:Note{For section filenames that use full
                  jobname and section type use the command line 
                  option `cut-fullname'}
+          \:CheckOption{sec-slugname}
+          \if:Option
+             |<section slug names for cutat files|>
+          \else
+             \Log:Note{For section filenames based on slugified
+                      titles use the command line option `sec-slugname'}
+          \fi
        \fi
     \fi 
 \fi
@@ -1242,8 +1249,111 @@
 \egroup        
 >>>
 
+The sec-slugname option generates separate output files for each section, where
+the filenames are derived from a slugified version of the section titles.
+Slugification involves converting the title to lowercase, removing diacritics,
+replacing spaces with underscores, and stripping out characters that are not
+letters or digits. This ensures clean, URL-friendly filenames that
+reflect the content structure while avoiding problematic characters.
 
+\<section slug names for cutat files\><<<
+\ifdefined\directlua
+|<slugname lua function|>
+|<slugname EXPL3 function|>
+\else
+  \:warning{The `sec-slugname' option requires LuaTeX. Try `sec-filename' with other engines.}%
+\fi
+>>>
 
+This code reuses the Unicode data from the make4ht library to generate slugified names.
+
+\<slugname lua function\><<<
+\directlua{
+
+local chardata = require "make4ht-char-def"
+local uchar = utf8.char
+
+
+local function is_letter(info)
+  %-- test if character is letter
+  local category = info.category or ""
+  return category:match("^l") 
+end
+
+local function is_number(char)
+  return char >= 48 and char <= 57
+end
+
+local function is_space(info)
+  local category = info.category or ""
+  return category == "zs"
+end
+
+
+% this code is reused from make4ht sectionid DOM filter
+% it will convert accented characters to ASCII
+local function normalize_letter(char, result)
+  local info = chardata[char] or {}
+  % -- first get lower case of the letter
+  local lowercase = info.lccode or char
+  %-- remove accents. the base letter is in the shcode field
+  local lowerinfo = chardata[lowercase] or {}
+  %-- when no shcode, use the current lowercase char
+  local shcode = lowerinfo.shcode or lowercase
+  %-- shcode can be table if it contains multiple characters
+  %-- normalize it to a table, so we can add all letters to 
+  %-- the resulting string
+  shcode = type(shcode) == "table" and shcode or {shcode}
+  for _, x in ipairs(shcode) do
+    table.insert(result, uchar(x))
+  end
+end
+
+
+local used_names = {}
+
+function tex4ht_sanitize_section_name(filename)
+  % name = unicode.utf8.lower(name)
+  local result = {}
+  % create table with slugified characters
+  for _,char in utf8.codes(filename) do
+    local info = chardata[char] or {}
+    if is_space(info) then
+      %-- replace spaces with underscores
+      table.insert(result, "_")
+    elseif is_letter(info) then
+      normalize_letter(char, result)
+    elseif is_number(char) then
+      % the make4ht char library doesn't contain Unicode data for numbers, so we need to check them manually
+      table.insert(result, uchar(char))
+    end
+  end
+  % -- convert table with normalized characters to string
+  local name = table.concat(result)
+  % -- handle multiple sections with the same name
+  local count = used_names[name] or 0
+  used_names[name] = count + 1
+  % add numeric suffix to the name if there are duplicates
+  % we need to use a different separator before the number, to prevent clashes
+  % with sections that contain numbers at the end
+  name = count > 0 and name .. "-" .. count or name
+  % register also the name with the suffix, to prevent filename clashes
+  return name
+end
+}
+>>>
+
+Configure the cutat filename to use the slugified version of the section name.
+
+\<slugname EXPL3 function\><<<
+\ExplSyntaxOn
+\Configure{CutAt-filename}{\expandafter\NextFile\expandafter{\directlua{
+% remove commands from the section name and pass it to the slugify function
+tex.print(tex4ht_sanitize_section_name("\luaescapestring{\text_purify:n{#2}}"))
+}.html}}
+\ExplSyntaxOff
+>>>
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Tables of Contents: Choice of Entries}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1848,6 +1958,50 @@
 >>>
 
 
+\<configure html4 latex\><<<
+|<fulltoc option|>
+>>>
+
+This configuration is for the full table of contents shown on each section page. 
+It is intended for use with the `collapsetoc' DOM filter, or with JS code that will 
+make a collapsible dynamic menu.
+
+\<fulltoc option\><<<
+\:CheckOption{fulltoc} \if:Option
+
+% this configures list of sectioning types that should be included in the full TOC
+\NewConfigure{fulltocsections}{1}
+\Configure{fulltocsections}{chapter,likechapter,section,likesection,subsection,likesubsection}
+
+% put full TOC on each page
+\Configure{crosslinks+}{%
+  \bgroup
+  % container for the page toc
+  \Configure{tableofcontents}{\IgnorePar\EndP\HCode{<nav class="TOC">}\IgnorePar}
+  {\HCode{\Hnewline}}{\IgnorePar\HCode{</nav>\Hnewline}\ShowPar}{}{}%
+  \expandafter\TableOfContents\expandafter[\a:fulltocsections]% Print table of contents before crosslinks
+  \egroup
+  \ifvmode\IgnorePar\fi\EndP%
+  \HCode{<main class="main-content">\Hnewline<nav class="crosslinks-top">} }
+  {\HCode{</nav>\Hnewline}}%
+{\ifvmode\IgnorePar\fi\EndP%
+  \HCode{<nav class="crosslinks-bottom">}}{\HCode{</nav>}}{}{}
+
+% configuration for TOC on the main page, which is not configured by the previous command
+\Configure{tableofcontents}{\IgnorePar\EndP\HCode{<nav class="TOC">}\IgnorePar}
+{}{\IgnorePar\HCode{</nav>\Hnewline<main class="main-content">\Hnewline}\ShowPar}{}{}%
+
+% close the <main> element started in \Configure{crosslinks+} and \Configure{tableofcontents}
+\Configure{@/BODY}{\ifvmode\IgnorePar\fi\EndP\HCode{</main>}}
+
+\Css{nav.TOC > span{display:block;}}
+
+\else
+\Log:Note{For full TOC on each page compatible with the collapsetoc DOM filter,
+   use the `fulltoc' command line option.}
+\fi
+>>>
+
 \<html latex tocs\><<<
 \def\tocpart#1#2#3{\par
    \HCode{<span class="partToc" \a:LRdir>}\if !#1!\else  #1\ \ \fi #2\HCode{</span>}\par}%
@@ -20173,6 +20327,7 @@
    {\HCode{</p></div>}\par\ShowPar}
 \Css{@media print {div.crosslinks {visibility:hidden;}}}
 
+
 \Configure{halignTR} 
    {\HCode{ style="vertical-align:baseline;"}}
 \Configure{halignTBL} 
@@ -20199,6 +20354,7 @@
 \Css{table.tabular{border-collapse: collapse; border-spacing: 0;}}
 >>>
 
+
 The following should be just under LaTeX.
 
 \<configure html4 tex4ht\><<<

Modified: trunk/Master/texmf-dist/tex/generic/tex4ht/html4.4ht
===================================================================
--- trunk/Master/texmf-dist/tex/generic/tex4ht/html4.4ht	2025-04-25 21:16:54 UTC (rev 75021)
+++ trunk/Master/texmf-dist/tex/generic/tex4ht/html4.4ht	2025-04-25 21:53:18 UTC (rev 75022)
@@ -1,4 +1,4 @@
-% html4.4ht (2025-04-10-13:23), generated from tex4ht-html4.tex
+% html4.4ht (2025-04-25-14:24), generated from tex4ht-html4.tex
 % Copyright 2009-2025 TeX Users Group
 % Copyright 1997-2009 Eitan M. Gurari
 %
@@ -17,7 +17,7 @@
 %
 % If you modify this program, changing the
 % version identification would be appreciated.
-\immediate\write-1{version 2025-04-10-13:23}
+\immediate\write-1{version 2025-04-25-14:24}
 
 \exit:ifnot{8859-6,% 
 CJK,% 
@@ -441,6 +441,98 @@
           \Log:Note{For section filenames that use full
                  jobname and section type use the command line
                  option `cut-fullname'}
+          \:CheckOption{sec-slugname}
+          \if:Option
+             \ifdefined\directlua
+\directlua{
+
+local chardata = require "make4ht-char-def"
+local uchar = utf8.char
+
+
+local function is_letter(info)
+  %-- test if character is letter
+  local category = info.category or ""
+  return category:match("^l")
+end
+
+local function is_number(char)
+  return char >= 48 and char <= 57
+end
+
+local function is_space(info)
+  local category = info.category or ""
+  return category == "zs"
+end
+
+
+% this code is reused from make4ht sectionid DOM filter
+% it will convert accented characters to ASCII
+local function normalize_letter(char, result)
+  local info = chardata[char] or {}
+  % -- first get lower case of the letter
+  local lowercase = info.lccode or char
+  %-- remove accents. the base letter is in the shcode field
+  local lowerinfo = chardata[lowercase] or {}
+  %-- when no shcode, use the current lowercase char
+  local shcode = lowerinfo.shcode or lowercase
+  %-- shcode can be table if it contains multiple characters
+  %-- normalize it to a table, so we can add all letters to
+  %-- the resulting string
+  shcode = type(shcode) == "table" and shcode or {shcode}
+  for _, x in ipairs(shcode) do
+    table.insert(result, uchar(x))
+  end
+end
+
+
+local used_names = {}
+
+function tex4ht_sanitize_section_name(filename)
+  % name = unicode.utf8.lower(name)
+  local result = {}
+  % create table with slugified characters
+  for _,char in utf8.codes(filename) do
+    local info = chardata[char] or {}
+    if is_space(info) then
+      %-- replace spaces with underscores
+      table.insert(result, "_")
+    elseif is_letter(info) then
+      normalize_letter(char, result)
+    elseif is_number(char) then
+      % the make4ht char library doesn't contain Unicode data for numbers, so we need to check them manually
+      table.insert(result, uchar(char))
+    end
+  end
+  % -- convert table with normalized characters to string
+  local name = table.concat(result)
+  % -- handle multiple sections with the same name
+  local count = used_names[name] or 0
+  used_names[name] = count + 1
+  % add numeric suffix to the name if there are duplicates
+  % we need to use a different separator before the number, to prevent clashes
+  % with sections that contain numbers at the end
+  name = count > 0 and name .. "-" .. count or name
+  % register also the name with the suffix, to prevent filename clashes
+  return name
+end
+}
+
+\ExplSyntaxOn
+\Configure{CutAt-filename}{\expandafter\NextFile\expandafter{\directlua{
+% remove commands from the section name and pass it to the slugify function
+tex.print(tex4ht_sanitize_section_name("\luaescapestring{\text_purify:n{#2}}"))
+}.html}}
+\ExplSyntaxOff
+
+\else
+  \:warning{The `sec-slugname' option requires LuaTeX. Try `sec-filename' with other engines.}%
+\fi
+
+          \else
+             \Log:Note{For section filenames based on slugified
+                      titles use the command line option `sec-slugname'}
+          \fi
        \fi
     \fi
 \fi
@@ -1205,6 +1297,7 @@
    {\HCode{</p></div>}\par\ShowPar}
 \Css{@media print {div.crosslinks {visibility:hidden;}}}
 
+
 \Configure{halignTR}
    {\HCode{ style="vertical-align:baseline;"}}
 \Configure{halignTBL}
@@ -1423,6 +1516,40 @@
    {\ifTag{tex4ht-body}{\HCode{<br />}\Link{tex4ht-body}{}Home\EndLink}{}}
    {\IgnorePar\EndP\HCode{</div>}\ShowPar}
    {\HCode{<br />}}   {}
+\:CheckOption{fulltoc} \if:Option
+
+% this configures list of sectioning types that should be included in the full TOC
+\NewConfigure{fulltocsections}{1}
+\Configure{fulltocsections}{chapter,likechapter,section,likesection,subsection,likesubsection}
+
+% put full TOC on each page
+\Configure{crosslinks+}{%
+  \bgroup
+  % container for the page toc
+  \Configure{tableofcontents}{\IgnorePar\EndP\HCode{<nav class="TOC">}\IgnorePar}
+  {\HCode{\Hnewline}}{\IgnorePar\HCode{</nav>\Hnewline}\ShowPar}{}{}%
+  \expandafter\TableOfContents\expandafter[\a:fulltocsections]% Print table of contents before crosslinks
+  \egroup
+  \ifvmode\IgnorePar\fi\EndP%
+  \HCode{<main class="main-content">\Hnewline<nav class="crosslinks-top">} }
+  {\HCode{</nav>\Hnewline}}%
+{\ifvmode\IgnorePar\fi\EndP%
+  \HCode{<nav class="crosslinks-bottom">}}{\HCode{</nav>}}{}{}
+
+% configuration for TOC on the main page, which is not configured by the previous command
+\Configure{tableofcontents}{\IgnorePar\EndP\HCode{<nav class="TOC">}\IgnorePar}
+{}{\IgnorePar\HCode{</nav>\Hnewline<main class="main-content">\Hnewline}\ShowPar}{}{}%
+
+% close the <main> element started in \Configure{crosslinks+} and \Configure{tableofcontents}
+\Configure{@/BODY}{\ifvmode\IgnorePar\fi\EndP\HCode{</main>}}
+
+\Css{nav.TOC > span{display:block;}}
+
+\else
+\Log:Note{For full TOC on each page compatible with the collapsetoc DOM filter,
+   use the `fulltoc' command line option.}
+\fi
+
 \Css{li p.indent { text-indent: 0em }}
 \Css{li p:first-child{ margin-top:0em; }}
 \Css{li p:last-child, li div:last-child { margin-bottom:0.5em; }}



More information about the tex-live-commits mailing list.