[latex3-commits] [latex3/tagpdf] luatex-softhyphen: Implement softhyphen substitution (b94bd3ea)

github at latex-project.org github at latex-project.org
Sun Jul 21 08:39:46 CEST 2024


Repository : https://github.com/latex3/tagpdf
On branch  : luatex-softhyphen
Link       : https://github.com/latex3/tagpdf/commit/b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12

>---------------------------------------------------------------

commit b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date:   Sun Jul 21 08:39:46 2024 +0200

    Implement softhyphen substitution


>---------------------------------------------------------------

b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12
 CHANGELOG.md       |  6 ++++
 tagpdf-backend.dtx | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tagpdf.dtx         | 20 +++++++++++++
 3 files changed, 109 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b33aacd..d1bd15cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to the `tagpdf` package since the
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 this project uses date-based 'snapshot' version identifiers.
  
+## [Unreleased]
+
+### Added
+ - key activate/softhyphen and code to use soft hyphens for hyphenation
+   if supported by the font.
+
 ## [2024-06-20]
 Version 0.99c
 
diff --git a/tagpdf-backend.dtx b/tagpdf-backend.dtx
index 7e19e1e9..f158a3c5 100644
--- a/tagpdf-backend.dtx
+++ b/tagpdf-backend.dtx
@@ -252,6 +252,12 @@ local iwfontattributeid  = luatexbase.new_attribute ("g_@@_interwordfont_attr")
 local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
 local truebool       = token.create("c_true_bool")
 %    \end{macrocode}
+% With this token we can query the state of the softhyphen boolean
+% and so detect if hyphenation hyphens should be replaced by soft hyphens.
+%    \begin{macrocode}
+local softhyphenbool = token.create("g_@@_softhyphen_bool") -- its .mode is compared with truebool.mode in the softhyphen callbacks
+%    \end{macrocode}
+
 % Now a number of local versions from global tables.
 % Not all is perhaps needed, most node variants were copied from lua-debug.
 %    \begin{macrocode}
@@ -286,6 +292,9 @@ local KERN           = node.id("kern")
 local PENALTY        = node.id("penalty")
 local LOCAL_PAR      = node.id("local_par")
 local MATH           = node.id("math")
+
+local explicit_disc = 1 -- disc node subtype accepted by process_softhyphen_pre (presumably discretionaries from \-; confirm against the LuaTeX manual)
+local regular_disc = 3 -- disc node subtype accepted by process_softhyphen_pre (presumably discretionaries from automatic hyphenation; confirm)
 %    \end{macrocode}
 % Now we setup the main table structure. ltx is used by other latex code too!
 %    \begin{macrocode}
@@ -1267,6 +1276,80 @@ function ltx.@@.func.output_parenttree (abspage)
 end
 %    \end{macrocode}
 % \end{macro}
+%
+% \begin{macro}
+%   {
+%    process_softhyphen_pre
+%    process_softhyphen_post
+%   }
+%  First some local definitions. Since these are only needed locally everything gets wrapped into a block.
+%    \begin{macrocode}
+do
+  local properties = node.get_properties_table() -- per-node Lua property tables, indexed by node
+  local is_soft_hyphen_prop = 'tagpdf.rewrite-softhyphen.is_soft_hyphen' -- property key set by the pre pass, read by the post pass
+  local hyphen_char = 0x2D -- U+002D HYPHEN-MINUS, the character that gets replaced
+  local soft_hyphen_char = 0xAD -- U+00AD SOFT HYPHEN, the replacement character
+%    \end{macrocode}
+%
+%  A lookup table to test if the font supports the soft hyphen glyph.
+%    \begin{macrocode}
+  local softhyphen_fonts = setmetatable({}, {__index = function(t, fid) -- fid: font id; runs only for fonts not cached yet
+    local fdir = identifiers[fid] -- look up the requested font (previously indexed with `fontid`, not the `fid` argument)
+    local format = fdir and fdir.format
+    local result = (format == 'opentype' or format == 'truetype') -- only these two font formats qualify
+    local characters = fdir and fdir.characters
+    result = result and (characters and characters[soft_hyphen_char]) ~= nil -- and the font must provide a U+00AD character
+    t[fid] = result -- cache the boolean so __index is not triggered again for this font
+    return result
+  end})
+%    \end{macrocode}
+%
+%  A pre shaping callback to mark hyphens as being hyphenation hyphens.
+%  This runs before shaping to avoid affecting hyphens moved into
+%  discretionaries during shaping.
+%    \begin{macrocode}
+  local function process_softhyphen_pre(head, _context, _dir) -- flags the glyphs in disc pre lists before shaping
+    if softhyphenbool.mode ~= truebool.mode then return true end -- softhyphen boolean is not true: leave the list alone
+    for disc, sub in node.traverse_id(DISC, head) do
+      if sub == explicit_disc or sub == regular_disc then -- all other disc subtypes are ignored
+        for n, _ch, _f in node.traverse_char(disc.pre) do
+          local props = properties[n]
+          if not props then -- create the node's property table on first use
+            props = {}
+            properties[n] = props
+          end
+          props[is_soft_hyphen_prop] = true -- remembered for process_softhyphen_post
+        end
+      end
+    end
+    return true
+  end
+
+%    \end{macrocode}
+%
+%  Finally do the actual replacement after shaping. No checking for double processing here
+%  since the operation is idempotent.
+%    \begin{macrocode}
+  local function process_softhyphen_post(head, _context, _dir) -- swaps flagged hyphens for soft hyphens after shaping
+    if softhyphenbool.mode ~= truebool.mode then return true end -- softhyphen boolean is not true: leave the list alone
+    for disc, sub in node.traverse_id(DISC, head) do -- sub is unused here: every disc subtype is visited
+      for n, ch, fid in node.traverse_glyph(disc.pre) do
+        local props = properties[n]
+        if softhyphen_fonts[fid] and ch == hyphen_char and props and props[is_soft_hyphen_prop] then -- font passes the softhyphen_fonts check, glyph is U+002D and was flagged by the pre pass
+          n.char = soft_hyphen_char
+          props.glyph_info = nil -- NOTE(review): presumably discards shaper-cached glyph data for the old char; confirm
+        end
+      end
+    end
+    return true
+  end
+
+  luatexbase.add_to_callback('pre_shaping_filter', process_softhyphen_pre, 'tagpdf.rewrite-softhyphen') -- marking pass, registered on the pre-shaping callback
+  luatexbase.add_to_callback('post_shaping_filter', process_softhyphen_post, 'tagpdf.rewrite-softhyphen') -- replacement pass, registered on the post-shaping callback
+end
+%    \end{macrocode}
+% \end{macro}
+%
 %    \begin{macrocode}
 %</lua>
 %    \end{macrocode}
diff --git a/tagpdf.dtx b/tagpdf.dtx
index e5857034..0941bc52 100644
--- a/tagpdf.dtx
+++ b/tagpdf.dtx
@@ -72,6 +72,12 @@
 % marked up as artifact. The initial value is true.
 % \end{function}
 %
+% \begin{function}{activate/softhyphen (setup-key)}
+% This key activates automatic handling of hyphens inserted
+% by hyphenation. It is only used in luamode and replaces hyphens
+% by U+00AD if the font supports this.
+% \end{function}
+% 
 % \begin{function}{page/tabsorder (setup-key), tabsorder (deprecated)}
 % This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
 % or |none|. Currently this is set more or less globally. More finer control can be
@@ -354,6 +360,13 @@
 %    \end{macrocode}
 % \end{variable}
 %
+% \begin{variable}{\g_@@_softhyphen_bool}
+% This boolean controls if the code should try to automatically
+% handle hyphens from hyphenation. It is currently only used in luamode.
+%    \begin{macrocode}
+\bool_new:N \g_@@_softhyphen_bool
+%    \end{macrocode}
+% \end{variable}
 % \section{Variants of l3 commands}
 %     \begin{macrocode}
 \prg_generate_conditional_variant:Nnn \pdf_object_if_exist:n {e}{T,F,TF}
@@ -648,6 +661,13 @@
     tagunmarked .bool_gset:N = \g_@@_tagunmarked_bool,
 %    \end{macrocode}
 % \end{macro}
+% \begin{macro}{activate/softhyphen (setup-key)}
+% This key activates (in luamode) the handling of soft hyphens.
+%    \begin{macrocode}
+    activate/softhyphen     .bool_gset:N = \g_@@_softhyphen_bool,
+    activate/softhyphen     .initial:n  = false,
+%    \end{macrocode}
+% \end{macro}
 % \begin{macro}{page/tabsorder (setup-key),tabsorder (deprecated)}
 % This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
 % or |none|. Currently this is set more or less globally. More finer control can be





More information about the latex3-commits mailing list.