[latex3-commits] [latex3/tagpdf] luatex-softhyphen: Implement softhyphen substitution (b94bd3ea)
github at latex-project.org
github at latex-project.org
Sun Jul 21 08:39:46 CEST 2024
Repository : https://github.com/latex3/tagpdf
On branch : luatex-softhyphen
Link : https://github.com/latex3/tagpdf/commit/b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12
>---------------------------------------------------------------
commit b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12
Author: Marcel Fabian Krüger <tex at 2krueger.de>
Date: Sun Jul 21 08:39:46 2024 +0200
Implement softhyphen substitution
>---------------------------------------------------------------
b94bd3ea6d24e9ff5b633ae82fdc8901353a5d12
CHANGELOG.md | 6 ++++
tagpdf-backend.dtx | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
tagpdf.dtx | 20 +++++++++++++
3 files changed, 109 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0b33aacd..d1bd15cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to the `tagpdf` package since the
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
this project uses date-based 'snapshot' version identifiers.
+## [Unreleased]
+
+### Added
+ - key activate/softhyphen and code to use soft hyphens for hyphenation
+ if supported by the font.
+
## [2024-06-20]
Version 0.99c
diff --git a/tagpdf-backend.dtx b/tagpdf-backend.dtx
index 7e19e1e9..f158a3c5 100644
--- a/tagpdf-backend.dtx
+++ b/tagpdf-backend.dtx
@@ -252,6 +252,12 @@ local iwfontattributeid = luatexbase.new_attribute ("g_@@_interwordfont_attr")
local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
local truebool = token.create("c_true_bool")
% \end{macrocode}
+% with this token we can query the state of the softhyphen boolean
+% and so detect if hyphens should be marked with ActualText
+% \begin{macrocode}
+local softhyphenbool = token.create("g_@@_softhyphen_bool")
+% \end{macrocode}
+
% Now a number of local versions from global tables.
% Not all is perhaps needed, most node variants were copied from lua-debug.
% \begin{macrocode}
@@ -286,6 +292,9 @@ local KERN = node.id("kern")
local PENALTY = node.id("penalty")
local LOCAL_PAR = node.id("local_par")
local MATH = node.id("math")
+
+-- Subtype numbers of disc nodes as listed in the LuaTeX manual:
+-- 1 = explicit (from \-), 3 = regular (inserted by hyphenation).
+-- Only discretionaries of these two subtypes get their hyphens marked.
+local explicit_disc = 1
+local regular_disc = 3
% \end{macrocode}
% Now we setup the main table structure. ltx is used by other latex code too!
% \begin{macrocode}
@@ -1267,6 +1276,80 @@ function ltx.@@.func.output_parenttree (abspage)
end
% \end{macrocode}
% \end{macro}
+%
+% \begin{macro}
+% {
+% process_softhyphen_pre
+% process_softhyphen_post
+% }
+% First some local definitions. Since these are only needed locally everything gets wrapped into a block.
+% \begin{macrocode}
+do
+ -- Table holding the per-node Lua properties.
+ local properties = node.get_properties_table()
+ -- Property key used to flag a glyph as a hyphenation hyphen.
+ local is_soft_hyphen_prop = 'tagpdf.rewrite-softhyphen.is_soft_hyphen'
+ -- U+002D HYPHEN-MINUS: the character that gets replaced.
+ local hyphen_char = 0x2D
+ -- U+00AD SOFT HYPHEN: the replacement character.
+ local soft_hyphen_char = 0xAD
+% \end{macrocode}
+%
+% A lookup table to test if the font supports the soft hyphen glyph.
+% \begin{macrocode}
+ -- Maps a font id to true when the font has format 'opentype' or
+ -- 'truetype' and its character table contains the soft hyphen,
+ -- and to false otherwise. The answer is computed on first access
+ -- and then stored in the table itself, so __index runs once per id.
+ local softhyphen_fonts = setmetatable({}, {__index = function(t, fid)
+   -- Look up the font that was actually asked for (the handler's key).
+   local fdir = identifiers[fid]
+   local format = fdir and fdir.format
+   local result = (format == 'opentype' or format == 'truetype')
+   local characters = fdir and fdir.characters
+   result = result and (characters and characters[soft_hyphen_char]) ~= nil
+   -- Cache the boolean so later lookups bypass this function.
+   t[fid] = result
+   return result
+ end})
+% \end{macrocode}
+%
+% A pre shaping callback to mark hyphens as being hyphenation hyphens.
+% This runs before shaping to avoid affecting hyphens moved into
+% discretionaries during shaping.
+% \begin{macrocode}
+ -- Pre-shaping callback. For every disc node of subtype explicit or
+ -- regular below head, flags each node yielded by traverse_char on its
+ -- pre list with is_soft_hyphen_prop. Does nothing unless the mode of
+ -- softhyphenbool equals that of truebool. Always returns true.
+ local function process_softhyphen_pre(head, _context, _dir)
+   if softhyphenbool.mode == truebool.mode then
+     for disc, subtype in node.traverse_id(DISC, head) do
+       local wanted = subtype == explicit_disc or subtype == regular_disc
+       if wanted then
+         for glyph in node.traverse_char(disc.pre) do
+           local props = properties[glyph]
+           if props then
+             props[is_soft_hyphen_prop] = true
+           else
+             -- No property table yet for this node: create one
+             -- that already carries the flag.
+             properties[glyph] = { [is_soft_hyphen_prop] = true }
+           end
+         end
+       end
+     end
+   end
+   return true
+ end
+
+% \end{macrocode}
+%
+% Finally do the actual replacement after shaping. No checking for double processing here
+% since the operation is idempotent.
+% \begin{macrocode}
+ -- Post-shaping callback. In the pre list of every disc node below
+ -- head, a glyph is changed to soft_hyphen_char when its font is
+ -- accepted by softhyphen_fonts, its character is hyphen_char and it
+ -- carries the is_soft_hyphen_prop flag. Does nothing unless the mode
+ -- of softhyphenbool equals that of truebool. Always returns true.
+ local function process_softhyphen_post(head, _context, _dir)
+   if softhyphenbool.mode == truebool.mode then
+     for disc in node.traverse_id(DISC, head) do
+       for glyph, char, font_id in node.traverse_glyph(disc.pre) do
+         -- The font lookup stays first, as before, so the cache in
+         -- softhyphen_fonts is filled for every font encountered.
+         if softhyphen_fonts[font_id] and char == hyphen_char then
+           local props = properties[glyph]
+           if props and props[is_soft_hyphen_prop] then
+             glyph.char = soft_hyphen_char
+             -- NOTE(review): presumably data cached by the shaper for
+             -- the old character; confirm why it must be dropped.
+             props.glyph_info = nil
+           end
+         end
+       end
+     end
+   end
+   return true
+ end
+
+ -- Register the marking pass and the replacement pass under the same
+ -- description string. NOTE(review): the two filter callbacks are
+ -- presumably the shaping hooks provided by luaotfload; confirm.
+ luatexbase.add_to_callback('pre_shaping_filter', process_softhyphen_pre, 'tagpdf.rewrite-softhyphen')
+ luatexbase.add_to_callback('post_shaping_filter', process_softhyphen_post, 'tagpdf.rewrite-softhyphen')
+end
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macrocode}
%</lua>
% \end{macrocode}
diff --git a/tagpdf.dtx b/tagpdf.dtx
index e5857034..0941bc52 100644
--- a/tagpdf.dtx
+++ b/tagpdf.dtx
@@ -72,6 +72,12 @@
% marked up as artifact. The initial value is true.
% \end{function}
%
+% \begin{function}{activate/softhyphen (setup-key)}
+% This key activates the automatic handling of hyphens inserted
+% by hyphenation. It is only used in luamode and replaces such hyphens
+% by U+00AD if the font supports this.
+% \end{function}
+%
% \begin{function}{page/tabsorder (setup-key), tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. More finer control can be
@@ -354,6 +360,13 @@
% \end{macrocode}
% \end{variable}
%
+% \begin{variable}{\g_@@_softhyphen_bool}
+% This boolean controls if the code should try to automatically
+% handle hyphens from hyphenation. It is currently only used in luamode.
+% \begin{macrocode}
+\bool_new:N \g_@@_softhyphen_bool
+% \end{macrocode}
+% \end{variable}
% \section{Variants of l3 commands}
% \begin{macrocode}
\prg_generate_conditional_variant:Nnn \pdf_object_if_exist:n {e}{T,F,TF}
@@ -648,6 +661,13 @@
tagunmarked .bool_gset:N = \g_@@_tagunmarked_bool,
% \end{macrocode}
% \end{macro}
+% \begin{macro}{activate/softhyphen (setup-key)}
+% This key activates (in luamode) the handling of soft hyphens.
+% \begin{macrocode}
+ activate/softhyphen .bool_gset:N = \g_@@_softhyphen_bool,
+ activate/softhyphen .initial:n = false,
+% \end{macrocode}
+% \end{macro}
% \begin{macro}{page/tabsorder (setup-key),tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. More finer control can be
More information about the latex3-commits
mailing list.