[luatex] asking for style/code critique
Werner LEMBERG
wl at gnu.org
Sun Dec 18 20:58:28 CET 2022
Folks,
attached is my first serious attempt to write a 'hyphenate' callback
(to be used for LilyPond's Texinfo documentation). It is actually my
first Lua program ever. Fortunately, initial tests indicate that it
works as expected :-)
Please have a quick look and check whether you can see any big
problems – mainly stylewise (besides using two spaces for indentation
instead of three) but perhaps codewise, too.
Thanks in advance.
Werner
-------------- next part --------------
-- code.lua
--
-- Written 2022 by Werner Lemberg <wl at gnu.org>.
--
-- If the LilyPond documentation is compiled with LuaTeX (which is the
-- default), Texinfo's `@code` macro gets enhanced by this Lua code to
-- do the following things.
--
-- (0) For further analysis, break the argument to `@code` into words.
-- Words that already contain discretionaries and penalties (for
-- example, by inserting `@/` or `@-`) are ignored.
-- (1) Insert hyphenation points in camel-case words.
-- (2) Add possible line breaks after `-` and `_`.
-- (3) Prevent single-character words at the start or the end of the
-- `@code` argument from being positioned at the end or start of an
-- output line, respectively. A typical case with LilyPond code
-- is `@code{@{ ... @}}`.
--
-- There will be at least two characters at the start or the end of a
-- word before inserting a hyphenation point or a possible line break.
--
-- Note that this code only works if `@allowcodebreaks false` is set.
-- Some shorthands.
-- Code point of `-` (U+002D); used as the character of the glyph that
-- is shown before a discretionary break.
char_hyphen = ("-"):byte()

-- Unicode-aware pattern matching and code point to string conversion.
umatch, uchar = unicode.utf8.match, unicode.utf8.char

-- Numeric identifiers of the node types this file looks at.
DISC, GLUE, GLYPH, HLIST, PENALTY =
  node.id("disc"),
  node.id("glue"),
  node.id("glyph"),
  node.id("hlist"),
  node.id("penalty")

-- Number of the attribute that marks material coming from `@code`.
-- Keep in sync with the value set in `common-macros.itexi` (using
-- `\attribute`).
code_attribute = 200
-- Return a true value if the character held by glyph node `glyph`
-- matches `pattern`.
local function glyph_matches(glyph, pattern)
  return umatch(uchar(glyph.char), pattern)
end

-- Return true if node `n` exists, carries the `@code` attribute, and
-- is of node type `id`.
local function is_code_node(n, id)
  if n and node.has_attribute(n, code_attribute) and n.id == id then
    return true
  end
  return false
end

-- Create a penalty node with penalty `value` that is marked as
-- belonging to `@code` (so that it is not mistaken for the boundary of
-- the `@code` argument later on).
local function new_code_penalty(value)
  local pen = node.new(PENALTY)
  pen.penalty = value
  node.set_attribute(pen, code_attribute, 1)
  return pen
end

-- Scan the node list starting at `head` and return an array of words.
-- Every word is itself an array holding the consecutive glyph nodes the
-- word consists of.
--
-- A word is a run of `@code` glyphs typeset with the same font.  Words
-- that already contain discretionaries or penalties are not returned.
local function collect_words(head)
  local words = {}
  local current = {}          -- nodes of the word being scanned
  local prev_font = -1        -- -1 means `not within a word`
  local only_characters = true

  -- Store the word scanned so far (if it is usable) and reset the state.
  local function finish_word()
    if #current > 0 then
      if only_characters then
        words[#words + 1] = current
      end
      current = {}
    end
    only_characters = true
    prev_font = -1
  end

  for n in node.traverse(head) do
    -- Handle only stuff in `@code`.
    local in_code = node.has_attribute(n, code_attribute)
    local in_word = false

    if in_code then
      -- Only characters typeset with the same font are considered to
      -- be in the same word.
      if n.id == GLYPH and (prev_font == -1 or prev_font == n.font) then
        in_word = true
      end
      -- Discretionaries and penalties outside of a word are not
      -- handled; within a word they make us ignore the whole word.
      if (n.id == DISC or n.id == PENALTY) and #current > 0 then
        in_word = true
        only_characters = false
      end
    end

    if in_word then
      current[#current + 1] = n
      if n.id == GLYPH then
        prev_font = n.font
      end
    else
      finish_word()
      -- The current node might still start a new word (this happens
      -- on a font change).
      if in_code and n.id == GLYPH then
        current[1] = n
        prev_font = n.font
      end
    end
  end

  -- The node list might end with a word; don't lose it.
  finish_word()

  return words
end

-- Check the glyphs of a word for
-- `[<letter>_] [<letter>_] [-_] <letter> [<letter>_]` and insert a
-- penalty after `-` (or `_`) for every hit.
local function break_after_hyphens(head, glyphs)
  local i = 1
  while i + 4 <= #glyphs do
    if glyph_matches(glyphs[i], "[%l%u_]")
       and glyph_matches(glyphs[i + 1], "[%l%u_]")
       and glyph_matches(glyphs[i + 2], "[_-]")
       and glyph_matches(glyphs[i + 3], "[%l%u]")
       and glyph_matches(glyphs[i + 4], "[%l%u_]") then
      node.insert_after(head, glyphs[i + 2],
                        new_code_penalty(tex.hyphenpenalty))
      i = i + 3
    else
      i = i + 1
    end
  end
end

-- Check the glyphs of a word for
-- `<letter> <lowercase> <uppercase> <letter>` and insert the equivalent
-- to `\discretionary{-}{}{}` after `<lowercase>` for every hit.
--
-- The search walks the array of glyphs, not the node list: penalties
-- inserted by `break_after_hyphens` must not be counted as characters
-- of the word.  Such a penalty always follows `-` or `_`, which
-- matches none of the four positions checked here, so a hit can never
-- span an inserted penalty.
local function break_camel_case(head, glyphs)
  local i = 1
  while i + 3 <= #glyphs do
    local lower = glyphs[i + 1]
    if glyph_matches(glyphs[i], "[%l%u]")
       and glyph_matches(lower, "%l")
       and glyph_matches(glyphs[i + 2], "%u")
       and glyph_matches(glyphs[i + 3], "[%l%u]") then
      local hyphen = node.new(GLYPH)
      hyphen.subtype = 1
      hyphen.font = lower.font
      hyphen.char = char_hyphen

      local disc = node.new(DISC)
      disc.subtype = 1
      disc.penalty = tex.hyphenpenalty
      disc.pre = hyphen
      node.set_attribute(disc, code_attribute, 1)

      node.insert_after(head, lower, disc)
      i = i + 2
    else
      i = i + 1
    end
  end
end

-- Insert penalties of value 10000 so that a single character at the
-- start (or end) of `@code` stays together with the space that follows
-- (or precedes) it.
local function protect_single_characters(head)
  for n in node.traverse(head) do
    if not node.has_attribute(n, code_attribute) then
      -- `n` is outside of `@code`.  Check whether it is followed by a
      -- single `@code` character and a `@code` space; if yes, insert a
      -- penalty after the character.
      local char = n.next
      if is_code_node(char, GLYPH) and is_code_node(char.next, GLUE) then
        node.insert_after(head, char, new_code_penalty(10000))
      end
    elseif n.id == GLUE then
      -- `n` is a space within `@code`.  Check whether it is followed
      -- by a single character that ends `@code`; if yes, insert a
      -- penalty before the space.
      local char = n.next
      if is_code_node(char, GLYPH) then
        -- We actually have to check for one more node because `@code`
        -- ends with a call to `\null`, which creates an empty hbox.
        local hbox = char.next
        if is_code_node(hbox, HLIST) then
          local after = hbox.next
          if after and not node.has_attribute(after, code_attribute) then
            node.insert_before(head, n, new_code_penalty(10000))
          end
        end
      end
    end
  end
end

-- Improve line breaking of `@code` material in the node list starting
-- at `head`.  The list is modified in place; nothing is returned.
--
-- (1) Add possible line breaks after `-` and `_` within words.
-- (2) Insert hyphenation points in camel-case words.
-- (3) Keep single-character words at the start or the end of `@code`
--     together with the adjacent space.
typography = function(head)
  for _, glyphs in ipairs(collect_words(head)) do
    break_after_hyphens(head, glyphs)
    break_camel_case(head, glyphs)
  end

  protect_single_characters(head)
end
-- eof
More information about the luatex
mailing list.