[latex3-commits] [git/LaTeX3-latex3-babel] master: \babelprehyphenation - partial merge with 'post', insert penalty (022c799)
Javier
email at dante.de
Sat Nov 21 21:48:21 CET 2020
Repository : https://github.com/latex3/babel
On branch : master
Link : https://github.com/latex3/babel/commit/022c799283c75506e3b72fa28a51242317dc8f9f
>---------------------------------------------------------------
commit 022c799283c75506e3b72fa28a51242317dc8f9f
Author: Javier <email at localhost>
Date: Sat Nov 21 21:48:21 2020 +0100
\babelprehyphenation - partial merge with 'post', insert penalty
>---------------------------------------------------------------
022c799283c75506e3b72fa28a51242317dc8f9f
README.md | 2 +-
babel.dtx | 252 ++++++++++++++++++++++++-----------------------------------
babel.ins | 2 +-
babel.pdf | Bin 816205 -> 815299 bytes
bbcompat.dtx | 2 +-
5 files changed, 105 insertions(+), 153 deletions(-)
diff --git a/README.md b/README.md
index a7cd5cd..73e183f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Babel 3.51.2197
+## Babel 3.51.2198
This package manages culturally-determined typographical (and other)
rules, and hyphenation patterns for a wide range of languages. Many
diff --git a/babel.dtx b/babel.dtx
index 6109f3d..99fe21d 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -31,7 +31,7 @@
%
% \iffalse
%<*filedriver>
-\ProvidesFile{babel.dtx}[2020/11/20 v3.51.2197 The Babel package]
+\ProvidesFile{babel.dtx}[2020/11/21 v3.51.2198 The Babel package]
\documentclass{ltxdoc}
\GetFileInfo{babel.dtx}
\usepackage{fontspec}
@@ -4823,8 +4823,8 @@ help from Bernd Raichle, for which I am grateful.
% \section{Tools}
%
% \begin{macrocode}
-%<<version=3.51.2197>>
-%<<date=2020/11/20>>
+%<<version=3.51.2198>>
+%<<date=2020/11/21>>
% \end{macrocode}
%
% \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -13651,9 +13651,11 @@ end
\catcode`\%=12
\catcode`\&=14
\directlua{
- Babel.linebreaking.post_replacements = {}
- Babel.linebreaking.pre_replacements = {}
+ Babel.linebreaking.replacements = {}
+ Babel.linebreaking.replacements[0] = {} &% pre
+ Babel.linebreaking.replacements[1] = {} &% post
+ &% Discretionaries contain strings as nodes
function Babel.str_to_nodes(fn, matches, base)
local n, head, last
if fn == nil then return nil end
@@ -13673,13 +13675,15 @@ end
return head
end
- function Babel.fetch_word(head, mode)
+ Babel.fetch_subtext = {}
+
+ Babel.fetch_subtext[1] = function(head)
local word_string = ''
local word_nodes = {}
local lang
local item = head
local inmath = false
- local mode = 0 &% 'word' -- first steps in merging with subtext
+ local mode = 0 &%%%% 'word' first steps in merging with subtext
while item do
@@ -13720,123 +13724,10 @@ end
end
end
- function Babel.post_hyphenate_replace(head)
- local u = unicode.utf8
- local lbkr = Babel.linebreaking.post_replacements
- local word_head = head
-
- while true do
- local w, wn, nw, lang = Babel.fetch_word(word_head)
- if not lang then return head end
-
- if not lbkr[lang] then
- break
- end
-
- &% For every pattern
- for k=1, #lbkr[lang] do
- local p = lbkr[lang][k].pattern
- local r = lbkr[lang][k].replace
-
- &% For every match.
- while true do
- local matches = { u.match(w, p) }
- if #matches < 2 then break end
-
- &% Get and remove empty captures (with (), which return a
- &% number with the position), and keep actual captures
- &% (from (...)), if any, in matches.
- local first = table.remove(matches, 1)
- local last = table.remove(matches, #matches)
-
- &% Fix offsets, from bytes to unicode. Explained above.
- first = u.len(w:sub(1, first-1)) + 1
- last = u.len(w:sub(1, last-1))
-
- local new &% used when inserting and removing nodes
- local changed = 0
-
- &% This loop traverses the replace list and takes the
- &% corresponding actions
- for q = first, last do
- local crep = r[q-first+1]
- local char_node = wn[q]
- local char_base = char_node
-
- if crep and crep.data then
- char_base = wn[crep.data+first-1]
- end
-
- if crep == {} then
- break
- elseif crep == nil then
- changed = changed + 1
- node.remove(head, char_node)
- elseif crep and (crep.pre or crep.no or crep.post) then
- changed = changed + 1
- d = node.new(7, 0) &% (disc, discretionary)
- d.pre = Babel.str_to_nodes(crep.pre, matches, char_base)
- d.post = Babel.str_to_nodes(crep.post, matches, char_base)
- d.replace = Babel.str_to_nodes(crep.no, matches, char_base)
- d.attr = char_base.attr
- if crep.pre == nil then &% TeXbook p96
- d.penalty = crep.penalty or tex.hyphenpenalty
- else
- d.penalty = crep.penalty or tex.exhyphenpenalty
- end
- head, new = node.insert_before(head, char_node, d)
- node.remove(head, char_node)
- if q == 1 then
- word_head = new
- end
- elseif crep and crep.string then
- changed = changed + 1
- local str = crep.string(matches)
- if str == '' then
- if q == 1 then
- word_head = char_node.next
- end
- head, new = node.remove(head, char_node)
- elseif char_node.id == 29 and u.len(str) == 1 then
- char_node.char = string.utfvalue(str)
- else
- local n
- for s in string.utfvalues(str) do
- if char_node.id == 7 then
- log('Automatic hyphens cannot be replaced, just removed.')
- else
- n = node.copy(char_base)
- end
- n.char = s
- if q == 1 then
- head, new = node.insert_before(head, char_node, n)
- word_head = new
- else
- node.insert_before(head, char_node, n)
- end
- end
-
- node.remove(head, char_node)
- end &% string length
- end &% if char and char.string
- end &% for char in match
- if changed > 20 then
- texio.write('Too many changes. Ignoring the rest.')
- elseif changed > 0 then
- w, wn, nw = Babel.fetch_word(word_head)
- end
-
- end &% for match
- end &% for patterns
- word_head = nw
- end &% for words
- return head
- end
-
- &%%%
+ &%%%
&% Preliminary code for \babelprehyphenation
- &% TODO. Copypaste pattern. Merge with fetch_word
- function Babel.fetch_subtext(head)
+ &% TODO. Copypaste pattern. Merge
+ Babel.fetch_subtext[0] = function(head)
local word_string = ''
local word_nodes = {}
local lang
@@ -13885,39 +13776,61 @@ end
end
end
- &% TODO. Copypaste pattern. Merge with pre_hyphenate_replace
function Babel.pre_hyphenate_replace(head)
+ Babel.hyphenate_replace(head, 0)
+ end
+
+ function Babel.post_hyphenate_replace(head)
+ Babel.hyphenate_replace(head, 1)
+ end
+
+ function Babel.hyphenate_replace(head, mode)
local u = unicode.utf8
- local lbkr = Babel.linebreaking.pre_replacements
+ local lbkr = Babel.linebreaking.replacements[mode]
+
local word_head = head
while true do &% for each subtext block
- local w, wn, nw, lang = Babel.fetch_subtext(word_head)
+
+ local w, wn, nw, lang = Babel.fetch_subtext[mode](word_head)
if not lang then return head end
if not lbkr[lang] then
break
end
- for k=1, #lbkr[lang] do &% for each saved posthyphen
+ &% For each saved posthyphen
+ for k=1, #lbkr[lang] do
local p = lbkr[lang][k].pattern
local r = lbkr[lang][k].replace
+ local last_match = 0
+
+ & print('====' .. p)
+
+ &% For every match.
while true do
- local matches = { u.match(w, p) }
- local reparse = true
+ local new &% used when inserting and removing nodes
+ local changed = 0
+ local refetch = false
+
+ local matches = { u.match(w, p, last_match) }
if #matches < 2 then break end
+ &% Get and remove empty captures (with (), which return a
+ &% number with the position), and keep actual captures
+ &% (from (...)), if any, in matches.
local first = table.remove(matches, 1)
- local last = table.remove(matches, #matches)
+ local last = table.remove(matches, #matches)
+ local save_last = last
- &% Fix offsets, from bytes to unicode.
+ &% print('*')
+ &% print(first, last, w)
+
+ &% Fix offsets, from bytes to unicode. Explained above.
first = u.len(w:sub(1, first-1)) + 1
last = u.len(w:sub(1, last-1))
- local new &% used when inserting and removing nodes
- local changed = 0
-
&% This loop traverses the matched substring and takes the
&% corresponding action stored in the replacement list.
&% sc is the position in substr nodes / string
@@ -13936,20 +13849,56 @@ end
end
if crep and next(crep) == nil then &% {}
- reparse = false
+ &% pass
+
elseif crep == nil then &% remove
changed = changed + 1
+ &% print('*')
+ &% print(sc, last_match, w)
node.remove(head, char_node)
table.remove(wn, sc)
- reparse = false
w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
+ last_match = utf8.offset(w, sc)
+ &% print(sc, last_match, w)
sc = sc - 1
- elseif crep and crep.insert then
- &% print(crep.insert)
+
+ elseif mode == 1 and crep and (crep.pre or crep.no or crep.post) then
+ changed = changed + 1
+ refetch = true
+ d = node.new(7, 0) &% (disc, discretionary)
+ d.pre = Babel.str_to_nodes(crep.pre, matches, char_base)
+ d.post = Babel.str_to_nodes(crep.post, matches, char_base)
+ d.replace = Babel.str_to_nodes(crep.no, matches, char_base)
+ d.attr = char_base.attr
+ if crep.pre == nil then &% TeXbook p96
+ d.penalty = crep.penalty or tex.hyphenpenalty
+ else
+ d.penalty = crep.penalty or tex.exhyphenpenalty
+ end
+ head, new = node.insert_before(head, char_node, d)
+ node.remove(head, char_node)
+ if sc == 1 then
+ word_head = new
+ end
+
+ elseif mode == 0 and crep and crep.penalty then
+ if crep.insert then
+ changed = changed + 1
+ d = node.new(14, 0) &% (penalty, userpenalty)
+ d.attr = char_base.attr
+ d.penalty = crep.penalty
+ head, new = node.insert_before(head, char_node, d)
+ if sc == 1 then
+ word_head = new
+ end
+ last_match = save_last &% is utf8.offset(w, sc+1) ok?
+ end
+
elseif crep and crep.string then
changed = changed + 1
local str = crep.string(matches)
if str == '' then
+ refetch = true
if sc == 1 then
word_head = char_node.next
end
@@ -13958,9 +13907,10 @@ end
&% For one-to-one can we modifiy directly the
&% values without re-fetching.
char_node.char = string.utfvalue(str)
- reparse = false
w = u.sub(w, 1, sc-1) .. str .. u.sub(w, sc+1)
+ last_match = save_last &% utf8.offset(w, sc)
else
+ refetch = true
local n
for s in string.utfvalues(str) do
if char_node.id == 7 then
@@ -13980,30 +13930,29 @@ end
node.remove(head, char_node)
end &% string length
end &% if char and char.string
- end &% while char in match
+ end &% for char in match
- if changed > 20 then
+ if changed > 20 then &% TODO. Useful?
texio.write('Too many changes. Ignoring the rest.')
elseif changed > 0 then
- if reparse then
- w, wn, nw = Babel.fetch_subtext(word_head)
+ if refetch then
+ w, wn, nw, lang = Babel.fetch_subtext[mode](word_head)
else
- reparse = true
+ refetch = true
end
end
+
end &% for match
end &% for patterns
word_head = nw
- end &% for subtext
+ end &% for words
return head
end
- &%%% end of preliminary code for \babelprehyphenation
-
- &% The following functions belong to the next macro
&% This table stores capture maps, numbered consecutively
Babel.capture_maps = {}
+ &% The following functions belong to the next macro
function Babel.capture_func(key, cap)
local ret = "[[" .. cap:gsub('{([0-9])}', "]]..m[%1]..[[") .. "]]"
ret = ret:gsub('{([0-9])|([^|]+)|(.-)}', Babel.capture_func_map)
@@ -14061,13 +14010,15 @@ end
{\bbl@add@list\babeltempb{nil}}&%
{\directlua{
local rep = [[##1]]
+ rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
rep = rep:gsub( '(no)%s*=%s*([^%s,]*)', Babel.capture_func)
rep = rep:gsub( '(pre)%s*=%s*([^%s,]*)', Babel.capture_func)
rep = rep:gsub( '(post)%s*=%s*([^%s,]*)', Babel.capture_func)
rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
+ tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
}}}&%
\directlua{
- local lbkr = Babel.linebreaking.post_replacements
+ local lbkr = Babel.linebreaking.replacements[1]
local u = unicode.utf8
&% Convert pattern:
local patt = string.gsub([==[#2]==], '%s', '')
@@ -14096,11 +14047,12 @@ end
{\bbl@add@list\babeltempb{nil}}&%
{\directlua{
local rep = [[##1]]
+ rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
}}}&%
\directlua{
- local lbkr = Babel.linebreaking.pre_replacements
+ local lbkr = Babel.linebreaking.replacements[0]
local u = unicode.utf8
&% Convert pattern:
local patt = string.gsub([==[#2]==], '%s', '')
diff --git a/babel.ins b/babel.ins
index 7e7afca..3bae242 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
%% and covered by LPPL is defined by the unpacking scripts (with
%% extension .ins) which are part of the distribution.
%%
-\def\filedate{2020/11/20}
+\def\filedate{2020/11/21}
\def\batchfile{babel.ins}
\input docstrip.tex
diff --git a/babel.pdf b/babel.pdf
index 5e272eb..a07d953 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index 826e25e..73dc94a 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
%
% \iffalse
%<*dtx>
-\ProvidesFile{bbcompat.dtx}[2020/11/20 v3.51.2197]
+\ProvidesFile{bbcompat.dtx}[2020/11/21 v3.51.2198]
%</dtx>
%
%% File 'bbcompat.dtx'
More information about the latex3-commits
mailing list.