[latex3-commits] [git/LaTeX3-latex3-babel] master: \babelprehyphenation - speed boost with 1-1 and ∞-1 string replacements. (7e36bc9)
Javier
email at dante.de
Fri Nov 20 17:39:42 CET 2020
Repository : https://github.com/latex3/babel
On branch : master
Link : https://github.com/latex3/babel/commit/7e36bc99f7327769f0e1182a6a28a6935ba0217f
>---------------------------------------------------------------
commit 7e36bc99f7327769f0e1182a6a28a6935ba0217f
Author: Javier <email at localhost>
Date: Fri Nov 20 17:39:42 2020 +0100
\babelprehyphenation - speed boost with 1-1 and ∞-1 string replacements.
>---------------------------------------------------------------
7e36bc99f7327769f0e1182a6a28a6935ba0217f
README.md | 3 ++-
babel.dtx | 72 +++++++++++++++++++++++++++++++++++++----------------------
babel.ins | 2 +-
babel.pdf | Bin 815841 -> 816205 bytes
bbcompat.dtx | 2 +-
5 files changed, 49 insertions(+), 30 deletions(-)
diff --git a/README.md b/README.md
index 5970533..a7cd5cd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Babel 3.51.2195
+## Babel 3.51.2197
This package manages culturally-determined typographical (and other)
rules, and hyphenation patterns for a wide range of languages. Many
@@ -44,6 +44,7 @@ respective authors.
### Latest changes
```
3.52 2020-12-??
+ - Improved \babelprehyphenation
- Fixes:
- A couple of issues with \localeinfo and \getlocaleproperty
(#102, #105).
diff --git a/babel.dtx b/babel.dtx
index 1e8d76e..6109f3d 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -31,7 +31,7 @@
%
% \iffalse
%<*filedriver>
-\ProvidesFile{babel.dtx}[2020/11/18 v3.51.2195 The Babel package]
+\ProvidesFile{babel.dtx}[2020/11/20 v3.51.2197 The Babel package]
\documentclass{ltxdoc}
\GetFileInfo{babel.dtx}
\usepackage{fontspec}
@@ -4823,8 +4823,8 @@ help from Bernd Raichle, for which I am grateful.
% \section{Tools}
%
% \begin{macrocode}
-%<<version=3.51.2195>>
-%<<date=2020/11/18>>
+%<<version=3.51.2197>>
+%<<date=2020/11/20>>
% \end{macrocode}
%
% \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -6694,7 +6694,6 @@ help from Bernd Raichle, for which I am grateful.
\IfFileExists{\CurrentOption.ldf}%
{\bbl at load@language{\CurrentOption}}%
{#1\bbl at load@language{#2}#3}}
-% \DeclareOption{afrikaans}{\bbl at try@load at lang{}{dutch}{}}
\DeclareOption{hebrew}{%
\input{rlbabel.def}%
\bbl at load@language{hebrew}}
@@ -13714,7 +13713,6 @@ end
&% pass
else
- tex.write_nl(word_string)
return word_string, word_nodes, item, lang
end
@@ -13740,15 +13738,18 @@ end
local p = lbkr[lang][k].pattern
local r = lbkr[lang][k].replace
- &% For every match
+ &% For every match.
while true do
local matches = { u.match(w, p) }
if #matches < 2 then break end
+ &% Get and remove empty captures (with (), which return a
+ &% number with the position), and keep actual captures
+ &% (from (...)), if any, in matches.
local first = table.remove(matches, 1)
local last = table.remove(matches, #matches)
- &% Fix offsets, from bytes to unicode.
+ &% Fix offsets, from bytes to unicode. Explained above.
first = u.len(w:sub(1, first-1)) + 1
last = u.len(w:sub(1, last-1))
@@ -13877,6 +13878,7 @@ end
else
return word_string, word_nodes, item, lang
+
end
item = item.next
@@ -13889,7 +13891,7 @@ end
local lbkr = Babel.linebreaking.pre_replacements
local word_head = head
- while true do
+ while true do &% for each subtext block
local w, wn, nw, lang = Babel.fetch_subtext(word_head)
if not lang then return head end
@@ -13897,16 +13899,17 @@ end
break
end
- for k=1, #lbkr[lang] do
+ for k=1, #lbkr[lang] do &% for each saved posthyphen
local p = lbkr[lang][k].pattern
local r = lbkr[lang][k].replace
while true do
local matches = { u.match(w, p) }
+ local reparse = true
if #matches < 2 then break end
local first = table.remove(matches, 1)
- local last = table.remove(matches, #matches)
+ local last = table.remove(matches, #matches)
&% Fix offsets, from bytes to unicode.
first = u.len(w:sub(1, first-1)) + 1
@@ -13915,34 +13918,48 @@ end
local new &% used when inserting and removing nodes
local changed = 0
- &% This loop traverses the replace list and takes the
- &% corresponding actions
- for q = first, last do
- local crep = r[q-first+1]
- local char_node = wn[q]
+ &% This loop traverses the matched substring and takes the
+ &% corresponding action stored in the replacement list.
+ &% sc is the position in substr nodes / string
+ &% rc is the replacement table index
+ sc = first-1
+ rc = 0
+ while rc < last-first+1 do
+ sc = sc + 1
+ rc = rc + 1
+ local crep = r[rc]
+ local char_node = wn[sc]
local char_base = char_node
if crep and crep.data then
char_base = wn[crep.data+first-1]
end
- if crep == {} then
- break
- elseif crep == nil then
+ if crep and next(crep) == nil then &% {}
+ reparse = false
+ elseif crep == nil then &% remove
changed = changed + 1
node.remove(head, char_node)
+ table.remove(wn, sc)
+ reparse = false
+ w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
+ sc = sc - 1
elseif crep and crep.insert then
&% print(crep.insert)
elseif crep and crep.string then
changed = changed + 1
local str = crep.string(matches)
if str == '' then
- if q == 1 then
+ if sc == 1 then
word_head = char_node.next
end
head, new = node.remove(head, char_node)
elseif char_node.id == 29 and u.len(str) == 1 then
+ &% For one-to-one replacements we can modify the values
+ &% directly, without re-fetching.
char_node.char = string.utfvalue(str)
+ reparse = false
+ w = u.sub(w, 1, sc-1) .. str .. u.sub(w, sc+1)
else
local n
for s in string.utfvalues(str) do
@@ -13952,7 +13969,7 @@ end
n = node.copy(char_base)
end
n.char = s
- if q == 1 then
+ if sc == 1 then
head, new = node.insert_before(head, char_node, n)
word_head = new
else
@@ -13963,19 +13980,21 @@ end
node.remove(head, char_node)
end &% string length
end &% if char and char.string
- end &% for char in match
+ end &% while char in match
+
if changed > 20 then
texio.write('Too many changes. Ignoring the rest.')
elseif changed > 0 then
- &% For one-to-one can we modifiy directly the
- &% values without re-fetching? Very likely.
- w, wn, nw = Babel.fetch_subtext(word_head)
+ if reparse then
+ w, wn, nw = Babel.fetch_subtext(word_head)
+ else
+ reparse = true
+ end
end
-
end &% for match
end &% for patterns
word_head = nw
- end &% for words
+ end &% for subtext
return head
end
&%%% end of preliminary code for \babelprehyphenation
@@ -14046,7 +14065,6 @@ end
rep = rep:gsub( '(pre)%s*=%s*([^%s,]*)', Babel.capture_func)
rep = rep:gsub( '(post)%s*=%s*([^%s,]*)', Babel.capture_func)
rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
- tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
}}}&%
\directlua{
local lbkr = Babel.linebreaking.post_replacements
diff --git a/babel.ins b/babel.ins
index 766c673..7e7afca 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
%% and covered by LPPL is defined by the unpacking scripts (with
%% extension .ins) which are part of the distribution.
%%
-\def\filedate{2020/11/18}
+\def\filedate{2020/11/20}
\def\batchfile{babel.ins}
\input docstrip.tex
diff --git a/babel.pdf b/babel.pdf
index d92e113..5e272eb 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index 05140e9..826e25e 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
%
% \iffalse
%<*dtx>
-\ProvidesFile{bbcompat.dtx}[2020/11/18 v3.51.2195]
+\ProvidesFile{bbcompat.dtx}[2020/11/20 v3.51.2197]
%</dtx>
%
%% File 'bbcompat.dtx'
More information about the latex3-commits
mailing list.