[latex3-commits] [git/LaTeX3-latex3-babel] master: \babelprehyphenation - speed boost with 1-1 and ∞-1 string replacements. (7e36bc9)

Javier email at dante.de
Fri Nov 20 17:39:42 CET 2020


Repository : https://github.com/latex3/babel
On branch  : master
Link       : https://github.com/latex3/babel/commit/7e36bc99f7327769f0e1182a6a28a6935ba0217f

>---------------------------------------------------------------

commit 7e36bc99f7327769f0e1182a6a28a6935ba0217f
Author: Javier <email at localhost>
Date:   Fri Nov 20 17:39:42 2020 +0100

    \babelprehyphenation - speed boost with 1-1 and ∞-1 string replacements.


>---------------------------------------------------------------

7e36bc99f7327769f0e1182a6a28a6935ba0217f
 README.md    |   3 ++-
 babel.dtx    |  72 +++++++++++++++++++++++++++++++++++++----------------------
 babel.ins    |   2 +-
 babel.pdf    | Bin 815841 -> 816205 bytes
 bbcompat.dtx |   2 +-
 5 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 5970533..a7cd5cd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Babel 3.51.2195
+## Babel 3.51.2197
 
 This package manages culturally-determined typographical (and other)
 rules, and hyphenation patterns for a wide range of languages.  Many
@@ -44,6 +44,7 @@ respective authors.
 ### Latest changes
 ```
 3.52   2020-12-??
+       - Improved \babelprehyphenation
        - Fixes:
          - A couple of issues with \localeinfo and \getlocaleproperty
            (#102, #105).
diff --git a/babel.dtx b/babel.dtx
index 1e8d76e..6109f3d 100644
--- a/babel.dtx
+++ b/babel.dtx
@@ -31,7 +31,7 @@
 %
 % \iffalse
 %<*filedriver>
-\ProvidesFile{babel.dtx}[2020/11/18 v3.51.2195 The Babel package]
+\ProvidesFile{babel.dtx}[2020/11/20 v3.51.2197 The Babel package]
 \documentclass{ltxdoc}
 \GetFileInfo{babel.dtx}
 \usepackage{fontspec}
@@ -4823,8 +4823,8 @@ help from Bernd Raichle, for which I am grateful.
 % \section{Tools}
 %
 %    \begin{macrocode}
-%<<version=3.51.2195>>
-%<<date=2020/11/18>>
+%<<version=3.51.2197>>
+%<<date=2020/11/20>>
 %    \end{macrocode}
 %
 % \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -6694,7 +6694,6 @@ help from Bernd Raichle, for which I am grateful.
   \IfFileExists{\CurrentOption.ldf}%
     {\bbl at load@language{\CurrentOption}}%
     {#1\bbl at load@language{#2}#3}}
-% \DeclareOption{afrikaans}{\bbl at try@load at lang{}{dutch}{}}
 \DeclareOption{hebrew}{%
   \input{rlbabel.def}%
   \bbl at load@language{hebrew}}
@@ -13714,7 +13713,6 @@ end
         &% pass
 
       else
-        tex.write_nl(word_string)
         return word_string, word_nodes, item, lang
       end
 
@@ -13740,15 +13738,18 @@ end
         local p = lbkr[lang][k].pattern 
         local r = lbkr[lang][k].replace
 
-        &% For every match
+        &% For every match. 
         while true do
           local matches = { u.match(w, p) }
           if #matches < 2 then break end
 
+          &% Get and remove empty captures (with (), which return a
+          &% number with the position), and keep actual captures
+          &% (from (...)), if any, in matches.
           local first = table.remove(matches, 1)
           local last =  table.remove(matches, #matches)
 
-          &% Fix offsets, from bytes to unicode.
+          &% Fix offsets, from bytes to unicode. Explained above.
           first = u.len(w:sub(1, first-1)) + 1
           last  = u.len(w:sub(1, last-1))
 
@@ -13877,6 +13878,7 @@ end
 
       else
         return word_string, word_nodes, item, lang
+
       end
 
       item = item.next
@@ -13889,7 +13891,7 @@ end
     local lbkr = Babel.linebreaking.pre_replacements
     local word_head = head
 
-    while true do
+    while true do  &% for each subtext block
       local w, wn, nw, lang = Babel.fetch_subtext(word_head)
       if not lang then return head end
 
@@ -13897,16 +13899,17 @@ end
         break
       end
 
-      for k=1, #lbkr[lang] do
+      for k=1, #lbkr[lang] do  &% for each saved prehyphen
         local p = lbkr[lang][k].pattern 
         local r = lbkr[lang][k].replace
         
         while true do
           local matches = { u.match(w, p) }
+          local reparse = true
           if #matches < 2 then break end
 
           local first = table.remove(matches, 1)
-          local last =  table.remove(matches, #matches)
+          local last  = table.remove(matches, #matches)
 
           &% Fix offsets, from bytes to unicode.
           first = u.len(w:sub(1, first-1)) + 1
@@ -13915,34 +13918,48 @@ end
           local new  &% used when inserting and removing nodes
           local changed = 0
 
-          &% This loop traverses the replace list and takes the
-          &% corresponding actions
-          for q = first, last do   
-            local crep = r[q-first+1]
-            local char_node = wn[q]
+          &% This loop traverses the matched substring and takes the
+          &% corresponding action stored in the replacement list.
+          &% sc is the position in substr nodes / string
+          &% rc is the replacement table index
+          sc = first-1
+          rc = 0
+          while rc < last-first+1 do 
+            sc = sc + 1
+            rc = rc + 1
+            local crep = r[rc]
+            local char_node = wn[sc]
             local char_base = char_node
 
             if crep and crep.data then
               char_base = wn[crep.data+first-1]
             end
             
-            if crep == {} then
-              break
-            elseif crep == nil then
+            if crep and next(crep) == nil then &% {}
+              reparse = false
+            elseif crep == nil then &% remove
               changed = changed + 1
               node.remove(head, char_node)
+              table.remove(wn, sc)
+              reparse = false
+              w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
+              sc = sc - 1
             elseif crep and crep.insert then
               &% print(crep.insert)
             elseif crep and crep.string then
               changed = changed + 1
               local str = crep.string(matches)
               if str == '' then 
-                if q == 1 then
+                if sc == 1 then
                   word_head = char_node.next
                 end
                 head, new = node.remove(head, char_node)
               elseif char_node.id == 29 and u.len(str) == 1 then
+                &% For one-to-one replacements we can modify the
+                &% values directly, without re-fetching.
                 char_node.char = string.utfvalue(str)
+                reparse = false
+                w = u.sub(w, 1, sc-1) .. str .. u.sub(w, sc+1)
               else
                 local n
                 for s in string.utfvalues(str) do
@@ -13952,7 +13969,7 @@ end
                     n = node.copy(char_base)
                   end
                   n.char = s
-                  if q == 1 then
+                  if sc == 1 then
                     head, new = node.insert_before(head, char_node, n)   
                     word_head = new
                   else 
@@ -13963,19 +13980,21 @@ end
                 node.remove(head, char_node)
               end  &% string length
             end  &% if char and char.string
-          end  &% for char in match
+          end  &% while char in match
+
           if changed > 20 then
             texio.write('Too many changes. Ignoring the rest.')
           elseif changed > 0 then
-            &% For one-to-one can we modifiy directly the
-            &% values without re-fetching? Very likely.
-            w, wn, nw = Babel.fetch_subtext(word_head)   
+            if reparse then
+              w, wn, nw = Babel.fetch_subtext(word_head) 
+            else
+              reparse = true
+            end
           end
-
         end  &% for match
       end  &% for patterns
       word_head = nw
-    end  &% for words
+    end  &% for subtext
     return head
   end
   &%%% end of preliminary code for \babelprehyphenation
@@ -14046,7 +14065,6 @@ end
            rep = rep:gsub(   '(pre)%s*=%s*([^%s,]*)', Babel.capture_func)
            rep = rep:gsub(  '(post)%s*=%s*([^%s,]*)', Babel.capture_func)
            rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
-           tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
          }}}&%
     \directlua{
       local lbkr = Babel.linebreaking.post_replacements
diff --git a/babel.ins b/babel.ins
index 766c673..7e7afca 100644
--- a/babel.ins
+++ b/babel.ins
@@ -26,7 +26,7 @@
 %% and covered by LPPL is defined by the unpacking scripts (with
 %% extension .ins) which are part of the distribution.
 %%
-\def\filedate{2020/11/18}
+\def\filedate{2020/11/20}
 \def\batchfile{babel.ins}
 \input docstrip.tex
 
diff --git a/babel.pdf b/babel.pdf
index d92e113..5e272eb 100644
Binary files a/babel.pdf and b/babel.pdf differ
diff --git a/bbcompat.dtx b/bbcompat.dtx
index 05140e9..826e25e 100644
--- a/bbcompat.dtx
+++ b/bbcompat.dtx
@@ -30,7 +30,7 @@
 %
 % \iffalse
 %<*dtx>
-\ProvidesFile{bbcompat.dtx}[2020/11/18 v3.51.2195]
+\ProvidesFile{bbcompat.dtx}[2020/11/20 v3.51.2197]
 %</dtx>
 %
 %% File 'bbcompat.dtx'





More information about the latex3-commits mailing list.