texlive[59229] Master/texmf-dist: babel (16may21)

commits+karl at tug.org commits+karl at tug.org
Sun May 16 22:41:27 CEST 2021


Revision: 59229
          http://tug.org/svn/texlive?view=revision&revision=59229
Author:   karl
Date:     2021-05-16 22:41:27 +0200 (Sun, 16 May 2021)
Log Message:
-----------
babel (16may21)

Modified Paths:
--------------
    trunk/Master/texmf-dist/doc/latex/babel/README.md
    trunk/Master/texmf-dist/doc/latex/babel/babel.pdf
    trunk/Master/texmf-dist/source/latex/babel/babel.dtx
    trunk/Master/texmf-dist/source/latex/babel/babel.ins
    trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx
    trunk/Master/texmf-dist/source/latex/babel/locale.zip
    trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua
    trunk/Master/texmf-dist/tex/generic/babel/babel.def
    trunk/Master/texmf-dist/tex/generic/babel/babel.sty
    trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg
    trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini
    trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex
    trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini
    trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini
    trunk/Master/texmf-dist/tex/generic/babel/luababel.def
    trunk/Master/texmf-dist/tex/generic/babel/nil.ldf

Modified: trunk/Master/texmf-dist/doc/latex/babel/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/babel/README.md	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/doc/latex/babel/README.md	2021-05-16 20:41:27 UTC (rev 59229)
@@ -1,4 +1,4 @@
-## Babel 3.58
+## Babel 3.59
 
 This package manages culturally-determined typographical (and other)
 rules, and hyphenation patterns for a wide range of languages. Many
@@ -8,9 +8,9 @@
 
 The latest stable version is available on <https://ctan.org/pkg/babel>.
 
-Changes in version 3.58 are described in:
+Changes in version 3.59 are described in:
 
-https://github.com/latex3/babel/blob/master/news-guides/news/whats-new-in-babel-3.58.md
+https://github.com/latex3/babel/blob/master/news-guides/news/whats-new-in-babel-3.59.md
 
 Apart from the manual, you can find information on some aspects of babel at:
 
@@ -46,30 +46,12 @@
 
 ### Summary of Latest changes
 ```
-3.58   2021-04-26
-       * More predefined transforms (lua):
-         - doubleletter.hyphen: Norsk
-         - oneletter.nobreak:  Czech, Polish, Slovak
-         - hyphen.repeat:      Czech, Polish, Portuguese, Slovak, Spanish
-         - punctuation.space:  Hindi, Sanskrit
-         - transliteration.hk: Sanskrit 
-         - transliteration.gajica: Serbian
-       * 'prehyphenation' transforms are now disabled in verbatim.
-       * New line breaking mode ‘unhyphenated’.
-       * Fix: \shorthandoff*{^} was not revertible (#126).
-       * Experimental code for Arabic justification (with elongated
-         forms: lua).
-         
-3.57   2021-04-07
-       * Predefined transforms (lua):
-         - Arabic:     transliteration.dad
-         - Croatian:   digraphs.ligatures
-         - Greek:      diaeresis.hyphen
-         - Hindi:      transliteration.hk
-         - Hungarian:  digraphs.hyphen
-       * Transforms: {xxxx} syntax also in string=.
-       * Preliminary code for Uyghur hyphenation (lua).
-       * magyar as alternative to hungarian in \babelprovide.
+3.59   2021-05-16
+       * Tentative kashida with user-definable rules (via transforms).
+       * Fixes:
+         - \babel at texpdf inconsistently defined (#130)
+         - Partial fix for #131 (quotation with CJK).
+       * Removed doubleletter.hyphen (Norsk), which served no purpose.
 ```
 
 ### Previous changes

Modified: trunk/Master/texmf-dist/doc/latex/babel/babel.pdf
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/source/latex/babel/babel.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/babel.dtx	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/babel.dtx	2021-05-16 20:41:27 UTC (rev 59229)
@@ -31,7 +31,7 @@
 %
 % \iffalse
 %<*filedriver>
-\ProvidesFile{babel.dtx}[2021/04/26 v3.58 The Babel package]
+\ProvidesFile{babel.dtx}[2021/05/16 v3.59 The Babel package]
 \documentclass{ltxdoc}
 \GetFileInfo{babel.dtx}
 \usepackage{fontspec}
@@ -2579,6 +2579,18 @@
 Currently used only in Southeast Asian scrips, like Thai. Ignored if 0
 (which is the default value). 
 
+\Describe{justification=}{\texttt{kashida} $\string|$
+\texttt{elongated} $\string|$ \texttt{unhyphenated}}
+\New{3.59} There are currently three options, mainly for the Arabic
+script. This key sets the linebreaking and justification method, which
+can be based on the \textsc{arabic tatweel} character or on the
+‘justification alternatives’ OpenType table (\texttt{jalt}). For an
+explanation see the \href{https://github.com/latex3/babel/blob/master/%
+news-guides/news/whats-new-in-babel-3.59.md}{\babel\ site}.
+
+\Describe{linebreaking=}{}
+\New{3.59} Just a synonym for \texttt{justification}.
+
 \Describe{mapfont=}{\texttt{direction}}
 Assigns the font for the writing direction of this language (only with
 |bidi=basic|). Whenever possible, instead of this option use |onchar|,
@@ -3080,7 +3092,7 @@
 \trans{Hindi, Sanskrit}{transliteration.hk}{The Harvard-Kyoto system to
 romanize Devanagari.}
 
-\trans{}{punctuation.space}{Inserts a space before the following
+\trans{Hindi, Sanskrit}{punctuation.space}{Inserts a space before the following
 four characters: \textit{!?:;}\,.}
 
 \trans{Hungarian}{digraphs.hyphen}{Hyphenates the long digraphs
@@ -3088,10 +3100,10 @@
 \textit{ssz}, \textit{tty} and \textit{zzs} as \textit{cs-cs},
 \textit{dz-dz}, etc.}
 
-\trans{Norsk}{doubleletter.hyphen}{Hyphenates the doble-letter groups
-\textit{bb}, \textit{dd}, \textit{ff}, \textit{gg}, \textit{ll},
-\textit{mm}, \textit{nn}, \textit{pp}, \textit{rr}, \textit{ss},
-\textit{tt} as \textit{bb-b}, \textit{dd-d}, etc.}
+\trans{Arabic, Persian}{kashida.plain}{Experimental. A very simple and
+basic transform for ‘plain’ Arabic fonts, which attempts to distribute
+the tatwil as evenly as possible (starting at the end of the line). See
+the news for version 3.59.}
 
 \trans{Serbian}{transliteration.gajica}{(Note |serbian| with |ini|
 files refers to the Cyrillic script, which is here the target.) The
@@ -3106,9 +3118,9 @@
 \New{3.37-3.39} \textit{With \luatex{}} it is now possible to define
 non-standard hyphenation rules, like |f-f| $\to$ |ff-f|, repeated
 hyphens, ranked ruled (or more precisely, ‘penalized’ hyphenation
-points), and so on. No rules are currently provided by default, but
-they can be defined as shown in the following example, where |{1}| is
-the first captured char (between |()| in the pattern):
+points), and so on. Only a few rules are currently provided (see
+below), but more can be defined as shown in the following example,
+where |{1}| is the first captured char (between |()| in the pattern):
 \begin{verbatim}
 \babelposthyphenation{german}{([fmtrp]) | {1}}
 {
@@ -3147,10 +3159,8 @@
 is the locale instead of the name of the hyphenation patterns; (2) in the
 search patterns |=| has no special meaning, while \verb+|+ stands for
 an ordinary space; (3) in the replacement, discretionaries are not
-accepted.
+accepted. 
 
-It handles glyphs and spaces.
-
 This feature is activated with the first |\babelposthyphenation| or
 |\babelprehyphenation|.
 
@@ -3188,7 +3198,7 @@
   \babel{} by default recognizes this setting if the font has been
   declared with |\babelfont|. The \textit{transforms} mechanism
   supplements rather than replaces OTF features.
-  
+
   With \xetex{}, where \textit{transforms} are not available, there is
   still another approach, with font mappings, mainly meant to perform
   encoding conversions and transliterations. Mappings, however, are
@@ -3880,12 +3890,11 @@
 Currently this macro understands the following keys (and only for
 \textsf{luatex}), with values |on| or |off|: |bidi.text|,
 |bidi.mirroring|, |bidi.mapdigits|, |layout.lists|, |layout.tabular|,
-|linebreak.sea|, |linebreak.cjk|. For example, you can set
+|linebreak.sea|, |linebreak.cjk|, |justify.arabic|. For example, you can set
 |\babeladjust{bidi.text=off}| if you are using an alternative algorithm
-or with large sections not requiring it. With \textsf{luahbtex} you may
-need |bidi.mirroring=off|. Use with care, because these options do not
-deactivate other related options (like paragraph direction with
-|bidi.text|).
+or with large sections not requiring it. Use with care, because these
+options do not deactivate other related options (like paragraph
+direction with |bidi.text|).
 
 \subsection{Tips, workarounds, known issues and notes}
 
@@ -4958,8 +4967,8 @@
 % \section{Tools}
 %
 %    \begin{macrocode}
-%<<version=3.58>>
-%<<date=2021/04/26>>
+%<<version=3.59>>
+%<<date=2021/05/16>>
 %    \end{macrocode}
 %
 % \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -7653,9 +7662,13 @@
   \else
     \edef\bbl at tempa{\bbl at cl{lnbrk}}%
   \fi
+  % linebreaking - handle u, e, k (v in the future)
   \bbl at xin@{/u}{/\bbl at tempa}%
+  \ifin@\else\bbl at xin@{/e}{/\bbl at tempa}\fi % elongated forms 
+  \ifin@\else\bbl at xin@{/k}{/\bbl at tempa}\fi % only kashida
+  \ifin@\else\bbl at xin@{/v}{/\bbl at tempa}\fi % variable font
   \ifin@
-    % 'unhyphenated' = allow stretching
+    % unhyphenated/kashida/elongated = allow stretching
     \language\l at unhyphenated
     \babel at savevariable\emergencystretch
     \emergencystretch\maxdimen
@@ -8985,7 +8998,7 @@
 %    \begin{macrocode}
 \def\babel at texpdf#1#2#3#4{%
   \ifx\texorpdfstring\@undefined
-    \textormath{#1}{#2}%
+    \textormath{#1}{#3}%
   \else
     \texorpdfstring{\textormath{#1}{#3}}{#2}%
     % \texorpdfstring{\textormath{#1}{#3}}{\textormath{#2}{#4}}%
@@ -10788,6 +10801,7 @@
   \let\bbl at KVP@language\@nil
   \let\bbl at KVP@hyphenrules\@nil
   \let\bbl at KVP@linebreaking\@nil
+  \let\bbl at KVP@justification\@nil
   \let\bbl at KVP@mapfont\@nil
   \let\bbl at KVP@maparabic\@nil
   \let\bbl at KVP@mapdigits\@nil
@@ -10968,6 +10982,20 @@
     \bbl at csarg\edef{intsp@#2}{\bbl at KVP@intraspace}%
   \fi
   \bbl at provide@intraspace
+  %
+  \ifx\bbl at KVP@justification\@nil\else
+     \let\bbl at KVP@linebreaking\bbl at KVP@justification
+  \fi
+  \ifx\bbl at KVP@linebreaking\@nil\else
+    \bbl at xin@{,\bbl at KVP@linebreaking,}{,elongated,kashida,cjk,unhyphenated,}%
+    \ifin@
+      \bbl at csarg\xdef
+        {lnbrk@\languagename}{\expandafter\@car\bbl at KVP@linebreaking\@nil}%
+    \fi
+  \fi
+  \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+  \ifin@\else\bbl at xin@{/k}{/\bbl at cl{lnbrk}}\fi
+  \ifin@\bbl at arabicjust\fi
   % == Line breaking: hyphenate.other.locale/.script==
   \ifx\bbl at lbkflag\@empty
     \bbl at ifunset{bbl at hyotl@\languagename}{}%
@@ -12126,6 +12154,10 @@
   \bbl at adjust@lua{linebreak}{cjk_enabled=true}}
 \@namedef{bbl at ADJ@linebreak.cjk at off}{%
   \bbl at adjust@lua{linebreak}{cjk_enabled=false}}
+\@namedef{bbl at ADJ@justify.arabic at on}{%
+  \bbl at adjust@lua{linebreak}{arabic.justify_enabled=true}}
+\@namedef{bbl at ADJ@justify.arabic at off}{%
+  \bbl at adjust@lua{linebreak}{arabic.justify_enabled=false}}
 %
 \def\bbl at adjust@layout#1{%
   \ifvmode
@@ -13571,7 +13603,23 @@
       end
     end
   }^^
-  \bbl at luahyphenate}
+  \bbl at luahyphenate}  
+%    \end{macrocode}
+%    
+% \subsection{CJK line breaking}
+%
+% Minimal line breaking for CJK scripts, mainly intended for simple
+% documents and short texts as a secondary language. Only line
+% breaking, with a little stretching for justification, without any
+% attempt to adjust the spacing. It is based on (but does not strictly
+% follow) the Unicode algorithm.
+%
+% We first need a little table with the corresponding line breaking
+% properties. A few characters have an additional key for the width
+% (fullwidth \textit{vs.} halfwidth), not yet used. There is a separate
+% file, defined below.
+%
+%    \begin{macrocode}
 \catcode`\%=14
 \gdef\bbl at cjkintraspace{%
   \let\bbl at cjkintraspace\relax
@@ -13696,23 +13744,216 @@
      \fi}}
 %    \end{macrocode}
 %
-% \subsection{CJK line breaking}
+% \subsection{Arabic justification}
 %
-% Minimal line breaking for CJK scripts, mainly intended for simple
-% documents and short texts as a secundary language. Only line
-% breaking, with a little stretching for justification, without any
-% attempt to adjust the spacing. It is based on (but does not strictly
-% follow) the Unicode algorithm.
+%    \begin{macrocode}
+\ifnum\bbl at bidimode>100 \ifnum\bbl at bidimode<200
+\def\bblar at chars{%
+  0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
+  0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
+  0640,0641,0642,0643,0644,0645,0646,0647,0649}
+\def\bblar at elongated{%
+  0626,0628,062A,062B,0633,0634,0635,0636,063B,%
+  063C,063D,063E,063F,0641,0642,0643,0644,0646,%
+  0649,064A}
+\begingroup
+  \catcode`_=11 \catcode`:=11 % Let _ and : be letters to spell the expl3 name
+  \gdef\bblar at nofswarn{\def\msg_warning:nnx##1##2##3{}}% Local no-op: a group around the call restores it
+\endgroup
+\gdef\bbl at arabicjust{% One-time setup for Arabic justification
+  \let\bbl at arabicjust\relax % Later calls do nothing
+  \newattribute\bblar at kashida % Read by Babel.arabic.justify as the kashida weight
+  \bblar at kashida=\z@ % 0 = not a kashida point (justify tests for > 0)
+  \expandafter\bbl at add\csname selectfont \endcsname{{\bbl at parsejalt}}% Presumably appends \bbl at parsejalt to \selectfont -- confirm \bbl at add
+  \directlua{
+    Babel.arabic.elong_map   = Babel.arabic.elong_map or {} % Keep an existing map
+    Babel.arabic.elong_map[\the\localeid]   = {} % Entry for the \localeid current at this point
+    luatexbase.add_to_callback('post_linebreak_filter',
+      Babel.arabic.justify, 'Babel.arabic.justify') % Register the justifier
+  }}%
+% For each code in list #1, typeset ZWJ + char + #2 and save in Babel.arabic.#3
+% (from/dest), key code..#4, the last glyph found. TODO. Save also widths.
+\def\bblar at fetchjalt#1#2#3#4{%
+  \bbl at exp{\\\bbl at foreach{#1}}{%
+    \bbl at ifunset{bblar at JE@##1}%
+      {\setbox\z@\hbox{^^^^200d\char"##1#2}}%
+      {\setbox\z@\hbox{^^^^200d\char"\@nameuse{bblar at JE@##1}#2}}%
+    \directlua{%
+      local last = nil
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 0x600 and
+            not (item.char == 0x200D) then
+          last = item % Keep the last glyph above U+0600, ZWJ excluded
+        end
+      end
+      Babel.arabic.#3['##1#4'] = last and last.char % nil (no error) if no glyph was found
+    }}}
+% Brute force. No rules at all, yet. The ideal: look at jalt table. And
+% perhaps other tables (falt?, cswh?). What about kaf? And diacritic
+% positioning?
+\gdef\bbl at parsejalt{% Schedule \bbl at parsejalti once per locale/font pair
+  \ifx\addfontfeature\@undefined\else % Only if \addfontfeature is defined
+    \bbl at xin@{/e}{/\bbl at cl{lnbrk}}% Is the linebreaking mode 'e' (elongated)?
+    \ifin@
+      \directlua{%
+        if Babel.arabic.elong_map[\the\localeid][\fontid\font] == nil then % Pair not seen yet
+          Babel.arabic.elong_map[\the\localeid][\fontid\font] = {} % Mark it as handled
+          tex.print([[\string\csname\space bbl at parsejalti\endcsname]]) % Pass the macro call back to TeX
+        end
+      }%
+    \fi
+  \fi}
+\gdef\bbl at parsejalti{% Fill elong_map for the current locale and font
+  \begingroup
+    \let\bbl at parsejalt\relax     % To avoid infinite loop
+    \edef\bbl at tempb{\fontid\font}% Font id at entry; used below as the map key
+    \bblar at nofswarn % Makes \msg_warning:nnx a no-op (see \bblar at nofswarn)
+    \bblar at fetchjalt\bblar at elongated{}{from}{}% Glyphs before +jalt is added
+    \bblar at fetchjalt\bblar at chars{^^^^064a}{from}{a}% Each char followed by U+064A (yeh)
+    \bblar at fetchjalt\bblar at chars{^^^^0649}{from}{y}% Each char followed by U+0649 (alef maksura)
+    \addfontfeature{RawFeature=+jalt}%
+    % \@namedef{bblar at JE@0643}{06AA}% todo: catch medial kaf
+    \bblar at fetchjalt\bblar at elongated{}{dest}{}% Same three probes after +jalt is added
+    \bblar at fetchjalt\bblar at chars{^^^^064a}{dest}{a}%
+    \bblar at fetchjalt\bblar at chars{^^^^0649}{dest}{y}%
+      \directlua{%
+        for k, v in pairs(Babel.arabic.from) do
+          if Babel.arabic.dest[k] and
+              not (Babel.arabic.from[k] == Babel.arabic.dest[k]) then % The two probes differ
+            Babel.arabic.elong_map[\the\localeid][\bbl at tempb]
+               [Babel.arabic.from[k]] = Babel.arabic.dest[k]
+          end
+        end
+      }%
+  \endgroup}
 %
-% We first need a little table with the corresponding line breaking
-% properties. A few characters have an additional key for the width
-% (fullwidth \textit{vs.} halfwidth), not yet used. There is a separate
-% file, defined below.
+\begingroup
+\catcode`#=11
+\catcode`~=11
+\directlua{
+
+Babel.arabic = Babel.arabic or {}
+Babel.arabic.from = {}
+Babel.arabic.dest = {}
+Babel.arabic.justify_factor = 0.95
+Babel.arabic.justify_enabled = true
+
+function Babel.arabic.justify(head)
+  % post_linebreak_filter: widen stretched lines with elongated glyphs, then tatweels
+  if not Babel.arabic.justify_enabled then return head end
+  local d, new
+  local k_list, k_item, pos_inline, elongs  % elongs was an accidental global
+  local width, width_new, full, k_curr, wt_pos, goal
+  local elong_map = Babel.arabic.elong_map
+  local GLYPH = node.id'glyph'
+  local KASHIDA = luatexbase.registernumber'bblar at kashida'
+  local LOCALE = luatexbase.registernumber'bbl at attr@locale'
+
+  for line in node.traverse_id(node.id'hlist', head) do
+    % Exclude last line. todo. But-- it discards one-word lines, too!
+    % ? Look for glue = 12:15
+    if (line.glue_sign == 1 and line.glue_order == 0) then
+      local subst_done = false  % Per line, so only changed lines are repacked
+      elongs = {}     % Stores elongated candidates of each line
+      k_list = {}     % And all letters with kashida
+      pos_inline = 0  % Not yet used
+
+      for n in node.traverse_id(GLYPH, line.head) do
+        pos_inline = pos_inline + 1 % To find where it is. Not used.
+
+        % Elongated glyphs
+        if elong_map then
+          local locale = node.get_attribute(n, LOCALE)
+          if elong_map[locale] and elong_map[locale][n.font] and
+              elong_map[locale][n.font][n.char] then
+            table.insert(elongs, {node = n, locale = locale} )
+            if n.prev then node.set_attribute(n.prev, KASHIDA, 0) end
+          end
+        end
+
+        % Tatwil
+        if Babel.kashida_wts then
+          local k_wt = node.get_attribute(n, KASHIDA)  % nil if unset
+          if k_wt and k_wt > 0 then % todo. parameter for multi inserts
+            table.insert(k_list, {node = n, weight = k_wt, pos = pos_inline})
+          end
+        end
+
+      end % of node.traverse_id
+
+      if #elongs == 0 and #k_list == 0 then goto next_line end
+
+      full = line.width
+      goal = full * Babel.arabic.justify_factor % A bit crude
+      width = node.dimensions(line.head)    % The 'natural' width
+
+      % == Elongated ==
+      % Original idea taken from 'chickenize'
+      while (#elongs > 0 and width < goal) do
+        subst_done = true
+        local x = #elongs
+        local curr = elongs[x].node
+        local oldchar = curr.char
+        curr.char = elong_map[elongs[x].locale][curr.font][curr.char]
+        width = node.dimensions(line.head)  % Check if the line is too wide
+        % Substitute back if the line would be too wide and break:
+        if width > goal then
+          curr.char = oldchar
+          break
+        end
+        % If continue, pop the just substituted node from the list:
+        table.remove(elongs, x)
+      end
+
+      % == Tatwil ==
+      if #k_list == 0 then goto next_line end
+
+      width = node.dimensions(line.head)    % The 'natural' width
+      k_curr = #k_list
+      wt_pos = 1
+
+      while width < goal do
+        subst_done = true
+        k_item = k_list[k_curr].node
+        if k_list[k_curr].weight == Babel.kashida_wts[wt_pos] then
+          d = node.copy(k_item)
+          d.char = 0x0640
+          line.head, new = node.insert_after(line.head, k_item, d)
+          width_new = node.dimensions(line.head)
+          if width_new > goal or width == width_new then  % Test the new width
+            node.remove(line.head, new) % Better compute before
+            break
+          end
+          width = width_new
+        end
+        if k_curr == 1 then
+          k_curr = #k_list
+          wt_pos = (wt_pos >= #Babel.kashida_wts) and 1 or wt_pos+1
+        else
+          k_curr = k_curr - 1
+        end
+      end
+
+      ::next_line::
+
+      % Must take into account marks and ins, see luatex manual.
+      % Have to be executed only if there are changes. Investigate
+      % what's going on exactly.
+      if subst_done then  % Keep the returned heads: line may be the list head
+        head = node.insert_before(head, line, node.hpack(line.head, full, 'exactly'))
+        head = node.remove(head, line)
+      end
+    end % if process line
+  end % for lines
+  return head
+end
+}
+\endgroup
+\fi\fi % Arabic just block
+%    \end{macrocode}
 %
-% \textit{Work in progress.} 
+% \subsection{Common stuff}
 %
-% Common stuff.
-%
 %    \begin{macrocode}
 \AddBabelHook{babel-fontspec}{afterextras}{\bbl at switchfont}
 \AddBabelHook{babel-fontspec}{beforestart}{\bbl at ckeckstdfonts}
@@ -13951,11 +14192,11 @@
   end
 
   Babel.fetch_subtext = {}
-  
+
   Babel.ignore_pre_char = function(node)
     return (node.lang == \the\l at nohyphenation)
   end
-  
+
   &% Merging both functions doesn't seen feasible, because there are too
   &% many differences.
   Babel.fetch_subtext[0] = function(head)
@@ -14136,6 +14377,7 @@
         &% after the match, either as found by u.match (faster) or the
         &% computed position based on sc if w has changed.
         local last_match = 0
+        local step = 0
 
         &% For every match.
         while true do
@@ -14143,9 +14385,9 @@
             print('=====')
           end
           local new  &% used when inserting and removing nodes
-          local refetch = false
 
           local matches = { u.match(w, p, last_match) }
+          
           if #matches < 2 then break end
 
           &% Get and remove empty captures (with ()'s, which return a
@@ -14210,6 +14452,10 @@
             if crep and crep.data then
               item_base = data_nodes[crep.data]
             end
+            
+            if crep then
+              step = crep.step or 0
+            end
 
             if crep and next(crep) == nil then &% = {}
               last_match = save_last    &% Optimization
@@ -14220,9 +14466,16 @@
               table.remove(w_nodes, sc)
               w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
               sc = sc - 1  &% Nothing has been inserted.
-              last_match = utf8.offset(w, sc+1)
+              last_match = utf8.offset(w, sc+1+step)
               goto next
 
+            elseif crep and crep.kashida then &% Experimental
+              node.set_attribute(item,
+                 luatexbase.registernumber'bblar at kashida', 
+                 crep.kashida)
+              last_match = utf8.offset(w, sc+1+step)
+              goto next
+
             elseif crep and crep.string then
               local str = crep.string(matches)
               if str == '' then  &% Gather with nil
@@ -14256,7 +14509,7 @@
                 end  &% for
                 node.remove(head, item)
               end  &% if ''
-              last_match = utf8.offset(w, sc+1)
+              last_match = utf8.offset(w, sc+1+step)
               goto next
 
             elseif mode == 1 and crep and (crep.pre or crep.no or crep.post) then
@@ -14311,7 +14564,7 @@
 
             end  &% ie replacement cases
 
-            &% Shared by disc, space and penalty. 
+            &% Shared by disc, space and penalty.
             if sc == 1 then
               word_head = head
             end
@@ -14325,7 +14578,7 @@
               w = u.sub(w, 1, sc-1) .. placeholder .. u.sub(w, sc+1)
             end
 
-            last_match = utf8.offset(w, sc+1)
+            last_match = utf8.offset(w, sc+1+step)
 
             ::next::
 
@@ -14395,6 +14648,26 @@
     return "]]..Babel.capt_map(m[" .. capno .. "]," ..
            (mlen) .. ").." .. "[["
   end
+  
+  &% Create/extend Babel.kashida_wts, the list of distinct kashida weights
+  &% in descending order, and return the text 'kashida = <wt>'. Used as
+  &% the replacement function of a string.gsub over 'kashida=<wt>'.
+  function Babel.capture_kashida(key, wt)
+    wt = tonumber(wt)
+    local wts = Babel.kashida_wts or {}
+    Babel.kashida_wts = wts
+    local pos = #wts + 1  &% Default: append (length operator, not table.getn)
+    for p, q in ipairs(wts) do
+      if wt == q then
+        return 'kashida = ' .. wt  &% Already registered
+      elseif wt > q then
+        pos = p  &% First smaller weight: insert before it
+        break
+      end
+    end
+    table.insert(wts, pos, wt)  &% Single insert, after the scan
+    return 'kashida = ' .. wt
+  end
 }
 %    \end{macrocode}
 %
@@ -14473,10 +14746,11 @@
            rep = rep:gsub('^%s*(remove)%s*$', 'remove = true')
            rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
            rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
-           rep = rep:gsub( '(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+           rep = rep:gsub('(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
              'space = {' .. '%2, %3, %4' .. '}')
-           rep = rep:gsub( '(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+           rep = rep:gsub('(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
              'spacefactor = {' .. '%2, %3, %4' .. '}')
+           rep = rep:gsub('(kashida)%s*=%s*([^%s,]*)', Babel.capture_kashida)
            tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
          }}}&%
     \directlua{
@@ -21562,8 +21836,10 @@
 
 Babel.cjk_characters = {
   [0x0021]={c='ex'},
+  [0x0022]={c='qu'},          
   [0x0024]={c='pr'},
   [0x0025]={c='po'},
+  [0x0027]={c='qu'},          
   [0x0028]={c='op'},
   [0x0029]={c='cp'},
   [0x002B]={c='pr'},
@@ -21584,10 +21860,18 @@
   [0x00A3]={c='pr'},
   [0x00A4]={c='pr'},
   [0x00A5]={c='pr'},
+  [0x00AB]={c='qu'},          
   [0x00B0]={c='po'},
   [0x00B1]={c='pr'},
+  [0x00BB]={c='qu'},          
+  [0x2018]={c='qu'},          
+  [0x2019]={c='qu'},          
   [0x201A]={c='op'},
+  [0x201B]={c='qu'},    
+  [0x201C]={c='qu'},    
+  [0x201D]={c='qu'},          
   [0x201E]={c='op'},
+  [0x201F]={c='qu'},          
   [0x2024]={c='in'},
   [0x2025]={c='in'},
   [0x2026]={c='in'},
@@ -21599,6 +21883,8 @@
   [0x2035]={c='po'},
   [0x2036]={c='po'},
   [0x2037]={c='po'},
+  [0x2039]={c='qu'},          
+  [0x203A]={c='qu'},            
   [0x203C]={c='ns'},
   [0x203D]={c='ns'},
   [0x2044]={c='is'},
@@ -21657,7 +21943,23 @@
   [0x29FD]={c='cl'},
   [0x2CF9]={c='ex'},
   [0x2CFE]={c='ex'},
+  [0x2E02]={c='qu'},          
+  [0x2E03]={c='qu'},          
+  [0x2E04]={c='qu'},          
+  [0x2E05]={c='qu'},          
+  [0x2E06]={c='qu'},    
+  [0x2E07]={c='qu'},    
+  [0x2E08]={c='qu'},    
+  [0x2E09]={c='qu'},          
+  [0x2E0A]={c='qu'},          
+  [0x2E0B]={c='qu'},          
+  [0x2E0C]={c='qu'},          
+  [0x2E0D]={c='qu'},          
   [0x2E18]={c='op'},
+  [0x2E1C]={c='qu'},          
+  [0x2E1D]={c='qu'},          
+  [0x2E20]={c='qu'},          
+  [0x2E21]={c='qu'}, 
   [0x2E22]={c='op'},
   [0x2E23]={c='cl'},
   [0x2E24]={c='op'},
@@ -21810,6 +22112,7 @@
   ['po'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
   ['in'] = { ['op']=1, ['pr']=1, ['po']=1,           ['I']=1 },
   ['hy'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
+  ['qu'] = { },
   --
   ['I']  = { ['op']=1, ['pr']=1, ['I']=1, ['O']=1 },
   ['O']  = {                     ['I']=1 }

Modified: trunk/Master/texmf-dist/source/latex/babel/babel.ins
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/babel.ins	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/babel.ins	2021-05-16 20:41:27 UTC (rev 59229)
@@ -26,7 +26,7 @@
 %% and covered by LPPL is defined by the unpacking scripts (with
 %% extension .ins) which are part of the distribution.
 %%
-\def\filedate{2021/04/26}
+\def\filedate{2021/05/16}
 \def\batchfile{babel.ins}
 \input docstrip.tex
 

Modified: trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx	2021-05-16 20:41:27 UTC (rev 59229)
@@ -30,7 +30,7 @@
 %
 % \iffalse
 %<*dtx>
-\ProvidesFile{bbcompat.dtx}[2021/04/26 v3.58]
+\ProvidesFile{bbcompat.dtx}[2021/05/16 v3.59]
 %</dtx>
 %
 %% File 'bbcompat.dtx'

Modified: trunk/Master/texmf-dist/source/latex/babel/locale.zip
===================================================================
(Binary files differ)

Modified: trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua	2021-05-16 20:41:27 UTC (rev 59229)
@@ -36,8 +36,10 @@
 
 Babel.cjk_characters = {
   [0x0021]={c='ex'},
+  [0x0022]={c='qu'},
   [0x0024]={c='pr'},
   [0x0025]={c='po'},
+  [0x0027]={c='qu'},
   [0x0028]={c='op'},
   [0x0029]={c='cp'},
   [0x002B]={c='pr'},
@@ -58,10 +60,18 @@
   [0x00A3]={c='pr'},
   [0x00A4]={c='pr'},
   [0x00A5]={c='pr'},
+  [0x00AB]={c='qu'},
   [0x00B0]={c='po'},
   [0x00B1]={c='pr'},
+  [0x00BB]={c='qu'},
+  [0x2018]={c='qu'},
+  [0x2019]={c='qu'},
   [0x201A]={c='op'},
+  [0x201B]={c='qu'},
+  [0x201C]={c='qu'},
+  [0x201D]={c='qu'},
   [0x201E]={c='op'},
+  [0x201F]={c='qu'},
   [0x2024]={c='in'},
   [0x2025]={c='in'},
   [0x2026]={c='in'},
@@ -73,6 +83,8 @@
   [0x2035]={c='po'},
   [0x2036]={c='po'},
   [0x2037]={c='po'},
+  [0x2039]={c='qu'},
+  [0x203A]={c='qu'},
   [0x203C]={c='ns'},
   [0x203D]={c='ns'},
   [0x2044]={c='is'},
@@ -131,7 +143,23 @@
   [0x29FD]={c='cl'},
   [0x2CF9]={c='ex'},
   [0x2CFE]={c='ex'},
+  [0x2E02]={c='qu'},
+  [0x2E03]={c='qu'},
+  [0x2E04]={c='qu'},
+  [0x2E05]={c='qu'},
+  [0x2E06]={c='qu'},
+  [0x2E07]={c='qu'},
+  [0x2E08]={c='qu'},
+  [0x2E09]={c='qu'},
+  [0x2E0A]={c='qu'},
+  [0x2E0B]={c='qu'},
+  [0x2E0C]={c='qu'},
+  [0x2E0D]={c='qu'},
   [0x2E18]={c='op'},
+  [0x2E1C]={c='qu'},
+  [0x2E1D]={c='qu'},
+  [0x2E20]={c='qu'},
+  [0x2E21]={c='qu'},
   [0x2E22]={c='op'},
   [0x2E23]={c='cl'},
   [0x2E24]={c='op'},
@@ -284,6 +312,7 @@
   ['po'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
   ['in'] = { ['op']=1, ['pr']=1, ['po']=1,           ['I']=1 },
   ['hy'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
+  ['qu'] = { },
   --
   ['I']  = { ['op']=1, ['pr']=1, ['I']=1, ['O']=1 },
   ['O']  = {                     ['I']=1 }

Modified: trunk/Master/texmf-dist/tex/generic/babel/babel.def
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel.def	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel.def	2021-05-16 20:41:27 UTC (rev 59229)
@@ -39,7 +39,7 @@
     \wlog{File: #1 #4 #3 <#2>}%
     \let\ProvidesFile\@undefined}
 \fi
-\ProvidesFile{babel.def}[2021/04/26 3.58 Babel common definitions]
+\ProvidesFile{babel.def}[2021/05/16 3.59 Babel common definitions]
 \ifx\AtBeginDocument\@undefined  % TODO. change test.
     % == Code for plain ==
 \def\@empty{}
@@ -397,8 +397,8 @@
 \fi
 \countdef\last at language=19  % TODO. why? remove?
 \def\addlanguage{\csname newlanguage\endcsname}
-\def\bbl at version{3.58}
-\def\bbl at date{2021/04/26}
+\def\bbl at version{3.59}
+\def\bbl at date{2021/05/16}
 \def\adddialect#1#2{%
   \global\chardef#1#2\relax
   \bbl at usehooks{adddialect}{{#1}{#2}}%
@@ -694,9 +694,13 @@
   \else
     \edef\bbl at tempa{\bbl at cl{lnbrk}}%
   \fi
+  % linebreaking - handle u, e, k (v in the future)
   \bbl at xin@{/u}{/\bbl at tempa}%
+  \ifin@\else\bbl at xin@{/e}{/\bbl at tempa}\fi % elongated forms
+  \ifin@\else\bbl at xin@{/k}{/\bbl at tempa}\fi % only kashida
+  \ifin@\else\bbl at xin@{/v}{/\bbl at tempa}\fi % variable font
   \ifin@
-    % 'unhyphenated' = allow stretching
+    % unhyphenated/kashida/elongated = allow stretching
     \language\l at unhyphenated
     \babel at savevariable\emergencystretch
     \emergencystretch\maxdimen
@@ -1483,7 +1487,7 @@
 
 \def\babel at texpdf#1#2#3#4{%
   \ifx\texorpdfstring\@undefined
-    \textormath{#1}{#2}%
+    \textormath{#1}{#3}%
   \else
     \texorpdfstring{\textormath{#1}{#3}}{#2}%
     % \texorpdfstring{\textormath{#1}{#3}}{\textormath{#2}{#4}}%
@@ -2357,6 +2361,7 @@
   \let\bbl at KVP@language\@nil
   \let\bbl at KVP@hyphenrules\@nil
   \let\bbl at KVP@linebreaking\@nil
+  \let\bbl at KVP@justification\@nil
   \let\bbl at KVP@mapfont\@nil
   \let\bbl at KVP@maparabic\@nil
   \let\bbl at KVP@mapdigits\@nil
@@ -2537,6 +2542,20 @@
     \bbl at csarg\edef{intsp@#2}{\bbl at KVP@intraspace}%
   \fi
   \bbl at provide@intraspace
+  %
+  \ifx\bbl at KVP@justification\@nil\else
+     \let\bbl at KVP@linebreaking\bbl at KVP@justification
+  \fi
+  \ifx\bbl at KVP@linebreaking\@nil\else
+    \bbl at xin@{,\bbl at KVP@linebreaking,}{,elongated,kashida,cjk,unhyphenated,}%
+    \ifin@
+      \bbl at csarg\xdef
+        {lnbrk@\languagename}{\expandafter\@car\bbl at KVP@linebreaking\@nil}%
+    \fi
+  \fi
+  \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+  \ifin@\else\bbl at xin@{/k}{/\bbl at cl{lnbrk}}\fi
+  \ifin@\bbl at arabicjust\fi
   % == Line breaking: hyphenate.other.locale/.script==
   \ifx\bbl at lbkflag\@empty
     \bbl at ifunset{bbl at hyotl@\languagename}{}%
@@ -3478,6 +3497,10 @@
   \bbl at adjust@lua{linebreak}{cjk_enabled=true}}
 \@namedef{bbl at ADJ@linebreak.cjk at off}{%
   \bbl at adjust@lua{linebreak}{cjk_enabled=false}}
+\@namedef{bbl at ADJ@justify.arabic at on}{%
+  \bbl at adjust@lua{linebreak}{arabic.justify_enabled=true}}
+\@namedef{bbl at ADJ@justify.arabic at off}{%
+  \bbl at adjust@lua{linebreak}{arabic.justify_enabled=false}}
 \def\bbl at adjust@layout#1{%
   \ifvmode
     #1%

Modified: trunk/Master/texmf-dist/tex/generic/babel/babel.sty
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel.sty	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel.sty	2021-05-16 20:41:27 UTC (rev 59229)
@@ -33,7 +33,7 @@
 %%
 
 \NeedsTeXFormat{LaTeX2e}[2005/12/01]
-\ProvidesPackage{babel}[2021/04/26 3.58 The Babel package]
+\ProvidesPackage{babel}[2021/05/16 3.59 The Babel package]
 \@ifpackagewith{babel}{debug}
   {\providecommand\bbl at trace[1]{\message{^^J[ #1 ]}}%
    \let\bbl at debug\@firstofone

Modified: trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg	2021-05-16 20:41:27 UTC (rev 59229)
@@ -37,10 +37,10 @@
     \wlog{File: #1 #4 #3 <#2>}%
     \let\ProvidesFile\@undefined}
 \fi
-\ProvidesFile{hyphen.cfg}[2021/04/26 3.58 Babel hyphens]
+\ProvidesFile{hyphen.cfg}[2021/05/16 3.59 Babel hyphens]
 \xdef\bbl at format{\jobname}
-\def\bbl at version{3.58}
-\def\bbl at date{2021/04/26}
+\def\bbl at version{3.59}
+\def\bbl at date{2021/05/16}
 \ifx\AtBeginDocument\@undefined
   \def\@empty{}
   \let\orig at dump\dump

Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini	2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
 
 [identification]
 charset = utf8
-version = 1.9
-date = 2021-04-24
+version = 1.10
+date = 2021-05-16
 name.local = العربية
 name.english = Arabic
 name.babel = arabic
@@ -207,5 +207,10 @@
 ; 1-letter
 transliteration.dad.8.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe]) }
 transliteration.dad.8.1 =   { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگژۀ} }
+; 
+kashida.plain.1.0 = { ()[يئهشسقفغعضصنمكلظطخحجثتب]()[يئهشسقفغعضصنمكلظطخحجثتباأإآوؤذدزرة] }
+kashida.plain.1.1 = { kashida = 500 }
+kashida.plain.2.0 = { ()ل()[اأإآ] }
+kashida.plain.2.1 = { kashida = 0 }
 
 

Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex	2021-05-16 20:41:27 UTC (rev 59229)
@@ -10,87 +10,4 @@
 \BabelBeforeIni{ar}{%
 }
 
-\ifcase\bbl at engine\or
-
-\directlua{
-
-Babel.ar_tolong = {}
-
-function Babel.ar_justify(head)
-  local substlist = Babel.ar_tolong
-  local GLYPH = node.id'glyph'
-  local substs, width, goal
-  local subst_done = true % false
-  math.randomseed(1)
-  for line in node.traverse_id(node.id'hlist', head) do
-    if (line.glue_sign == 1 and line.glue_order == 0) then % exclude last line!
-      substs = {} % we store all “expandable” letters of each line
-      for n in node.traverse_id(GLYPH, line.head) do
-        if (substlist[n.char]) then
-          table.insert(substs, n)
-        end
-      end
-      line.glue_set = 0   % deactivate normal glue expansion
-      width = node.dimensions(line.head)    % check the new width
-      goal = line.width
-      
-      while (width < goal and \string#substs > 0) do
-        x = math.random(\string#substs)     % choose randomly a glyph
-        oldchar = substs[x].char
-        substs[x].char = substlist[substs[x].char]
-        subst_done = true
-        width = node.dimensions(line.head)  % check if the line is too wide
-        % substitute back if the line would be too wide and break:
-        if width > goal then substs[x].char = oldchar break end 
-        % if further substitutions have to be done, remove the just
-        % substituted node from the list:
-        table.remove(substs,x)
-      end
-      % Must take into account marks and ins, see luatex manual.
-      % Have to be executed only if there are changes.
-      if subst_done then
-        line.head = node.hpack(line.head, goal, 'exactly')
-      end
-    end
-  end
-  return head
-end
-
-}
-
-\gdef\ArabicSetupJust{%
-  \directlua{
-    Babel.ar_tolong   = {}
-    luatexbase.add_to_callback('post_linebreak_filter',
-      Babel.ar_justify, 'Babel.ar_justify')
-  }% 
-  % It must be done for each font, and stored separately.
-  % Locale must be taken into account too. Brute force.
-  % No rules at all, yet. The ideal: look at jalt table.
-  % And perhaps other tables (falt?, cswh?). What about kaf?
-  \begingroup
-    \bbl at foreach{%   
-        0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
-        0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
-        0640,0641,0642,0643,0644,0645,0646,0647,0649}{%
-      \setbox\z@\hbox{%  Only final, for the moment
-        ^^^^200d\char"##1=%
-        \addfontfeature{RawFeature=+jalt}%
-        ^^^^200d\char"##1}%
-      \directlua{
-        local chars = {}
-        for item in node.traverse(tex.box[0].head) do
-          if item.id == node.id'glyph' and item.char > 128 and
-              not (item.char == 0x200D) then
-            table.insert(chars, item.char)
-          end
-        end
-        if not (chars[1] == chars[2]) then
-          Babel.ar_tolong[chars[1]] = chars[2]
-        end
-      }}%
-  \endgroup}
-  
-\fi
-
-\endinput
\ No newline at end of file
+\endinput

Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini	2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
 
 [identification]
 charset = utf8
-version = 1.7
-date = 2020-10-11
+version = 1.8
+date = 2021-05-16
 name.local = فارسی
 name.english = Persian
 name.babel = persian
@@ -177,3 +177,9 @@
 abjad = ا ب ج د ه‍ و ز ح ط ی ک ل م ن س ع ف ص ق ر ش ت ث خ ذ ض ظ غ
 alphabetic = ا ب پ ت ث ج چ ح خ د ذ ر ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن و ه‍ ی 
 
+[transforms.prehyphenation]
+kashida.plain.1.0 = { ()[يئهشسقفغعضصنمكلظطخحچجثتپب]()[يئهشسقفغعضصنمكلظطخحچجثتپباأإآوؤذدژزرة] }
+kashida.plain.1.1 = { kashida = 500 }
+kashida.plain.2.0 = { ()ل()[اأإآ] }
+kashida.plain.2.1 = { kashida = 0 }
+

Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini	2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
 
 [identification]
 charset = utf8
-version = 1.4
-date = 2021-04-24
+version = 1.5
+date = 2021-05-16
 name.local = norsk bokmål
 name.english = Norwegian Bokmål
 name.babel = norsk norwegianbokmal
@@ -186,8 +186,4 @@
 
 [counters]
 
-[transforms.posthyphenation]
-doubleletter.hyphen.1.0 = {()([BDFGLMNPRSTbdfglmnprst])|(){1}}
-doubleletter.hyphen.1.1 = { no = {1}, pre = {1}{1}- }
-doubleletter.hyphen.1.2 = remove
 

Modified: trunk/Master/texmf-dist/tex/generic/babel/luababel.def
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/luababel.def	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/luababel.def	2021-05-16 20:41:27 UTC (rev 59229)
@@ -534,6 +534,203 @@
      \ifx\bbl at KVP@intrapenalty\@nil\else
        \expandafter\bbl at intrapenalty\bbl at KVP@intrapenalty\@@
      \fi}}
+\ifnum\bbl at bidimode>100 \ifnum\bbl at bidimode<200
+\def\bblar at chars{%
+  0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
+  0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
+  0640,0641,0642,0643,0644,0645,0646,0647,0649}
+\def\bblar at elongated{%
+  0626,0628,062A,062B,0633,0634,0635,0636,063B,%
+  063C,063D,063E,063F,0641,0642,0643,0644,0646,%
+  0649,064A}
+\begingroup
+  \catcode`_=11 \catcode`:=11
+  \gdef\bblar at nofswarn{\gdef\msg_warning:nnx##1##2##3{}}
+\endgroup
+\gdef\bbl at arabicjust{% One-time setup for elongated/kashida justification
+  \let\bbl at arabicjust\relax % Any later call is a no-op
+  \newattribute\bblar at kashida % Looked up by name in Babel.arabic.justify
+  \bblar at kashida=\z@
+  \expandafter\bbl at add\csname selectfont \endcsname{{\bbl at parsejalt}}%
+  \directlua{
+    Babel.arabic.elong_map   = Babel.arabic.elong_map or {}
+    Babel.arabic.elong_map[\the\localeid]   = {} % NOTE(review): only the locale current at this first call gets a table
+    luatexbase.add_to_callback('post_linebreak_filter',
+      Babel.arabic.justify, 'Babel.arabic.justify')
+  }}%
+\def\bblar at fetchjalt#1#2#3#4{% #1: hex code list, #2: text typeset after each
+  \bbl at exp{\\\bbl at foreach{#1}}{% char, #3: Babel.arabic subtable, #4: key suffix
+    \bbl at ifunset{bblar at JE@##1}%
+      {\setbox\z@\hbox{^^^^200d\char"##1#2}}%
+      {\setbox\z@\hbox{^^^^200d\char"\@nameuse{bblar at JE@##1}#2}}%
+    \directlua{% Record the last glyph above 0x600 (ZWJ excluded) found in box 0
+      local last = nil
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 0x600 and
+            not (item.char == 0x200D) then
+          last = item
+        end
+      end % 'last' is still nil when the box holds no such glyph
+      Babel.arabic.#3['##1#4'] = last and last.char
+    }}}
+\gdef\bbl at parsejalt{% Hooked into \selectfont: scan each new font once
+  \ifx\addfontfeature\@undefined\else
+    \bbl at xin@{/e}{/\bbl at cl{lnbrk}}% Only for 'elongated' line breaking
+    \ifin@
+      \directlua{% Create the per-locale table on demand, for any locale
+        local map = Babel.arabic.elong_map[\the\localeid] or {}
+        Babel.arabic.elong_map[\the\localeid] = map
+        if map[\fontid\font] == nil then map[\fontid\font] = {}
+          tex.print([[\string\csname\space bbl at parsejalti\endcsname]])
+        end}%
+    \fi
+  \fi}
+\gdef\bbl at parsejalti{% Build the elongation map for the current font
+  \begingroup
+    \let\bbl at parsejalt\relax     % To avoid infinite loop
+    \edef\bbl at tempb{\fontid\font}% Font id, used as key in elong_map
+    \bblar at nofswarn % NOTE(review): a \gdef, so the no-op \msg_warning:nnx outlives this group
+    \bblar at fetchjalt\bblar at elongated{}{from}{}% Glyphs without jalt
+    \bblar at fetchjalt\bblar at chars{^^^^064a}{from}{a}% Followed by U+064A (Yeh)
+    \bblar at fetchjalt\bblar at chars{^^^^0649}{from}{y}% Followed by U+0649 (Alef maksura)
+    \addfontfeature{RawFeature=+jalt}%
+    % \@namedef{bblar at JE@0643}{06AA}% todo: catch medial kaf
+    \bblar at fetchjalt\bblar at elongated{}{dest}{}% Same glyphs, jalt active
+    \bblar at fetchjalt\bblar at chars{^^^^064a}{dest}{a}%
+    \bblar at fetchjalt\bblar at chars{^^^^0649}{dest}{y}%
+      \directlua{% Keep only the glyphs that jalt actually changed
+        for k, v in pairs(Babel.arabic.from) do
+          if Babel.arabic.dest[k] and
+              not (Babel.arabic.from[k] == Babel.arabic.dest[k]) then
+            Babel.arabic.elong_map[\the\localeid][\bbl at tempb]
+               [Babel.arabic.from[k]] = Babel.arabic.dest[k]
+          end
+        end
+      }%
+  \endgroup}
+\begingroup
+\catcode`#=11
+\catcode`~=11
+\directlua{
+
+Babel.arabic = Babel.arabic or {}
+Babel.arabic.from = {}
+Babel.arabic.dest = {}
+Babel.arabic.justify_factor = 0.95
+Babel.arabic.justify_enabled = true
+
+function Babel.arabic.justify(head)
+  % post_linebreak_filter callback: fills stretched lines of the list
+  % 'head' with elongated glyphs and tatweel; returns the list head.
+  if not Babel.arabic.justify_enabled then return head end
+  local d, new, elongs, k_list, k_item, pos_inline
+  local width, width_new, full, k_curr, wt_pos, goal
+  local subst_done
+  local elong_map = Babel.arabic.elong_map
+  local GLYPH = node.id'glyph'
+  local KASHIDA = luatexbase.registernumber'bblar at kashida'
+  local LOCALE = luatexbase.registernumber'bbl at attr@locale'
+
+  for line in node.traverse_id(node.id'hlist', head) do
+    % Exclude last line. todo. But-- it discards one-word lines, too!
+    % ? Look for glue = 12:15
+    if (line.glue_sign == 1 and line.glue_order == 0) then
+      elongs = {}     % Stores elongated candidates of each line
+      k_list = {}     % And all letters with kashida
+      pos_inline = 0  % Not yet used
+      subst_done = false  % Per line, so untouched lines are not re-packed
+      for n in node.traverse_id(GLYPH, line.head) do
+        pos_inline = pos_inline + 1 % To find where it is. Not used.
+
+        % Elongated glyphs
+        if elong_map then
+          local locale = node.get_attribute(n, LOCALE)
+          if elong_map[locale] and elong_map[locale][n.font] and
+              elong_map[locale][n.font][n.char] then
+            table.insert(elongs, {node = n, locale = locale} )
+            if n.prev then node.set_attribute(n.prev, KASHIDA, 0) end
+          end
+        end
+
+        % Tatwil
+        if Babel.kashida_wts then
+          local k_wt = node.get_attribute(n, KASHIDA)
+          if k_wt and k_wt > 0 then % todo. parameter for multi inserts
+            table.insert(k_list, {node = n, weight = k_wt, pos = pos_inline})
+          end
+        end
+
+      end % of node.traverse_id
+
+      if #elongs == 0 and #k_list == 0 then goto next_line end
+
+      full = line.width
+      goal = full * Babel.arabic.justify_factor % A bit crude
+      width = node.dimensions(line.head)    % The 'natural' width
+
+      % == Elongated ==
+      % Original idea taken from 'chickenize'
+      while (#elongs > 0 and width < goal) do
+        subst_done = true
+        local x = #elongs
+        local curr = elongs[x].node
+        local oldchar = curr.char
+        curr.char = elong_map[elongs[x].locale][curr.font][curr.char]
+        width = node.dimensions(line.head)  % Check if the line is too wide
+        % Substitute back if the line would be too wide and break:
+        if width > goal then
+          curr.char = oldchar
+          break
+        end
+        % If continue, pop the just substituted node from the list:
+        table.remove(elongs, x)
+      end
+
+      % == Tatwil ==
+      if #k_list == 0 then goto next_line end
+
+      width = node.dimensions(line.head)    % The 'natural' width
+      k_curr = #k_list
+      wt_pos = 1
+
+      while width < goal do
+        subst_done = true
+        k_item = k_list[k_curr].node
+        if k_list[k_curr].weight == Babel.kashida_wts[wt_pos] then
+          d = node.copy(k_item)
+          d.char = 0x0640
+          line.head, new = node.insert_after(line.head, k_item, d)
+          width_new = node.dimensions(line.head)
+          if width_new > goal or width == width_new then
+            node.remove(line.head, new) % Better compute before
+            break
+          end
+          width = width_new
+        end
+        if k_curr == 1 then
+          k_curr = #k_list
+          wt_pos = (wt_pos >= #Babel.kashida_wts) and 1 or wt_pos+1
+        else
+          k_curr = k_curr - 1
+        end
+      end
+
+      ::next_line::
+
+      % Must take into account marks and ins, see luatex manual.
+      % Have to be executed only if there are changes. Investigate
+      % what's going on exactly.
+      if subst_done then
+        head = node.insert_before(head, line, node.hpack(line.head, full, 'exactly'))
+        head = node.remove(head, line)
+      end
+    end % if process line
+  end % for lines
+  return head
+end
+}
+\endgroup
+\fi\fi % Arabic just block
 \AddBabelHook{babel-fontspec}{afterextras}{\bbl at switchfont}
 \AddBabelHook{babel-fontspec}{beforestart}{\bbl at ckeckstdfonts}
 \DisableBabelHook{babel-fontspec}
@@ -1113,6 +1310,7 @@
         &% after the match, either as found by u.match (faster) or the
         &% computed position based on sc if w has changed.
         local last_match = 0
+        local step = 0
 
         &% For every match.
         while true do
@@ -1120,9 +1318,9 @@
             print('=====')
           end
           local new  &% used when inserting and removing nodes
-          local refetch = false
 
           local matches = { u.match(w, p, last_match) }
+
           if #matches < 2 then break end
 
           &% Get and remove empty captures (with ()'s, which return a
@@ -1188,6 +1386,10 @@
               item_base = data_nodes[crep.data]
             end
 
+            if crep then
+              step = crep.step or 0
+            end
+
             if crep and next(crep) == nil then &% = {}
               last_match = save_last    &% Optimization
               goto next
@@ -1197,9 +1399,16 @@
               table.remove(w_nodes, sc)
               w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
               sc = sc - 1  &% Nothing has been inserted.
-              last_match = utf8.offset(w, sc+1)
+              last_match = utf8.offset(w, sc+1+step)
               goto next
 
+            elseif crep and crep.kashida then &% Experimental
+              node.set_attribute(item,
+                 luatexbase.registernumber'bblar at kashida',
+                 crep.kashida)
+              last_match = utf8.offset(w, sc+1+step)
+              goto next
+
             elseif crep and crep.string then
               local str = crep.string(matches)
               if str == '' then  &% Gather with nil
@@ -1233,7 +1442,7 @@
                 end  &% for
                 node.remove(head, item)
               end  &% if ''
-              last_match = utf8.offset(w, sc+1)
+              last_match = utf8.offset(w, sc+1+step)
               goto next
 
             elseif mode == 1 and crep and (crep.pre or crep.no or crep.post) then
@@ -1302,7 +1511,7 @@
               w = u.sub(w, 1, sc-1) .. placeholder .. u.sub(w, sc+1)
             end
 
-            last_match = utf8.offset(w, sc+1)
+            last_match = utf8.offset(w, sc+1+step)
 
             ::next::
 
@@ -1372,6 +1581,26 @@
     return "]]..Babel.capt_map(m[" .. capno .. "]," ..
            (mlen) .. ").." .. "[["
   end
+
+  &% Create/Extend reversed sorted list of kashida weights:
+  function Babel.capture_kashida(key, wt)
+    &% gsub callback: 'key' is the matched word, 'wt' the weight text.
+    wt = tonumber(wt)
+    local wts = Babel.kashida_wts or {}
+    Babel.kashida_wts = wts
+    local pos = #wts + 1           &% Default: append (smallest so far)
+    for p, q in ipairs(wts) do
+      if wt == q then
+        pos = nil                  &% Already registered
+        break
+      elseif wt > q then
+        pos = p                    &% Keep the list in descending order
+        break
+      end
+    end
+    if pos then table.insert(wts, pos, wt) end
+    return 'kashida = ' .. wt
+  end
 }
 \catcode`\#=6
 \gdef\babelposthyphenation#1#2#3{&%
@@ -1432,10 +1661,11 @@
            rep = rep:gsub('^%s*(remove)%s*$', 'remove = true')
            rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
            rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
-           rep = rep:gsub( '(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+           rep = rep:gsub('(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
              'space = {' .. '%2, %3, %4' .. '}')
-           rep = rep:gsub( '(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+           rep = rep:gsub('(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
              'spacefactor = {' .. '%2, %3, %4' .. '}')
+           rep = rep:gsub('(kashida)%s*=%s*([^%s,]*)', Babel.capture_kashida)
            tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
          }}}&%
     \directlua{

Modified: trunk/Master/texmf-dist/tex/generic/babel/nil.ldf
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/nil.ldf	2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/nil.ldf	2021-05-16 20:41:27 UTC (rev 59229)
@@ -32,7 +32,7 @@
 %% extension |.ins|) which are part of the distribution.
 %%
 
-\ProvidesLanguage{nil}[2021/04/26 3.58 Nil language]
+\ProvidesLanguage{nil}[2021/05/16 3.59 Nil language]
 \LdfInit{nil}{datenil}
 \ifx\l at nil\@undefined
   \newlanguage\l at nil



More information about the tex-live-commits mailing list.