texlive[59229] Master/texmf-dist: babel (16may21)
commits+karl at tug.org
commits+karl at tug.org
Sun May 16 22:41:27 CEST 2021
Revision: 59229
http://tug.org/svn/texlive?view=revision&revision=59229
Author: karl
Date: 2021-05-16 22:41:27 +0200 (Sun, 16 May 2021)
Log Message:
-----------
babel (16may21)
Modified Paths:
--------------
trunk/Master/texmf-dist/doc/latex/babel/README.md
trunk/Master/texmf-dist/doc/latex/babel/babel.pdf
trunk/Master/texmf-dist/source/latex/babel/babel.dtx
trunk/Master/texmf-dist/source/latex/babel/babel.ins
trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx
trunk/Master/texmf-dist/source/latex/babel/locale.zip
trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua
trunk/Master/texmf-dist/tex/generic/babel/babel.def
trunk/Master/texmf-dist/tex/generic/babel/babel.sty
trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg
trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini
trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex
trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini
trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini
trunk/Master/texmf-dist/tex/generic/babel/luababel.def
trunk/Master/texmf-dist/tex/generic/babel/nil.ldf
Modified: trunk/Master/texmf-dist/doc/latex/babel/README.md
===================================================================
--- trunk/Master/texmf-dist/doc/latex/babel/README.md 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/doc/latex/babel/README.md 2021-05-16 20:41:27 UTC (rev 59229)
@@ -1,4 +1,4 @@
-## Babel 3.58
+## Babel 3.59
This package manages culturally-determined typographical (and other)
rules, and hyphenation patterns for a wide range of languages. Many
@@ -8,9 +8,9 @@
The latest stable version is available on <https://ctan.org/pkg/babel>.
-Changes in version 3.58 are described in:
+Changes in version 3.59 are described in:
-https://github.com/latex3/babel/blob/master/news-guides/news/whats-new-in-babel-3.58.md
+https://github.com/latex3/babel/blob/master/news-guides/news/whats-new-in-babel-3.59.md
Apart from the manual, you can find information on some aspects of babel at:
@@ -46,30 +46,12 @@
### Summary of Latest changes
```
-3.58 2021-04-26
- * More predefined transforms (lua):
- - doubleletter.hyphen: Norsk
- - oneletter.nobreak: Czech, Polish, Slovak
- - hyphen.repeat: Czech, Polish, Portuguese, Slovak, Spanish
- - punctuation.space: Hindi, Sanskrit
- - transliteration.hk: Sanskrit
- - transliteration.gajica: Serbian
- * 'prehyphenation' transforms are now disabled in verbatim.
- * New line breaking mode ‘unhyphenated’.
- * Fix: \shorthandoff*{^} was not revertible (#126).
- * Experimental code for Arabic justification (with elongated
- forms: lua).
-
-3.57 2021-04-07
- * Predefined transforms (lua):
- - Arabic: transliteration.dad
- - Croatian: digraphs.ligatures
- - Greek: diaeresis.hyphen
- - Hindi: transliteration.hk
- - Hungarian: digraphs.hyphen
- * Transforms: {xxxx} syntax also in string=.
- * Preliminary code for Uyghur hyphenation (lua).
- * magyar as alternative to hungarian in \babelprovide.
+3.59 2021-05-16
+ * Tentative kashida with user-definable rules (via transforms).
+ * Fixes:
+ - \babel at texpdf inconsistently defined (#130)
+ - Partial fix for #131 (quotation with CJK).
+ * Removed doubleletter.hyphen (Norsk), which served no purpose.
```
### Previous changes
Modified: trunk/Master/texmf-dist/doc/latex/babel/babel.pdf
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/source/latex/babel/babel.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/babel.dtx 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/babel.dtx 2021-05-16 20:41:27 UTC (rev 59229)
@@ -31,7 +31,7 @@
%
% \iffalse
%<*filedriver>
-\ProvidesFile{babel.dtx}[2021/04/26 v3.58 The Babel package]
+\ProvidesFile{babel.dtx}[2021/05/16 v3.59 The Babel package]
\documentclass{ltxdoc}
\GetFileInfo{babel.dtx}
\usepackage{fontspec}
@@ -2579,6 +2579,18 @@
Currently used only in Southeast Asian scrips, like Thai. Ignored if 0
(which is the default value).
+\Describe{justification=}{\texttt{kashida} $\string|$
+\texttt{elongated} $\string|$ \texttt{unhyphenated}}
+\New{3.59} There are currently three options, mainly for the Arabic
+script. It sets the linebreaking and justification method, which can be
+based on the \textsc{arabic tatweel} character or on the
+‘justification alternatives’ OpenType table (\texttt{jalt}). For an
+explanation see the \href{https://github.com/latex3/babel/blob/master/%
+news-guides/news/whats-new-in-babel-3.59.md}{\babel\ site}.
+
+\Describe{linebreaking=}{}
+\New{3.59} Just a synonym for \texttt{justification}.
+
\Describe{mapfont=}{\texttt{direction}}
Assigns the font for the writing direction of this language (only with
|bidi=basic|). Whenever possible, instead of this option use |onchar|,
@@ -3080,7 +3092,7 @@
\trans{Hindi, Sanskrit}{transliteration.hk}{The Harvard-Kyoto system to
romanize Devanagari.}
-\trans{}{punctuation.space}{Inserts a space before the following
+\trans{Hindi, Sanskrit}{punctuation.space}{Inserts a space before the following
four characters: \textit{!?:;}\,.}
\trans{Hungarian}{digraphs.hyphen}{Hyphenates the long digraphs
@@ -3088,10 +3100,10 @@
\textit{ssz}, \textit{tty} and \textit{zzs} as \textit{cs-cs},
\textit{dz-dz}, etc.}
-\trans{Norsk}{doubleletter.hyphen}{Hyphenates the doble-letter groups
-\textit{bb}, \textit{dd}, \textit{ff}, \textit{gg}, \textit{ll},
-\textit{mm}, \textit{nn}, \textit{pp}, \textit{rr}, \textit{ss},
-\textit{tt} as \textit{bb-b}, \textit{dd-d}, etc.}
+\trans{Arabic, Persian}{kashida.plain}{Experimental. A very simple and
+basic transform for ‘plain’ Arabic fonts, which attempts to distribute
+the tatwil as evenly as possible (starting at the end of the line). See
+the news for version 3.59.}
\trans{Serbian}{transliteration.gajica}{(Note |serbian| with |ini|
files refers to the Cyrillic script, which is here the target.) The
@@ -3106,9 +3118,9 @@
\New{3.37-3.39} \textit{With \luatex{}} it is now possible to define
non-standard hyphenation rules, like |f-f| $\to$ |ff-f|, repeated
hyphens, ranked ruled (or more precisely, ‘penalized’ hyphenation
-points), and so on. No rules are currently provided by default, but
-they can be defined as shown in the following example, where |{1}| is
-the first captured char (between |()| in the pattern):
+points), and so on. Only a few rules are currently provided (see
+below), but more can be defined as shown in the following example,
+where |{1}| is the first captured char (between |()| in the pattern):
\begin{verbatim}
\babelposthyphenation{german}{([fmtrp]) | {1}}
{
@@ -3147,10 +3159,8 @@
is the locale instead of the name of the hyphenation patterns; (2) in the
search patterns |=| has no special meaning, while \verb+|+ stands for
an ordinary space; (3) in the replacement, discretionaries are not
-accepted.
+accepted.
-It handles glyphs and spaces.
-
This feature is activated with the first |\babelposthyphenation| or
|\babelprehyphenation|.
@@ -3188,7 +3198,7 @@
\babel{} by default recognizes this setting if the font has been
declared with |\babelfont|. The \textit{transforms} mechanism
supplements rather than replaces OTF features.
-
+
With \xetex{}, where \textit{transforms} are not available, there is
still another approach, with font mappings, mainly meant to perform
encoding conversions and transliterations. Mappings, however, are
@@ -3880,12 +3890,11 @@
Currently this macro understands the following keys (and only for
\textsf{luatex}), with values |on| or |off|: |bidi.text|,
|bidi.mirroring|, |bidi.mapdigits|, |layout.lists|, |layout.tabular|,
-|linebreak.sea|, |linebreak.cjk|. For example, you can set
+|linebreak.sea|, |linebreak.cjk|, |justify.arabic|. For example, you can set
|\babeladjust{bidi.text=off}| if you are using an alternative algorithm
-or with large sections not requiring it. With \textsf{luahbtex} you may
-need |bidi.mirroring=off|. Use with care, because these options do not
-deactivate other related options (like paragraph direction with
-|bidi.text|).
+or with large sections not requiring it. Use with care, because these
+options do not deactivate other related options (like paragraph
+direction with |bidi.text|).
\subsection{Tips, workarounds, known issues and notes}
@@ -4958,8 +4967,8 @@
% \section{Tools}
%
% \begin{macrocode}
-%<<version=3.58>>
-%<<date=2021/04/26>>
+%<<version=3.59>>
+%<<date=2021/05/16>>
% \end{macrocode}
%
% \textbf{Do not use the following macros in \texttt{ldf} files. They
@@ -7653,9 +7662,13 @@
\else
\edef\bbl at tempa{\bbl at cl{lnbrk}}%
\fi
+ % linebreaking - handle u, e, k (v in the future)
\bbl at xin@{/u}{/\bbl at tempa}%
+ \ifin@\else\bbl at xin@{/e}{/\bbl at tempa}\fi % elongated forms
+ \ifin@\else\bbl at xin@{/k}{/\bbl at tempa}\fi % only kashida
+ \ifin@\else\bbl at xin@{/v}{/\bbl at tempa}\fi % variable font
\ifin@
- % 'unhyphenated' = allow stretching
+ % unhyphenated/kashida/elongated = allow stretching
\language\l at unhyphenated
\babel at savevariable\emergencystretch
\emergencystretch\maxdimen
@@ -8985,7 +8998,7 @@
% \begin{macrocode}
\def\babel at texpdf#1#2#3#4{%
\ifx\texorpdfstring\@undefined
- \textormath{#1}{#2}%
+ \textormath{#1}{#3}%
\else
\texorpdfstring{\textormath{#1}{#3}}{#2}%
% \texorpdfstring{\textormath{#1}{#3}}{\textormath{#2}{#4}}%
@@ -10788,6 +10801,7 @@
\let\bbl at KVP@language\@nil
\let\bbl at KVP@hyphenrules\@nil
\let\bbl at KVP@linebreaking\@nil
+ \let\bbl at KVP@justification\@nil
\let\bbl at KVP@mapfont\@nil
\let\bbl at KVP@maparabic\@nil
\let\bbl at KVP@mapdigits\@nil
@@ -10968,6 +10982,20 @@
\bbl at csarg\edef{intsp@#2}{\bbl at KVP@intraspace}%
\fi
\bbl at provide@intraspace
+ %
+ \ifx\bbl at KVP@justification\@nil\else
+ \let\bbl at KVP@linebreaking\bbl at KVP@justification
+ \fi
+ \ifx\bbl at KVP@linebreaking\@nil\else
+ \bbl at xin@{,\bbl at KVP@linebreaking,}{,elongated,kashida,cjk,unhyphenated,}%
+ \ifin@
+ \bbl at csarg\xdef
+ {lnbrk@\languagename}{\expandafter\@car\bbl at KVP@linebreaking\@nil}%
+ \fi
+ \fi
+ \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+ \ifin@\else\bbl at xin@{/k}{/\bbl at cl{lnbrk}}\fi
+ \ifin@\bbl at arabicjust\fi
% == Line breaking: hyphenate.other.locale/.script==
\ifx\bbl at lbkflag\@empty
\bbl at ifunset{bbl at hyotl@\languagename}{}%
@@ -12126,6 +12154,10 @@
\bbl at adjust@lua{linebreak}{cjk_enabled=true}}
\@namedef{bbl at ADJ@linebreak.cjk at off}{%
\bbl at adjust@lua{linebreak}{cjk_enabled=false}}
+% Handlers for the justify.arabic key of \babeladjust (values on and
+% off). Both hand an assignment to arabic.justify_enabled over to
+% \bbl at adjust@lua; presumably this sets Babel.arabic.justify_enabled,
+% the flag tested at the top of Babel.arabic.justify --
+% \bbl at adjust@lua is defined outside this hunk, so confirm there.
+\@namedef{bbl at ADJ@justify.arabic at on}{%
+ \bbl at adjust@lua{linebreak}{arabic.justify_enabled=true}}
+\@namedef{bbl at ADJ@justify.arabic at off}{%
+ \bbl at adjust@lua{linebreak}{arabic.justify_enabled=false}}
%
\def\bbl at adjust@layout#1{%
\ifvmode
@@ -13571,7 +13603,23 @@
end
end
}^^
- \bbl at luahyphenate}
+ \bbl at luahyphenate}
+% \end{macrocode}
+%
+% \subsection{CJK line breaking}
+%
+% Minimal line breaking for CJK scripts, mainly intended for simple
+% documents and short texts as a secondary language. Only line
+% breaking, with a little stretching for justification, without any
+% attempt to adjust the spacing. It is based on (but does not strictly
+% follow) the Unicode algorithm.
+%
+% We first need a little table with the corresponding line breaking
+% properties. A few characters have an additional key for the width
+% (fullwidth \textit{vs.} halfwidth), not yet used. There is a separate
+% file, defined below.
+%
+% \begin{macrocode}
\catcode`\%=14
\gdef\bbl at cjkintraspace{%
\let\bbl at cjkintraspace\relax
@@ -13696,23 +13744,216 @@
\fi}}
% \end{macrocode}
%
-% \subsection{CJK line breaking}
+% \subsection{Arabic justification}
%
-% Minimal line breaking for CJK scripts, mainly intended for simple
-% documents and short texts as a secundary language. Only line
-% breaking, with a little stretching for justification, without any
-% attempt to adjust the spacing. It is based on (but does not strictly
-% follow) the Unicode algorithm.
+% \begin{macrocode}
+\ifnum\bbl at bidimode>100 \ifnum\bbl at bidimode<200
+% Comma lists of hexadecimal code points in the Arabic block, looped
+% over by \bblar at fetchjalt when called from \bbl at parsejalti.
+% \bblar at chars is fetched twice, with U+064A and with U+0649
+% appended to every character; \bblar at elongated is fetched with
+% nothing appended.
+\def\bblar at chars{%
+ 0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
+ 0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
+ 0640,0641,0642,0643,0644,0645,0646,0647,0649}
+\def\bblar at elongated{%
+ 0626,0628,062A,062B,0633,0634,0635,0636,063B,%
+ 063C,063D,063E,063F,0641,0642,0643,0644,0646,%
+ 0649,064A}
+% \bblar at nofswarn, when executed, globally redefines
+% \msg_warning:nnx as a macro swallowing its three arguments. The
+% (grouped) catcode changes are only needed to write that name.
+% NOTE(review): used by \bbl at parsejalti, presumably to silence
+% warnings raised by \addfontfeature; as the redefinition is a \gdef
+% it is not undone when the caller's group ends -- confirm intended.
+\begingroup
+ \catcode`_=11 \catcode`:=11
+ \gdef\bblar at nofswarn{\gdef\msg_warning:nnx##1##2##3{}}
+\endgroup
+% One-shot setup of Arabic justification. When executed it redefines
+% itself as \relax, allocates the attribute \bblar at kashida and
+% sets it to 0, appends {\bbl at parsejalt} (inside a group) to
+% \selectfont, creates the elong_map entry for the current \localeid
+% and registers Babel.arabic.justify as a post_linebreak_filter
+% callback.
+% NOTE(review): while the \let below is in effect, later calls do
+% nothing, so only the locale active at the first call gets an
+% elong_map entry here.
+\gdef\bbl at arabicjust{%
+ \let\bbl at arabicjust\relax
+ \newattribute\bblar at kashida
+ \bblar at kashida=\z@
+ \expandafter\bbl at add\csname selectfont \endcsname{{\bbl at parsejalt}}%
+ \directlua{
+ Babel.arabic.elong_map = Babel.arabic.elong_map or {}
+ Babel.arabic.elong_map[\the\localeid] = {}
+ luatexbase.add_to_callback('post_linebreak_filter',
+ Babel.arabic.justify, 'Babel.arabic.justify')
+ }}%
+% Save both node lists to make replacement. TODO. Save also widths to
+% make computations
+% Arguments: #1 = macro holding a comma list of hex code points,
+% #2 = material typeset after each character, #3 = name of the table
+% in Babel.arabic to fill (from or dest), #4 = suffix appended to the
+% table key. For every code point, ZWJ + char (or the char stored in
+% \bblar at JE@<code>, if defined) + #2 is set in box 0, and the char
+% of the last glyph above 0x600 other than ZWJ is stored under the key
+% <code><suffix>.
+\def\bblar at fetchjalt#1#2#3#4{%
+  \bbl at exp{\\\bbl at foreach{#1}}{%
+    \bbl at ifunset{bblar at JE@##1}%
+      {\setbox\z@\hbox{^^^^200d\char"##1#2}}%
+      {\setbox\z@\hbox{^^^^200d\char"\@nameuse{bblar at JE@##1}#2}}%
+    \directlua{%
+      local last = nil
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 0x600 and
+            not (item.char == 0x200D) then
+          last = item
+        end
+      end
+      % If the box holds no matching glyph, leave the entry unset
+      % instead of raising a Lua error by indexing nil.
+      Babel.arabic.#3['##1#4'] = last and last.char or nil
+    }}}
+% Brute force. No rules at all, yet. The ideal: look at jalt table. And
+% perhaps other tables (falt?, cswh?). What about kaf? And diacritic
+% positioning?
+% Executed at every \selectfont (added there by \bbl at arabicjust).
+% Does nothing unless \addfontfeature is defined and the line breaking
+% mode of the current locale contains e (elongated). The first time a
+% locale/font pair is seen, an empty map is created for it and
+% \bbl at parsejalti is scheduled with tex.print to fill it.
+\gdef\bbl at parsejalt{%
+  \ifx\addfontfeature\@undefined\else
+    \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+    \ifin@
+      \directlua{%
+        % \bbl at arabicjust creates the table only for the locale
+        % active when it first runs, so create it here for any other.
+        Babel.arabic.elong_map[\the\localeid] =
+          Babel.arabic.elong_map[\the\localeid] or {}
+        if Babel.arabic.elong_map[\the\localeid][\fontid\font] == nil then
+          Babel.arabic.elong_map[\the\localeid][\fontid\font] = {}
+          tex.print([[\string\csname\space bbl at parsejalti\endcsname]])
+        end
+      }%
+    \fi
+  \fi}
+% Fills the elongation map of the current locale and font. Inside a
+% group: the glyphs produced by the current font are recorded in
+% Babel.arabic.from, the jalt feature is switched on with
+% \addfontfeature, and the same sequences are recorded again in
+% Babel.arabic.dest. Every key present in both tables with different
+% values yields an entry from-char -> dest-char in
+% Babel.arabic.elong_map[locale][font].
+\gdef\bbl at parsejalti{%
+ \begingroup
+ \let\bbl at parsejalt\relax % To avoid infinite loop
+ % Font id saved before \addfontfeature is applied below
+ \edef\bbl at tempb{\fontid\font}%
+ \bblar at nofswarn
+ \bblar at fetchjalt\bblar at elongated{}{from}{}%
+ \bblar at fetchjalt\bblar at chars{^^^^064a}{from}{a}% + U+064A (Yeh); 'a' is just a key suffix
+ \bblar at fetchjalt\bblar at chars{^^^^0649}{from}{y}% + U+0649 (Alef maksura); 'y' is just a key suffix
+ \addfontfeature{RawFeature=+jalt}%
+ % \@namedef{bblar at JE@0643}{06AA}% todo: catch medial kaf
+ \bblar at fetchjalt\bblar at elongated{}{dest}{}%
+ \bblar at fetchjalt\bblar at chars{^^^^064a}{dest}{a}%
+ \bblar at fetchjalt\bblar at chars{^^^^0649}{dest}{y}%
+ \directlua{%
+ for k, v in pairs(Babel.arabic.from) do
+ if Babel.arabic.dest[k] and
+ not (Babel.arabic.from[k] == Babel.arabic.dest[k]) then
+ Babel.arabic.elong_map[\the\localeid][\bbl at tempb]
+ [Babel.arabic.from[k]] = Babel.arabic.dest[k]
+ end
+ end
+ }%
+ \endgroup}
%
-% We first need a little table with the corresponding line breaking
-% properties. A few characters have an additional key for the width
-% (fullwidth \textit{vs.} halfwidth), not yet used. There is a separate
-% file, defined below.
+\begingroup
+\catcode`#=11
+\catcode`~=11
+\directlua{
+
+Babel.arabic = Babel.arabic or {}
+Babel.arabic.from = {}
+Babel.arabic.dest = {}
+Babel.arabic.justify_factor = 0.95
+Babel.arabic.justify_enabled = true
+
+function Babel.arabic.justify(head)
+  % Callback registered for 'post_linebreak_filter'. For every line
+  % whose glue is stretched (finite order) it first replaces glyphs by
+  % their elongated forms (Babel.arabic.elong_map) and then inserts
+  % tatweel glyphs (U+0640) after the glyphs carrying a kashida weight,
+  % until the natural width reaches justify_factor * line width. A
+  % changed line is repacked to its original width. Returns the head of
+  % the (possibly modified) list.
+  if not Babel.arabic.justify_enabled then return head end
+  local d, new
+  local elongs, k_list, k_item, pos_inline
+  local width, width_new, full, k_curr, wt_pos, goal
+  local subst_done
+  local elong_map = Babel.arabic.elong_map
+  local GLYPH = node.id'glyph'
+  local KASHIDA = luatexbase.registernumber'bblar at kashida'
+  local LOCALE = luatexbase.registernumber'bbl at attr@locale'
+  for line in node.traverse_id(node.id'hlist', head) do
+    % Exclude last line. todo. But-- it discards one-word lines, too!
+    % ? Look for glue = 12:15
+    if (line.glue_sign == 1 and line.glue_order == 0) then
+      subst_done = false % Per line, so only changed lines are repacked
+      elongs = {}        % Stores elongated candidates of each line
+      k_list = {}        % And all letters with kashida
+      pos_inline = 0     % Not yet used
+      for n in node.traverse_id(GLYPH, line.head) do
+        pos_inline = pos_inline + 1 % To find where it is. Not used.
+        % Elongated glyphs
+        if elong_map then
+          local locale = node.get_attribute(n, LOCALE)
+          if elong_map[locale] and elong_map[locale][n.font] and
+              elong_map[locale][n.font][n.char] then
+            table.insert(elongs, {node = n, locale = locale} )
+            % n may be the very first node of the line
+            if n.prev then
+              node.set_attribute(n.prev, KASHIDA, 0)
+            end
+          end
+        end
+        % Tatwil
+        if Babel.kashida_wts then
+          % get_attribute returns nil if the attribute is unset
+          local k_wt = node.get_attribute(n, KASHIDA)
+          if k_wt and k_wt > 0 then % todo. parameter for multi inserts
+            table.insert(k_list, {node = n, weight = k_wt, pos = pos_inline})
+          end
+        end
+      end % of node.traverse_id
+      if #elongs == 0 and #k_list == 0 then goto next_line end
+      full = line.width
+      goal = full * Babel.arabic.justify_factor % A bit crude
+      width = node.dimensions(line.head) % The 'natural' width
+      % == Elongated ==
+      % Original idea taken from 'chickenize'
+      while (#elongs > 0 and width < goal) do
+        subst_done = true
+        local x = #elongs
+        local curr = elongs[x].node
+        local oldchar = curr.char
+        curr.char = elong_map[elongs[x].locale][curr.font][curr.char]
+        width = node.dimensions(line.head) % Check if the line is too wide
+        % Substitute back if the line would be too wide and break:
+        if width > goal then
+          curr.char = oldchar
+          break
+        end
+        % If continue, pop the just substituted node from the list:
+        table.remove(elongs, x)
+      end
+      % == Tatwil ==
+      if #k_list == 0 then goto next_line end
+      width = node.dimensions(line.head) % The 'natural' width
+      k_curr = #k_list
+      wt_pos = 1
+      % Walk the candidates from the end of the line to its start,
+      % one weight class (in the order of Babel.kashida_wts) per pass.
+      while width < goal do
+        subst_done = true
+        k_item = k_list[k_curr].node
+        if k_list[k_curr].weight == Babel.kashida_wts[wt_pos] then
+          d = node.copy(k_item)
+          d.char = 0x0640
+          line.head, new = node.insert_after(line.head, k_item, d)
+          width_new = node.dimensions(line.head)
+          % NOTE(review): 'width > goal' cannot hold inside this loop
+          % (width < goal is the loop condition); width_new was perhaps
+          % meant. Kept as is, because changing it alters the output.
+          if width > goal or width == width_new then
+            node.remove(line.head, new) % Better compute before
+            break
+          end
+          width = width_new
+        end
+        if k_curr == 1 then
+          k_curr = #k_list
+          wt_pos = (wt_pos >= #Babel.kashida_wts) and 1 or wt_pos+1
+        else
+          k_curr = k_curr - 1
+        end
+      end
+      ::next_line::
+      % Must take into account marks and ins, see luatex manual.
+      % Executed only if this line has been changed. The head returned
+      % by the node library is kept, because the replaced line may be
+      % the first node of the list.
+      if subst_done then
+        head = node.insert_before(head, line,
+          node.hpack(line.head, full, 'exactly'))
+        head = node.remove(head, line)
+      end
+    end % if process line
+  end % for lines
+  return head
+end
+}
+\endgroup
+\fi\fi % Arabic just block
+% \end{macrocode}
%
-% \textit{Work in progress.}
+% \subsection{Common stuff}
%
-% Common stuff.
-%
% \begin{macrocode}
\AddBabelHook{babel-fontspec}{afterextras}{\bbl at switchfont}
\AddBabelHook{babel-fontspec}{beforestart}{\bbl at ckeckstdfonts}
@@ -13951,11 +14192,11 @@
end
Babel.fetch_subtext = {}
-
+
Babel.ignore_pre_char = function(node)
return (node.lang == \the\l at nohyphenation)
end
-
+
&% Merging both functions doesn't seen feasible, because there are too
&% many differences.
Babel.fetch_subtext[0] = function(head)
@@ -14136,6 +14377,7 @@
&% after the match, either as found by u.match (faster) or the
&% computed position based on sc if w has changed.
local last_match = 0
+ local step = 0
&% For every match.
while true do
@@ -14143,9 +14385,9 @@
print('=====')
end
local new &% used when inserting and removing nodes
- local refetch = false
local matches = { u.match(w, p, last_match) }
+
if #matches < 2 then break end
&% Get and remove empty captures (with ()'s, which return a
@@ -14210,6 +14452,10 @@
if crep and crep.data then
item_base = data_nodes[crep.data]
end
+
+ if crep then
+ step = crep.step or 0
+ end
if crep and next(crep) == nil then &% = {}
last_match = save_last &% Optimization
@@ -14220,9 +14466,16 @@
table.remove(w_nodes, sc)
w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
sc = sc - 1 &% Nothing has been inserted.
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
goto next
+ elseif crep and crep.kashida then &% Experimental
+ node.set_attribute(item,
+ luatexbase.registernumber'bblar at kashida',
+ crep.kashida)
+ last_match = utf8.offset(w, sc+1+step)
+ goto next
+
elseif crep and crep.string then
local str = crep.string(matches)
if str == '' then &% Gather with nil
@@ -14256,7 +14509,7 @@
end &% for
node.remove(head, item)
end &% if ''
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
goto next
elseif mode == 1 and crep and (crep.pre or crep.no or crep.post) then
@@ -14311,7 +14564,7 @@
end &% ie replacement cases
- &% Shared by disc, space and penalty.
+ &% Shared by disc, space and penalty.
if sc == 1 then
word_head = head
end
@@ -14325,7 +14578,7 @@
w = u.sub(w, 1, sc-1) .. placeholder .. u.sub(w, sc+1)
end
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
::next::
@@ -14395,6 +14648,26 @@
return "]]..Babel.capt_map(m[" .. capno .. "]," ..
(mlen) .. ").." .. "[["
end
+
+ &% Create/Extend reversed sorted list of kashida weights:
+ function Babel.capture_kashida(key, wt)
+   &% Replacement function for the 'kashida = <weight>' key of a
+   &% transform (called through gsub with the key and the value as
+   &% captured strings). Registers the weight in Babel.kashida_wts,
+   &% kept sorted from highest to lowest and without duplicates, and
+   &% returns the text to be put in the replacement table.
+   wt = tonumber(wt)
+   if not wt then
+     error('babel: the kashida weight must be a number')
+   end
+   local wts = Babel.kashida_wts
+   if not wts then
+     Babel.kashida_wts = { wt }
+   else
+     &% Find the slot first and insert afterwards, so that the list
+     &% is not modified while it is being traversed.
+     local pos = #wts + 1
+     for p, q in ipairs(wts) do
+       if wt == q then
+         pos = nil &% Already registered
+         break
+       elseif wt > q then
+         pos = p
+         break
+       end
+     end
+     if pos then
+       table.insert(wts, pos, wt)
+     end
+   end
+   return 'kashida = ' .. wt
+ end
}
% \end{macrocode}
%
@@ -14473,10 +14746,11 @@
rep = rep:gsub('^%s*(remove)%s*$', 'remove = true')
rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
- rep = rep:gsub( '(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+ rep = rep:gsub('(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
'space = {' .. '%2, %3, %4' .. '}')
- rep = rep:gsub( '(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+ rep = rep:gsub('(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
'spacefactor = {' .. '%2, %3, %4' .. '}')
+ rep = rep:gsub('(kashida)%s*=%s*([^%s,]*)', Babel.capture_kashida)
tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
}}}&%
\directlua{
@@ -21562,8 +21836,10 @@
Babel.cjk_characters = {
[0x0021]={c='ex'},
+ [0x0022]={c='qu'},
[0x0024]={c='pr'},
[0x0025]={c='po'},
+ [0x0027]={c='qu'},
[0x0028]={c='op'},
[0x0029]={c='cp'},
[0x002B]={c='pr'},
@@ -21584,10 +21860,18 @@
[0x00A3]={c='pr'},
[0x00A4]={c='pr'},
[0x00A5]={c='pr'},
+ [0x00AB]={c='qu'},
[0x00B0]={c='po'},
[0x00B1]={c='pr'},
+ [0x00BB]={c='qu'},
+ [0x2018]={c='qu'},
+ [0x2019]={c='qu'},
[0x201A]={c='op'},
+ [0x201B]={c='qu'},
+ [0x201C]={c='qu'},
+ [0x201D]={c='qu'},
[0x201E]={c='op'},
+ [0x201F]={c='qu'},
[0x2024]={c='in'},
[0x2025]={c='in'},
[0x2026]={c='in'},
@@ -21599,6 +21883,8 @@
[0x2035]={c='po'},
[0x2036]={c='po'},
[0x2037]={c='po'},
+ [0x2039]={c='qu'},
+ [0x203A]={c='qu'},
[0x203C]={c='ns'},
[0x203D]={c='ns'},
[0x2044]={c='is'},
@@ -21657,7 +21943,23 @@
[0x29FD]={c='cl'},
[0x2CF9]={c='ex'},
[0x2CFE]={c='ex'},
+ [0x2E02]={c='qu'},
+ [0x2E03]={c='qu'},
+ [0x2E04]={c='qu'},
+ [0x2E05]={c='qu'},
+ [0x2E06]={c='qu'},
+ [0x2E07]={c='qu'},
+ [0x2E08]={c='qu'},
+ [0x2E09]={c='qu'},
+ [0x2E0A]={c='qu'},
+ [0x2E0B]={c='qu'},
+ [0x2E0C]={c='qu'},
+ [0x2E0D]={c='qu'},
[0x2E18]={c='op'},
+ [0x2E1C]={c='qu'},
+ [0x2E1D]={c='qu'},
+ [0x2E20]={c='qu'},
+ [0x2E21]={c='qu'},
[0x2E22]={c='op'},
[0x2E23]={c='cl'},
[0x2E24]={c='op'},
@@ -21810,6 +22112,7 @@
['po'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
['in'] = { ['op']=1, ['pr']=1, ['po']=1, ['I']=1 },
['hy'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
+ ['qu'] = { },
--
['I'] = { ['op']=1, ['pr']=1, ['I']=1, ['O']=1 },
['O'] = { ['I']=1 }
Modified: trunk/Master/texmf-dist/source/latex/babel/babel.ins
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/babel.ins 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/babel.ins 2021-05-16 20:41:27 UTC (rev 59229)
@@ -26,7 +26,7 @@
%% and covered by LPPL is defined by the unpacking scripts (with
%% extension .ins) which are part of the distribution.
%%
-\def\filedate{2021/04/26}
+\def\filedate{2021/05/16}
\def\batchfile{babel.ins}
\input docstrip.tex
Modified: trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx
===================================================================
--- trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/source/latex/babel/bbcompat.dtx 2021-05-16 20:41:27 UTC (rev 59229)
@@ -30,7 +30,7 @@
%
% \iffalse
%<*dtx>
-\ProvidesFile{bbcompat.dtx}[2021/04/26 v3.58]
+\ProvidesFile{bbcompat.dtx}[2021/05/16 v3.59]
%</dtx>
%
%% File 'bbcompat.dtx'
Modified: trunk/Master/texmf-dist/source/latex/babel/locale.zip
===================================================================
(Binary files differ)
Modified: trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel-data-cjk.lua 2021-05-16 20:41:27 UTC (rev 59229)
@@ -36,8 +36,10 @@
Babel.cjk_characters = {
[0x0021]={c='ex'},
+ [0x0022]={c='qu'},
[0x0024]={c='pr'},
[0x0025]={c='po'},
+ [0x0027]={c='qu'},
[0x0028]={c='op'},
[0x0029]={c='cp'},
[0x002B]={c='pr'},
@@ -58,10 +60,18 @@
[0x00A3]={c='pr'},
[0x00A4]={c='pr'},
[0x00A5]={c='pr'},
+ [0x00AB]={c='qu'},
[0x00B0]={c='po'},
[0x00B1]={c='pr'},
+ [0x00BB]={c='qu'},
+ [0x2018]={c='qu'},
+ [0x2019]={c='qu'},
[0x201A]={c='op'},
+ [0x201B]={c='qu'},
+ [0x201C]={c='qu'},
+ [0x201D]={c='qu'},
[0x201E]={c='op'},
+ [0x201F]={c='qu'},
[0x2024]={c='in'},
[0x2025]={c='in'},
[0x2026]={c='in'},
@@ -73,6 +83,8 @@
[0x2035]={c='po'},
[0x2036]={c='po'},
[0x2037]={c='po'},
+ [0x2039]={c='qu'},
+ [0x203A]={c='qu'},
[0x203C]={c='ns'},
[0x203D]={c='ns'},
[0x2044]={c='is'},
@@ -131,7 +143,23 @@
[0x29FD]={c='cl'},
[0x2CF9]={c='ex'},
[0x2CFE]={c='ex'},
+ [0x2E02]={c='qu'},
+ [0x2E03]={c='qu'},
+ [0x2E04]={c='qu'},
+ [0x2E05]={c='qu'},
+ [0x2E06]={c='qu'},
+ [0x2E07]={c='qu'},
+ [0x2E08]={c='qu'},
+ [0x2E09]={c='qu'},
+ [0x2E0A]={c='qu'},
+ [0x2E0B]={c='qu'},
+ [0x2E0C]={c='qu'},
+ [0x2E0D]={c='qu'},
[0x2E18]={c='op'},
+ [0x2E1C]={c='qu'},
+ [0x2E1D]={c='qu'},
+ [0x2E20]={c='qu'},
+ [0x2E21]={c='qu'},
[0x2E22]={c='op'},
[0x2E23]={c='cl'},
[0x2E24]={c='op'},
@@ -284,6 +312,7 @@
['po'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
['in'] = { ['op']=1, ['pr']=1, ['po']=1, ['I']=1 },
['hy'] = { ['op']=1, ['pr']=1, ['po']=1, ['in']=1, ['I']=1 },
+ ['qu'] = { },
--
['I'] = { ['op']=1, ['pr']=1, ['I']=1, ['O']=1 },
['O'] = { ['I']=1 }
Modified: trunk/Master/texmf-dist/tex/generic/babel/babel.def
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel.def 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel.def 2021-05-16 20:41:27 UTC (rev 59229)
@@ -39,7 +39,7 @@
\wlog{File: #1 #4 #3 <#2>}%
\let\ProvidesFile\@undefined}
\fi
-\ProvidesFile{babel.def}[2021/04/26 3.58 Babel common definitions]
+\ProvidesFile{babel.def}[2021/05/16 3.59 Babel common definitions]
\ifx\AtBeginDocument\@undefined % TODO. change test.
% == Code for plain ==
\def\@empty{}
@@ -397,8 +397,8 @@
\fi
\countdef\last at language=19 % TODO. why? remove?
\def\addlanguage{\csname newlanguage\endcsname}
-\def\bbl at version{3.58}
-\def\bbl at date{2021/04/26}
+\def\bbl at version{3.59}
+\def\bbl at date{2021/05/16}
\def\adddialect#1#2{%
\global\chardef#1#2\relax
\bbl at usehooks{adddialect}{{#1}{#2}}%
@@ -694,9 +694,13 @@
\else
\edef\bbl at tempa{\bbl at cl{lnbrk}}%
\fi
+ % linebreaking - handle u, e, k (v in the future)
\bbl at xin@{/u}{/\bbl at tempa}%
+ \ifin@\else\bbl at xin@{/e}{/\bbl at tempa}\fi % elongated forms
+ \ifin@\else\bbl at xin@{/k}{/\bbl at tempa}\fi % only kashida
+ \ifin@\else\bbl at xin@{/v}{/\bbl at tempa}\fi % variable font
\ifin@
- % 'unhyphenated' = allow stretching
+ % unhyphenated/kashida/elongated = allow stretching
\language\l at unhyphenated
\babel at savevariable\emergencystretch
\emergencystretch\maxdimen
@@ -1483,7 +1487,7 @@
\def\babel at texpdf#1#2#3#4{%
\ifx\texorpdfstring\@undefined
- \textormath{#1}{#2}%
+ \textormath{#1}{#3}%
\else
\texorpdfstring{\textormath{#1}{#3}}{#2}%
% \texorpdfstring{\textormath{#1}{#3}}{\textormath{#2}{#4}}%
@@ -2357,6 +2361,7 @@
\let\bbl at KVP@language\@nil
\let\bbl at KVP@hyphenrules\@nil
\let\bbl at KVP@linebreaking\@nil
+ \let\bbl at KVP@justification\@nil
\let\bbl at KVP@mapfont\@nil
\let\bbl at KVP@maparabic\@nil
\let\bbl at KVP@mapdigits\@nil
@@ -2537,6 +2542,20 @@
\bbl at csarg\edef{intsp@#2}{\bbl at KVP@intraspace}%
\fi
\bbl at provide@intraspace
+ %
+ \ifx\bbl at KVP@justification\@nil\else
+ \let\bbl at KVP@linebreaking\bbl at KVP@justification
+ \fi
+ \ifx\bbl at KVP@linebreaking\@nil\else
+ \bbl at xin@{,\bbl at KVP@linebreaking,}{,elongated,kashida,cjk,unhyphenated,}%
+ \ifin@
+ \bbl at csarg\xdef
+ {lnbrk@\languagename}{\expandafter\@car\bbl at KVP@linebreaking\@nil}%
+ \fi
+ \fi
+ \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+ \ifin@\else\bbl at xin@{/k}{/\bbl at cl{lnbrk}}\fi
+ \ifin@\bbl at arabicjust\fi
% == Line breaking: hyphenate.other.locale/.script==
\ifx\bbl at lbkflag\@empty
\bbl at ifunset{bbl at hyotl@\languagename}{}%
@@ -3478,6 +3497,10 @@
\bbl at adjust@lua{linebreak}{cjk_enabled=true}}
\@namedef{bbl at ADJ@linebreak.cjk at off}{%
\bbl at adjust@lua{linebreak}{cjk_enabled=false}}
+% Handlers for the justify.arabic key of \babeladjust (values on and
+% off). Both hand an assignment to arabic.justify_enabled over to
+% \bbl at adjust@lua, whose definition is outside this hunk; presumably
+% it sets the corresponding field of the Lua Babel table -- confirm.
+\@namedef{bbl at ADJ@justify.arabic at on}{%
+ \bbl at adjust@lua{linebreak}{arabic.justify_enabled=true}}
+\@namedef{bbl at ADJ@justify.arabic at off}{%
+ \bbl at adjust@lua{linebreak}{arabic.justify_enabled=false}}
\def\bbl at adjust@layout#1{%
\ifvmode
#1%
Modified: trunk/Master/texmf-dist/tex/generic/babel/babel.sty
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/babel.sty 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/babel.sty 2021-05-16 20:41:27 UTC (rev 59229)
@@ -33,7 +33,7 @@
%%
\NeedsTeXFormat{LaTeX2e}[2005/12/01]
-\ProvidesPackage{babel}[2021/04/26 3.58 The Babel package]
+\ProvidesPackage{babel}[2021/05/16 3.59 The Babel package]
\@ifpackagewith{babel}{debug}
{\providecommand\bbl at trace[1]{\message{^^J[ #1 ]}}%
\let\bbl at debug\@firstofone
Modified: trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/hyphen.cfg 2021-05-16 20:41:27 UTC (rev 59229)
@@ -37,10 +37,10 @@
\wlog{File: #1 #4 #3 <#2>}%
\let\ProvidesFile\@undefined}
\fi
-\ProvidesFile{hyphen.cfg}[2021/04/26 3.58 Babel hyphens]
+\ProvidesFile{hyphen.cfg}[2021/05/16 3.59 Babel hyphens]
\xdef\bbl at format{\jobname}
-\def\bbl at version{3.58}
-\def\bbl at date{2021/04/26}
+\def\bbl at version{3.59}
+\def\bbl at date{2021/05/16}
\ifx\AtBeginDocument\@undefined
\def\@empty{}
\let\orig at dump\dump
Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-ar.ini 2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
[identification]
charset = utf8
-version = 1.9
-date = 2021-04-24
+version = 1.10
+date = 2021-05-16
name.local = العربية
name.english = Arabic
name.babel = arabic
@@ -207,5 +207,10 @@
; 1-letter
transliteration.dad.8.0 = { ([{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe]) }
transliteration.dad.8.1 = { string = {1|{007C}AbtjHxdrzsSDTZ`RfqklmnhUIYaui+opCvgJe|ءابتجحخدرزسصضطظعغفقكلمنهوىيَُِّْپچڤگژۀ} }
+;
+kashida.plain.1.0 = { ()[يئهشسقفغعضصنمكلظطخحجثتب]()[يئهشسقفغعضصنمكلظطخحجثتباأإآوؤذدزرة] }
+kashida.plain.1.1 = { kashida = 500 }
+kashida.plain.2.0 = { ()ل()[اأإآ] }
+kashida.plain.2.1 = { kashida = 0 }
Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/ar/babel-arabic.tex 2021-05-16 20:41:27 UTC (rev 59229)
@@ -10,87 +10,4 @@
\BabelBeforeIni{ar}{%
}
-\ifcase\bbl at engine\or
-
-\directlua{
-
-Babel.ar_tolong = {}
-
-function Babel.ar_justify(head)
- local substlist = Babel.ar_tolong
- local GLYPH = node.id'glyph'
- local substs, width, goal
- local subst_done = true % false
- math.randomseed(1)
- for line in node.traverse_id(node.id'hlist', head) do
- if (line.glue_sign == 1 and line.glue_order == 0) then % exclude last line!
- substs = {} % we store all “expandable” letters of each line
- for n in node.traverse_id(GLYPH, line.head) do
- if (substlist[n.char]) then
- table.insert(substs, n)
- end
- end
- line.glue_set = 0 % deactivate normal glue expansion
- width = node.dimensions(line.head) % check the new width
- goal = line.width
-
- while (width < goal and \string#substs > 0) do
- x = math.random(\string#substs) % choose randomly a glyph
- oldchar = substs[x].char
- substs[x].char = substlist[substs[x].char]
- subst_done = true
- width = node.dimensions(line.head) % check if the line is too wide
- % substitute back if the line would be too wide and break:
- if width > goal then substs[x].char = oldchar break end
- % if further substitutions have to be done, remove the just
- % substituted node from the list:
- table.remove(substs,x)
- end
- % Must take into account marks and ins, see luatex manual.
- % Have to be executed only if there are changes.
- if subst_done then
- line.head = node.hpack(line.head, goal, 'exactly')
- end
- end
- end
- return head
-end
-
-}
-
-\gdef\ArabicSetupJust{%
- \directlua{
- Babel.ar_tolong = {}
- luatexbase.add_to_callback('post_linebreak_filter',
- Babel.ar_justify, 'Babel.ar_justify')
- }%
- % It must be done for each font, and stored separately.
- % Locale must be taken into account too. Brute force.
- % No rules at all, yet. The ideal: look at jalt table.
- % And perhaps other tables (falt?, cswh?). What about kaf?
- \begingroup
- \bbl at foreach{%
- 0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
- 0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
- 0640,0641,0642,0643,0644,0645,0646,0647,0649}{%
- \setbox\z@\hbox{% Only final, for the moment
- ^^^^200d\char"##1=%
- \addfontfeature{RawFeature=+jalt}%
- ^^^^200d\char"##1}%
- \directlua{
- local chars = {}
- for item in node.traverse(tex.box[0].head) do
- if item.id == node.id'glyph' and item.char > 128 and
- not (item.char == 0x200D) then
- table.insert(chars, item.char)
- end
- end
- if not (chars[1] == chars[2]) then
- Babel.ar_tolong[chars[1]] = chars[2]
- end
- }}%
- \endgroup}
-
-\fi
-
-\endinput
\ No newline at end of file
+\endinput
Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/fa/babel-fa.ini 2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
[identification]
charset = utf8
-version = 1.7
-date = 2020-10-11
+version = 1.8
+date = 2021-05-16
name.local = فارسی
name.english = Persian
name.babel = persian
@@ -177,3 +177,9 @@
abjad = ا ب ج د ه و ز ح ط ی ک ل م ن س ع ف ص ق ر ش ت ث خ ذ ض ظ غ
alphabetic = ا ب پ ت ث ج چ ح خ د ذ ر ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ن و ه ی
+[transforms.prehyphenation]
+kashida.plain.1.0 = { ()[يئهشسقفغعضصنمكلظطخحچجثتپب]()[يئهشسقفغعضصنمكلظطخحچجثتپباأإآوؤذدژزرة] }
+kashida.plain.1.1 = { kashida = 500 }
+kashida.plain.2.0 = { ()ل()[اأإآ] }
+kashida.plain.2.1 = { kashida = 0 }
+
Modified: trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/locale/nb/babel-nb.ini 2021-05-16 20:41:27 UTC (rev 59229)
@@ -11,8 +11,8 @@
[identification]
charset = utf8
-version = 1.4
-date = 2021-04-24
+version = 1.5
+date = 2021-05-16
name.local = norsk bokmål
name.english = Norwegian Bokmål
name.babel = norsk norwegianbokmal
@@ -186,8 +186,4 @@
[counters]
-[transforms.posthyphenation]
-doubleletter.hyphen.1.0 = {()([BDFGLMNPRSTbdfglmnprst])|(){1}}
-doubleletter.hyphen.1.1 = { no = {1}, pre = {1}{1}- }
-doubleletter.hyphen.1.2 = remove
Modified: trunk/Master/texmf-dist/tex/generic/babel/luababel.def
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/luababel.def 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/luababel.def 2021-05-16 20:41:27 UTC (rev 59229)
@@ -534,6 +534,203 @@
\ifx\bbl at KVP@intrapenalty\@nil\else
\expandafter\bbl at intrapenalty\bbl at KVP@intrapenalty\@@
\fi}}
+\ifnum\bbl at bidimode>100 \ifnum\bbl at bidimode<200
+% Comma-separated hexadecimal code points (0628 to 0647, then 0649).
+% \bbl at parsejalti hands this list to \bblar at fetchjalt, each time with a
+% trailing character (U+064A or U+0649) typeset after the listed one.
+\def\bblar at chars{%
+ 0628,0629,062A,062B,062C,062D,062E,062F,0630,0631,0632,0633,%
+ 0634,0635,0636,0637,0638,0639,063A,063B,063C,063D,063E,063F,%
+ 0640,0641,0642,0643,0644,0645,0646,0647,0649}
+% Comma-separated hexadecimal code points that \bbl at parsejalti hands to
+% \bblar at fetchjalt with no trailing character (empty second argument).
+\def\bblar at elongated{%
+ 0626,0628,062A,062B,0633,0634,0635,0636,063B,%
+ 063C,063D,063E,063F,0641,0642,0643,0644,0646,%
+ 0649,064A}
+% \bblar at nofswarn redefines \msg_warning:nnx as a three-argument no-op.
+% `_' and `:' get catcode 11 only inside this group, so that the name can be
+% written in the definition. The inner \gdef is global: the redefinition
+% outlives the group in which \bbl at parsejalti calls this macro.
+% Presumably meant to silence warnings raised by \addfontfeature -- confirm.
+\begingroup
+ \catcode`_=11 \catcode`:=11
+ \gdef\bblar at nofswarn{\gdef\msg_warning:nnx##1##2##3{}}
+\endgroup
+% One-time setup of Arabic justification. On its first call it:
+% - disables itself (later calls are \relax);
+% - allocates the attribute \bblar at kashida and sets it to 0;
+% - adds a call to \bbl at parsejalt, via \bbl at add, to the macro whose
+%   name is `selectfont ' (with a trailing space);
+% - creates Babel.arabic.elong_map if missing, resets the entry of the
+%   current \localeid to an empty table, and registers Babel.arabic.justify
+%   as a post_linebreak_filter callback.
+\gdef\bbl at arabicjust{%
+ \let\bbl at arabicjust\relax
+ \newattribute\bblar at kashida
+ \bblar at kashida=\z@
+ \expandafter\bbl at add\csname selectfont \endcsname{{\bbl at parsejalt}}%
+ \directlua{
+ Babel.arabic.elong_map = Babel.arabic.elong_map or {}
+ Babel.arabic.elong_map[\the\localeid] = {}
+ luatexbase.add_to_callback('post_linebreak_filter',
+ Babel.arabic.justify, 'Babel.arabic.justify')
+ }}%
+% \bblar at fetchjalt <list macro> <trailing text> <table> <key suffix>
+% For every hexadecimal code point in the list held by the first argument,
+% typeset U+200D, the character and the second argument in box 0. Then
+% store, in the Lua table Babel.arabic.<table> under the key
+% '<code point><key suffix>', the char field of the last glyph node in that
+% box whose char is above 0x600 and is not U+200D.
+% If \bblar at JE@<code point> is defined, its value is typeset in place of
+% the code point.
+\def\bblar at fetchjalt#1#2#3#4{%
+  \bbl at exp{\\\bbl at foreach{#1}}{%
+    \bbl at ifunset{bblar at JE@##1}%
+      {\setbox\z@\hbox{^^^^200d\char"##1#2}}%
+      {\setbox\z@\hbox{^^^^200d\char"\@nameuse{bblar at JE@##1}#2}}%
+    \directlua{%
+      local last = nil
+      for item in node.traverse(tex.box[0].head) do
+        if item.id == node.id'glyph' and item.char > 0x600 and
+            not (item.char == 0x200D) then
+          last = item
+        end
+      end
+      % last stays nil when the box holds no such glyph (for instance, the
+      % font has none). Store nil in that case, which also drops an entry
+      % left by a previous font, instead of raising an error on last.char.
+      Babel.arabic.#3['##1#4'] = last and last.char or nil
+    }}}
+% Hook added to font selection by \bbl at arabicjust.
+% Does nothing unless \addfontfeature is defined and the \bbl at xin@ test
+% of `/e' against `/' followed by the locale value `lnbrk' succeeds.
+% Otherwise, the first time a font id is seen for the current \localeid, an
+% empty map is stored for it and \bbl at parsejalti is fed back to TeX
+% through tex.print to fill that map.
+\gdef\bbl at parsejalt{%
+  \ifx\addfontfeature\@undefined\else
+    \bbl at xin@{/e}{/\bbl at cl{lnbrk}}%
+    \ifin@
+      \directlua{%
+        % \bbl at arabicjust creates the per-locale table only for the locale
+        % which was current when it ran; create it on demand for any other
+        % locale instead of indexing nil.
+        local maps = Babel.arabic.elong_map
+        maps[\the\localeid] = maps[\the\localeid] or {}
+        if maps[\the\localeid][\fontid\font] == nil then
+          maps[\the\localeid][\fontid\font] = {}
+          tex.print([[\string\csname\space bbl at parsejalti\endcsname]])
+        end
+      }%
+    \fi
+  \fi}
+% Fills the elongation map of the current font and locale.
+% Inside a group: \bbl at parsejalt is disabled, the current \fontid is saved
+% in \bbl at tempb, and \bblar at fetchjalt fills Babel.arabic.from before,
+% and Babel.arabic.dest after, \addfontfeature{RawFeature=+jalt}.
+% Every key whose dest entry exists and differs from its from entry is then
+% stored as elong_map[locale][saved font id][from value] = dest value.
+\gdef\bbl at parsejalti{%
+ \begingroup
+ \let\bbl at parsejalt\relax % To avoid infinite loop
+ \edef\bbl at tempb{\fontid\font}% Font id before the jalt feature is added
+ \bblar at nofswarn
+ \bblar at fetchjalt\bblar at elongated{}{from}{}%
+ \bblar at fetchjalt\bblar at chars{^^^^064a}{from}{a}% Followed by U+064A (yeh)
+ \bblar at fetchjalt\bblar at chars{^^^^0649}{from}{y}% Followed by U+0649 (alef maksura)
+ \addfontfeature{RawFeature=+jalt}%
+ % \@namedef{bblar at JE@0643}{06AA}% todo: catch medial kaf
+ \bblar at fetchjalt\bblar at elongated{}{dest}{}%
+ \bblar at fetchjalt\bblar at chars{^^^^064a}{dest}{a}%
+ \bblar at fetchjalt\bblar at chars{^^^^0649}{dest}{y}%
+ \directlua{%
+ for k, v in pairs(Babel.arabic.from) do
+ if Babel.arabic.dest[k] and
+ not (Babel.arabic.from[k] == Babel.arabic.dest[k]) then
+ Babel.arabic.elong_map[\the\localeid][\bbl at tempb]
+ [Babel.arabic.from[k]] = Babel.arabic.dest[k]
+ end
+ end
+ }%
+ \endgroup}
+\begingroup
+\catcode`#=11
+\catcode`~=11
+\directlua{
+
+% Shared state for Arabic justification; an existing table is kept.
+Babel.arabic = Babel.arabic or {}
+% Scratch tables written by \bblar at fetchjalt: values found before (from)
+% and after (dest) \bbl at parsejalti adds the jalt font feature.
+Babel.arabic.from = {}
+Babel.arabic.dest = {}
+% Babel.arabic.justify multiplies the line width by this to get its goal.
+Babel.arabic.justify_factor = 0.95
+% When false, Babel.arabic.justify returns its argument unchanged.
+Babel.arabic.justify_enabled = true
+
+% Callback registered in post_linebreak_filter by \bbl at arabicjust.
+% For every hlist in head whose glue is stretching with order 0, the line is
+% widened towards justify_factor times its width: first by replacing glyphs
+% with the elongated form recorded in Babel.arabic.elong_map, then by
+% inserting U+0640 after glyphs carrying a positive kashida attribute,
+% heaviest weights first. A line which was changed is packed again to its
+% original width. Returns the head of the list, which is a new node when
+% the first line has been replaced.
+function Babel.arabic.justify(head)
+  if not Babel.arabic.justify_enabled then return head end
+  local d, new
+  local elongs, k_list, k_item, pos_inline
+  local width, width_new, full, k_curr, wt_pos, goal
+  local subst_done
+  local elong_map = Babel.arabic.elong_map
+  local GLYPH = node.id'glyph'
+  local KASHIDA = luatexbase.registernumber'bblar at kashida'
+  local LOCALE = luatexbase.registernumber'bbl at attr@locale'
+
+  for line in node.traverse_id(node.id'hlist', head) do
+    % Exclude last line. todo. But-- it discards one-word lines, too!
+    % ? Look for glue = 12:15
+    if (line.glue_sign == 1 and line.glue_order == 0) then
+      % Reset for each line. Otherwise a change in an earlier line makes
+      % every later line be packed again, even with a stale width.
+      subst_done = false
+      elongs = {}     % Stores elongated candidates of each line
+      k_list = {}     % And all letters with kashida
+      pos_inline = 0  % Not yet used
+
+      for n in node.traverse_id(GLYPH, line.head) do
+        pos_inline = pos_inline + 1 % To find where it is. Not used.
+
+        % Elongated glyphs
+        if elong_map then
+          local locale = node.get_attribute(n, LOCALE)
+          if elong_map[locale] and elong_map[locale][n.font] and
+              elong_map[locale][n.font][n.char] then
+            table.insert(elongs, {node = n, locale = locale} )
+            % No kashida on the node before an elongated candidate. The
+            % candidate may be the first node of the line.
+            if n.prev then
+              node.set_attribute(n.prev, KASHIDA, 0)
+            end
+          end
+        end
+
+        % Tatwil
+        if Babel.kashida_wts then
+          % get_attribute returns nil when the attribute is unset
+          local k_wt = node.get_attribute(n, KASHIDA)
+          if k_wt and k_wt > 0 then % todo. parameter for multi inserts
+            table.insert(k_list, {node = n, weight = k_wt, pos = pos_inline})
+          end
+        end
+
+      end % of node.traverse_id
+
+      if #elongs == 0 and #k_list == 0 then goto next_line end
+
+      full = line.width
+      goal = full * Babel.arabic.justify_factor % A bit crude
+      width = node.dimensions(line.head) % The 'natural' width
+
+      % == Elongated ==
+      % Original idea taken from 'chickenize'
+      while (#elongs > 0 and width < goal) do
+        subst_done = true
+        local x = #elongs
+        local curr = elongs[x].node
+        local oldchar = curr.char
+        curr.char = elong_map[elongs[x].locale][curr.font][curr.char]
+        width = node.dimensions(line.head) % Check if the line is too wide
+        % Substitute back if the line would be too wide and break:
+        if width > goal then
+          curr.char = oldchar
+          break
+        end
+        % If continue, pop the just substituted node from the list:
+        table.remove(elongs, x)
+      end
+
+      % == Tatwil ==
+      if #k_list == 0 then goto next_line end
+
+      width = node.dimensions(line.head) % The 'natural' width
+      k_curr = #k_list
+      wt_pos = 1
+
+      while width < goal do
+        subst_done = true
+        k_item = k_list[k_curr].node
+        if k_list[k_curr].weight == Babel.kashida_wts[wt_pos] then
+          d = node.copy(k_item)
+          d.char = 0x0640
+          line.head, new = node.insert_after(line.head, k_item, d)
+          width_new = node.dimensions(line.head)
+          % Undo the insertion if the line would become wider than the goal
+          % or if it has no effect. The test must be on the new width, as
+          % the old one is always below the goal inside this loop.
+          if width_new > goal or width == width_new then
+            node.remove(line.head, new) % Better compute before
+            break
+          end
+          width = width_new
+        end
+        if k_curr == 1 then
+          % All candidates visited: restart with the next weight, cyclically
+          k_curr = #k_list
+          wt_pos = (wt_pos >= #Babel.kashida_wts) and 1 or wt_pos+1
+        else
+          k_curr = k_curr - 1
+        end
+      end
+
+      ::next_line::
+
+      % Must take into account marks and ins, see luatex manual.
+      % Have to be executed only if there are changes. Investigate
+      % what's going on exactly.
+      if subst_done then
+        % Keep the heads returned by insert_before and remove: they differ
+        % from the old head when this line is the first node of the list.
+        head = node.insert_before(head, line,
+                                  node.hpack(line.head, full, 'exactly'))
+        head = node.remove(head, line)
+      end
+    end % if process line
+  end % for lines
+  return head
+end
+}
+\endgroup
+\fi\fi % Arabic just block
\AddBabelHook{babel-fontspec}{afterextras}{\bbl at switchfont}
\AddBabelHook{babel-fontspec}{beforestart}{\bbl at ckeckstdfonts}
\DisableBabelHook{babel-fontspec}
@@ -1113,6 +1310,7 @@
&% after the match, either as found by u.match (faster) or the
&% computed position based on sc if w has changed.
local last_match = 0
+ local step = 0
&% For every match.
while true do
@@ -1120,9 +1318,9 @@
print('=====')
end
local new &% used when inserting and removing nodes
- local refetch = false
local matches = { u.match(w, p, last_match) }
+
if #matches < 2 then break end
&% Get and remove empty captures (with ()'s, which return a
@@ -1188,6 +1386,10 @@
item_base = data_nodes[crep.data]
end
+ if crep then
+ step = crep.step or 0
+ end
+
if crep and next(crep) == nil then &% = {}
last_match = save_last &% Optimization
goto next
@@ -1197,9 +1399,16 @@
table.remove(w_nodes, sc)
w = u.sub(w, 1, sc-1) .. u.sub(w, sc+1)
sc = sc - 1 &% Nothing has been inserted.
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
goto next
+ elseif crep and crep.kashida then &% Experimental
+ node.set_attribute(item,
+ luatexbase.registernumber'bblar at kashida',
+ crep.kashida)
+ last_match = utf8.offset(w, sc+1+step)
+ goto next
+
elseif crep and crep.string then
local str = crep.string(matches)
if str == '' then &% Gather with nil
@@ -1233,7 +1442,7 @@
end &% for
node.remove(head, item)
end &% if ''
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
goto next
elseif mode == 1 and crep and (crep.pre or crep.no or crep.post) then
@@ -1302,7 +1511,7 @@
w = u.sub(w, 1, sc-1) .. placeholder .. u.sub(w, sc+1)
end
- last_match = utf8.offset(w, sc+1)
+ last_match = utf8.offset(w, sc+1+step)
::next::
@@ -1372,6 +1581,26 @@
return "]]..Babel.capt_map(m[" .. capno .. "]," ..
(mlen) .. ").." .. "[["
end
+
+  &% Create/Extend reversed sorted list of kashida weights.
+  &% Used as replacement function in gsub for `kashida = <weight>': key is
+  &% the matched word (not used), wt the weight as a string. The weight is
+  &% added to Babel.kashida_wts, kept in decreasing order and without
+  &% duplicates, and the text 'kashida = <weight>' is returned.
+  function Babel.capture_kashida(key, wt)
+    wt = tonumber(wt)
+    local wts = Babel.kashida_wts
+    if not wts then
+      Babel.kashida_wts = { wt }
+    else
+      &% Default is to append, which is right when wt is below every
+      &% stored weight and also when the list is empty.
+      local pos = #wts + 1
+      for p, q in ipairs(wts) do
+        if wt == q then
+          pos = nil &% Already stored
+          break
+        elseif wt > q then
+          pos = p   &% Goes before the first lower weight
+          break
+        end
+      end
+      if pos then
+        table.insert(wts, pos, wt)
+      end
+    end
+    return 'kashida = ' .. wt
+  end
}
\catcode`\#=6
\gdef\babelposthyphenation#1#2#3{&%
@@ -1432,10 +1661,11 @@
rep = rep:gsub('^%s*(remove)%s*$', 'remove = true')
rep = rep:gsub('^%s*(insert)%s*,', 'insert = true, ')
rep = rep:gsub('(string)%s*=%s*([^%s,]*)', Babel.capture_func)
- rep = rep:gsub( '(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+ rep = rep:gsub('(space)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
'space = {' .. '%2, %3, %4' .. '}')
- rep = rep:gsub( '(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
+ rep = rep:gsub('(spacefactor)%s*=%s*([%d%.]+)%s+([%d%.]+)%s+([%d%.]+)',
'spacefactor = {' .. '%2, %3, %4' .. '}')
+ rep = rep:gsub('(kashida)%s*=%s*([^%s,]*)', Babel.capture_kashida)
tex.print([[\string\babeltempa{{]] .. rep .. [[}}]])
}}}&%
\directlua{
Modified: trunk/Master/texmf-dist/tex/generic/babel/nil.ldf
===================================================================
--- trunk/Master/texmf-dist/tex/generic/babel/nil.ldf 2021-05-16 20:40:11 UTC (rev 59228)
+++ trunk/Master/texmf-dist/tex/generic/babel/nil.ldf 2021-05-16 20:41:27 UTC (rev 59229)
@@ -32,7 +32,7 @@
%% extension |.ins|) which are part of the distribution.
%%
-\ProvidesLanguage{nil}[2021/04/26 3.58 Nil language]
+\ProvidesLanguage{nil}[2021/05/16 3.59 Nil language]
\LdfInit{nil}{datenil}
\ifx\l at nil\@undefined
\newlanguage\l at nil
More information about the tex-live-commits
mailing list.