[latex3-commits] [latex3/latex3] main: Generate dialytika when required uppercasing Greek vowels (issue #1228) (fccb36e9e)
github at latex-project.org
github at latex-project.org
Mon Jun 12 15:07:52 CEST 2023
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/fccb36e9ebe4a6dd7d148fe4110f84946919abab
>---------------------------------------------------------------
commit fccb36e9ebe4a6dd7d148fe4110f84946919abab
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Mon Jun 12 14:07:52 2023 +0100
Generate dialytika when required uppercasing Greek vowels (issue #1228)
>---------------------------------------------------------------
fccb36e9ebe4a6dd7d148fe4110f84946919abab
l3kernel/CHANGELOG.md | 2 +
l3kernel/l3text-case.dtx | 112 +++++++++++++++++++++++++++++++-
l3kernel/testfiles/m3text002.luatex.tlg | 2 +
l3kernel/testfiles/m3text002.lvt | 1 +
l3kernel/testfiles/m3text002.ptex.tlg | 2 +
l3kernel/testfiles/m3text002.tlg | 2 +
l3kernel/testfiles/m3text002.uptex.tlg | 2 +
l3kernel/testfiles/m3text002.xetex.tlg | 2 +
8 files changed, 124 insertions(+), 1 deletion(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index d469598b6..4e18afb2e 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -10,6 +10,8 @@ this project uses date-based 'snapshot' version identifiers.
### Changed
- Exclude entries in `\l_text_case_exclude_arg_tl` from expansion in `\text_expand:n`
(latex3/latex2e\#904)
+- Generate _dialytika_ if appropriate when uppercasing Greek words starting with a
+ vowel taking a breathing mark (issue \#1228)
## [2023-06-05]
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index dd903f104..4621271d4 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -1587,7 +1587,7 @@
{ \@@_codepoint_compare_p:nNn {#3} = { "1F2A } }
}
{ \@@_change_case_boundary_upper_el:nnnw {#1} {#2} {#3} }
- { \@@_change_case_loop:nnw {#1} {#2} #3 }
+ { \@@_change_case_breather:nnn {#1} {#2} {#3} }
}
\cs_new:Npn \@@_change_case_boundary_upper_el:nnnw
#1#2#3#4 \q_@@_recursion_stop
@@ -1609,6 +1609,116 @@
% \end{macro}
% \end{macro}
% \end{macro}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnn}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnn}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnnnn}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnN}
+% \begin{macro}[EXP]{\@@_change_case_breather_dialytika:nnn}
+% In Greek, breathing marks are normally dropped when uppercasing:
+% see the code for the general case. However, when the first character
+% of a word has a breathing mark \emph{and} the next character takes a
+% \emph{dialytika}, the \emph{dialytika} has to be added to that next
+% character. We start by checking if the current codepoint is in the
+% Greek range, then decomposing.
+% \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather:nnn #1#2#3 % Entry point. #3: codepoint to examine; #1, #2: only passed along
+ {
+ \@@_change_case_if_greek:nTF {#3} % only codepoints in the Greek range are decomposed
+ {
+ \exp_args:Ne \@@_change_case_breather:nnnn % e-type: the decomposition is fully expanded before being passed on
+ {
+ \codepoint_to_nfd:n % decomposition of #3, which becomes the first argument of the :nnnn step
+ { \@@_codepoint_from_chars:Nw #3 }
+ }
+ {#1} {#2} {#3} % the three original arguments follow the decomposition
+ }
+ { \@@_change_case_loop:nnw {#1} {#2} #3 } % not Greek: hand back to \@@_change_case_loop:nnw with #3 reinserted unbraced
+ }
+\cs_new:Npn \@@_change_case_breather:nnnn #1#2#3#4 % #1: decomposition of #4; #2, #3: passed along; #4: original codepoint
+ {
+ \@@_codepoint_process:nN % presumably feeds the first codepoint of #1 to the function below as a braced argument - confirm against its definition
+ { \@@_change_case_breather:nnnnw {#2} {#3} {#4} }
+ #1 \q_mark % \q_mark ends the decomposition so the :nnnnw step can grab whatever is left over
+ }
+% \end{macrocode}
+% Normal form decomposition will always give between one and three
+% codepoints. Luckily, the two breathing marks (\emph{psili} and
+% \emph{dasia}) will be in a predictable position: last. So we can
+% quickly establish first that there was a change on decomposition,
+% and second if the final resulting codepoint is one of the two we
+% care about.
+% \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather:nnnnw #1#2#3#4#5 \q_mark % #3: original codepoint; #4: first decomposed codepoint; #5: rest of the decomposition
+ {
+ \tl_if_blank:nTF {#5} % nothing after the first codepoint means decomposition changed nothing
+ { \@@_change_case_loop:nnw {#1} {#2} #3 } % so there is no mark to inspect: back to the loop with #3 reinserted
+ {
+ \@@_codepoint_process:nN % otherwise take the next codepoint off the rest
+ { \@@_change_case_breather:nnnnnw {#1} {#2} {#3} {#4} }
+ #5 \q_mark
+ }
+ }
+\cs_new:Npn \@@_change_case_breather:nnnnnw #1#2#3#4#5#6 \q_mark % #5: second decomposed codepoint; #6: anything after it
+ {
+ \tl_if_blank:nTF {#6} % selects which part of the decomposition is tested as the possible breathing mark
+ {
+ \@@_change_case_breather_aux:nnnnn
+ {#1} {#2} {#3} {#4} {#5} % exactly two codepoints: the second one is the candidate
+ }
+ {
+ \@@_change_case_breather_aux:nnnnn % more than two codepoints: the remainder #6 is the candidate
+ {#1} {#2} {#3} {#4} {#6} % NOTE(review): canonical decomposition of U+1F04 is 03B1 0313 0301, so #6 would be the accent and not the breathing mark - confirm accented input is handled
+ }
+ }
+\cs_new:Npn \@@_change_case_breather_aux:nnnnn #1#2#3#4#5 % #3: original codepoint; #4: first decomposed codepoint; #5: candidate mark
+ {
+ \bool_lazy_or:nnTF % true when #5 is either of the two breathing marks
+ { \@@_codepoint_compare_p:nNn {#5} = { "0313 } } % U+0313, the combining form of psili
+ { \@@_codepoint_compare_p:nNn {#5} = { "0314 } } % U+0314, the combining form of dasia
+ { \@@_change_case_breather_aux:nnnw {#1} {#2} {#4} } % breathing mark found: continue with #4 alone, the original #3 is not passed on
+ { \@@_change_case_loop:nnw {#1} {#2} #3 } % no breathing mark: back to the loop with the original codepoint #3
+ }
+% \end{macrocode}
+% Now the lookahead can be fired. The first decomposed codepoint is
+% stored in uppercase, without the breathing mark, and then the next
+% codepoint is checked to see whether it takes a \emph{dialytika}.
+% If it does, the \emph{dialytika} is generated using code shared
+% with the general mechanism; otherwise the loop simply continues.
+% \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather_aux:nnnw #1#2#3#4 % #3: first decomposed codepoint; #4: remaining input
+ \q_@@_recursion_stop % #4 runs up to this marker, which is put back in place below
+ {
+ \@@_change_case_store:e % store the uppercase form of #3; the breathing mark is not part of #3
+ { \@@_change_case_codepoint:nn { upper } {#3} }
+ \tl_if_head_is_N_type:nTF {#4} % look ahead only when the remaining input starts with a single token
+ { \@@_change_case_breather_aux:nnN } % single token next: examine it for a dialytika
+ { \@@_change_case_loop:nnw } % anything else: go straight back to the loop
+ {#1} {#2} #4 \q_@@_recursion_stop % whichever function was chosen receives #1, #2 and the untouched remaining input
+ }
+\cs_new:Npn \@@_change_case_breather_aux:nnN #1#2#3 % #3: first token of the remaining input
+ {
+ \@@_codepoint_process:nN % presumably collects the full codepoint starting at #3 and passes it on braced - confirm against its definition
+ { \@@_change_case_breather_dialytika:nnn {#1} {#2} } #3
+ }
+\cs_new:Npn \@@_change_case_breather_dialytika:nnn #1#2#3 % #3: the codepoint following the one that carried the breathing mark
+ {
+ \@@_change_case_if_takes_dialytika:nTF {#3} % can #3 take a dialytika?
+ {
+ \@@_change_case_upper_el_dialytika:n {#3} % yes: the helper shared with the general mechanism deals with #3
+ \@@_change_case_loop:nnw {#1} {#2} % the loop resumes after #3, which is not reinserted
+ }
+ { \@@_change_case_loop:nnw {#1} {#2} #3 } % no: reinsert #3 so the loop processes it as usual
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
% \begin{macro}[EXP]{\@@_change_case_title_el:nnnn}
% Titlecasing retains accents, but to prevent the uppercasing code
% from kicking in, there has to be an explicit function here.
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index 9a830d284..5cf750ace 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^a8῁
;
;
+ἈΥΠΝΊΑ
+ΑΫΠΝΙΑ
============================================================
============================================================
TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 1d513d8b7..25f275d27 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -224,6 +224,7 @@
\greektest:n { Το~ένα~Ή~το~άλλο } \NEWLINE
\greektest:n { ͺ ῀ ` ´ ῾ ᾽ ᾿ ῍ ῎ ῝ ῞ ῟ ῏ ῭ ΅ ῁ } \NEWLINE
\greektest:n { ; } \NEWLINE
+ \greektest:n { ἀυπνία } \NEWLINE
}
\TESTEXP { Turkish/Azeri }
diff --git a/l3kernel/testfiles/m3text002.ptex.tlg b/l3kernel/testfiles/m3text002.ptex.tlg
index b9bdd227d..87cbc82f4 100644
--- a/l3kernel/testfiles/m3text002.ptex.tlg
+++ b/l3kernel/testfiles/m3text002.ptex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
^^cd^^ba^^e1^^bf^^80`^^b4^^e1^^bf^^be^^e1^^be^^bd^^e1^^be^^bf^^e1^^bf^^8d^^e1^^bf^^8e^^e1^^bf^^9d^^e1^^bf^^9e^^e1^^bf^^9f^^e1^^bf^^8f^^e1^^bf^^ad^^c2^^a8^^e1^^bf^^81
^^cd^^be
^^cd^^be
+^^e1^^bc^^88υπν^^ce^^8aα
+^^ce^^91υπν^^ce^^99α
============================================================
============================================================
TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 850df5226..a7e2085eb 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
^^cd^^ba^^e1^^bf^^80`^^c2^^b4^^e1^^bf^^be^^e1^^be^^bd^^e1^^be^^bf^^e1^^bf^^8d^^e1^^bf^^8e^^e1^^bf^^9d^^e1^^bf^^9e^^e1^^bf^^9f^^e1^^bf^^8f^^e1^^bf^^ad^^c2^^a8^^e1^^bf^^81
^^cd^^be
^^cd^^be
+^^e1^^bc^^88^^ce^^a5^^ce^^a0^^ce^^9d^^ce^^8a^^ce^^91
+^^ce^^91^^ce^^ab^^ce^^a0^^ce^^9d^^ce^^99^^ce^^91
============================================================
============================================================
TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index 25f0bb600..15182e2e6 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^c2^^a8῁
;
;
+^^e1^^bc^^88^^ce^^a5^^ce^^a0^^ce^^9d^^ce^^8a^^ce^^91
+^^ce^^91^^ce^^ab^^ce^^a0^^ce^^9d^^ce^^99^^ce^^91
============================================================
============================================================
TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 9a830d284..5cf750ace 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^a8῁
;
;
+ἈΥΠΝΊΑ
+ΑΫΠΝΙΑ
============================================================
============================================================
TEST 18: Turkish/Azeri
More information about the latex3-commits
mailing list.