[latex3-commits] [latex3/latex3] main: Generate dialytika when required uppercasing Greek vowels (issue #1228) (fccb36e9e)

github at latex-project.org github at latex-project.org
Mon Jun 12 15:07:52 CEST 2023


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/fccb36e9ebe4a6dd7d148fe4110f84946919abab

>---------------------------------------------------------------

commit fccb36e9ebe4a6dd7d148fe4110f84946919abab
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Mon Jun 12 14:07:52 2023 +0100

    Generate dialytika when required uppercasing Greek vowels (issue #1228)


>---------------------------------------------------------------

fccb36e9ebe4a6dd7d148fe4110f84946919abab
 l3kernel/CHANGELOG.md                   |   2 +
 l3kernel/l3text-case.dtx                | 112 +++++++++++++++++++++++++++++++-
 l3kernel/testfiles/m3text002.luatex.tlg |   2 +
 l3kernel/testfiles/m3text002.lvt        |   1 +
 l3kernel/testfiles/m3text002.ptex.tlg   |   2 +
 l3kernel/testfiles/m3text002.tlg        |   2 +
 l3kernel/testfiles/m3text002.uptex.tlg  |   2 +
 l3kernel/testfiles/m3text002.xetex.tlg  |   2 +
 8 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index d469598b6..4e18afb2e 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -10,6 +10,8 @@ this project uses date-based 'snapshot' version identifiers.
 ### Changed
 - Exclude entries in `\l_text_case_exclude_arg_tl` from expansion in `\text_expand:n`
   (latex3/latex2e\#904)
+- Generate _dialytika_ if appropriate when uppercasing Greek words starting with a
+  vowel taking a breathing mark (issue \#1228)
 
 ## [2023-06-05]
 
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index dd903f104..4621271d4 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -1587,7 +1587,7 @@
         { \@@_codepoint_compare_p:nNn {#3} = { "1F2A } }
       }
       { \@@_change_case_boundary_upper_el:nnnw {#1} {#2} {#3} }
-      { \@@_change_case_loop:nnw {#1} {#2} #3 }
+      { \@@_change_case_breather:nnn {#1} {#2} {#3} }
   }
 \cs_new:Npn \@@_change_case_boundary_upper_el:nnnw
   #1#2#3#4 \q_@@_recursion_stop
@@ -1609,6 +1609,116 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnn}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnn}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather:nnnnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnnnn}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnnw}
+% \begin{macro}[EXP]{\@@_change_case_breather_aux:nnN}
+% \begin{macro}[EXP]{\@@_change_case_breather_dialytika:nnn}
+%   In Greek, breathing marks are normally dropped when uppercasing: see
+%   the code for the general case. However, if the first character of a
+%   word carries a breathing mark \emph{and} the next character takes a
+%   \emph{dialytika}, the \emph{dialytika} has to be added. We start by
+%   checking that the codepoint is in the Greek range, then decompose it.
+%    \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather:nnn #1#2#3
+  {
+    \@@_change_case_if_greek:nTF {#3}
+      {
+        \exp_args:Ne \@@_change_case_breather:nnnn
+          {
+            \codepoint_to_nfd:n
+              { \@@_codepoint_from_chars:Nw #3 }
+          }
+            {#1} {#2} {#3}
+      }
+      { \@@_change_case_loop:nnw {#1} {#2} #3 }
+  }
+\cs_new:Npn \@@_change_case_breather:nnnn #1#2#3#4
+  {
+    \@@_codepoint_process:nN
+      { \@@_change_case_breather:nnnnw {#2} {#3} {#4} }
+        #1 \q_mark
+  }
+%    \end{macrocode}
+%   If nothing follows the first decomposed codepoint there is no mark
+%   at all, and we return to the main loop. Otherwise the tail of the
+%   decomposition (the second codepoint or, if present, what follows
+%   it) is compared with the two breathing marks: \emph{psili} (U+0313)
+%   and \emph{dasia} (U+0314). \emph{To confirm}: this assumes the mark
+%   comes last, but NFD of U+1F04 is U+03B1, U+0313, U+0301.
+%    \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather:nnnnw #1#2#3#4#5 \q_mark
+  {
+    \tl_if_blank:nTF {#5}
+      { \@@_change_case_loop:nnw {#1} {#2} #3 }
+      {
+        \@@_codepoint_process:nN
+          { \@@_change_case_breather:nnnnnw {#1} {#2} {#3} {#4} }
+            #5 \q_mark
+      }
+  }
+\cs_new:Npn \@@_change_case_breather:nnnnnw #1#2#3#4#5#6 \q_mark
+  {
+    \tl_if_blank:nTF {#6}
+     {
+       \@@_change_case_breather_aux:nnnnn
+         {#1} {#2} {#3} {#4} {#5}
+     }
+     {
+       \@@_change_case_breather_aux:nnnnn
+         {#1} {#2} {#3} {#4} {#6}
+     }
+  }
+\cs_new:Npn \@@_change_case_breather_aux:nnnnn #1#2#3#4#5
+  {
+    \bool_lazy_or:nnTF
+      { \@@_codepoint_compare_p:nNn {#5} = { "0313 } }
+      { \@@_codepoint_compare_p:nNn {#5} = { "0314 } }
+      { \@@_change_case_breather_aux:nnnw {#1} {#2} {#4} }
+      { \@@_change_case_loop:nnw {#1} {#2} #3 }
+  }
+%    \end{macrocode}
+%   Now the lookahead can be fired. Only the first decomposed codepoint
+%   is stored, uppercased, so the breathing mark is dropped. Then, if an
+%   \texttt{N}-type token follows and it takes a \emph{dialytika}, that
+%   is generated: the latter is code shared with the general mechanism.
+%    \begin{macrocode}
+\cs_new:Npn \@@_change_case_breather_aux:nnnw #1#2#3#4
+  \q_@@_recursion_stop
+  {
+    \@@_change_case_store:e
+      { \@@_change_case_codepoint:nn { upper } {#3} }
+    \tl_if_head_is_N_type:nTF {#4}
+      { \@@_change_case_breather_aux:nnN  }
+      { \@@_change_case_loop:nnw }
+        {#1} {#2} #4 \q_@@_recursion_stop
+  }
+\cs_new:Npn \@@_change_case_breather_aux:nnN #1#2#3
+  {
+    \@@_codepoint_process:nN
+      { \@@_change_case_breather_dialytika:nnn {#1} {#2} } #3
+  }
+\cs_new:Npn \@@_change_case_breather_dialytika:nnn #1#2#3
+  {
+     \@@_change_case_if_takes_dialytika:nTF {#3}
+       {
+         \@@_change_case_upper_el_dialytika:n {#3}
+         \@@_change_case_loop:nnw {#1} {#2}
+       }
+       { \@@_change_case_loop:nnw {#1} {#2} #3 }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
 % \begin{macro}[EXP]{\@@_change_case_title_el:nnnn}
 %   Titlecasing retains accents, but to prevent the uppercasing code
 %   from kicking in, there has to be an explicit function here.
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index 9a830d284..5cf750ace 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
 ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^a8῁
 ;
 ;
+ἈΥΠΝΊΑ
+ΑΫΠΝΙΑ
 ============================================================
 ============================================================
 TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 1d513d8b7..25f275d27 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -224,6 +224,7 @@
     \greektest:n { Το~ένα~Ή~το~άλλο }                     \NEWLINE
     \greektest:n { ͺ ῀ ` ´ ῾ ᾽ ᾿ ῍ ῎ ῝ ῞ ῟ ῏ ῭ ΅ ῁ }      \NEWLINE
     \greektest:n { ; }                                    \NEWLINE
+    \greektest:n { ἀυπνία }                               \NEWLINE
   }
 
 \TESTEXP { Turkish/Azeri }
diff --git a/l3kernel/testfiles/m3text002.ptex.tlg b/l3kernel/testfiles/m3text002.ptex.tlg
index b9bdd227d..87cbc82f4 100644
--- a/l3kernel/testfiles/m3text002.ptex.tlg
+++ b/l3kernel/testfiles/m3text002.ptex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
 ^^cd^^ba^^e1^^bf^^80`^^b4^^e1^^bf^^be^^e1^^be^^bd^^e1^^be^^bf^^e1^^bf^^8d^^e1^^bf^^8e^^e1^^bf^^9d^^e1^^bf^^9e^^e1^^bf^^9f^^e1^^bf^^8f^^e1^^bf^^ad^^c2^^a8^^e1^^bf^^81
 ^^cd^^be
 ^^cd^^be
+^^e1^^bc^^88υπν^^ce^^8aα
+^^ce^^91υπν^^ce^^99α
 ============================================================
 ============================================================
 TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 850df5226..a7e2085eb 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
 ^^cd^^ba^^e1^^bf^^80`^^c2^^b4^^e1^^bf^^be^^e1^^be^^bd^^e1^^be^^bf^^e1^^bf^^8d^^e1^^bf^^8e^^e1^^bf^^9d^^e1^^bf^^9e^^e1^^bf^^9f^^e1^^bf^^8f^^e1^^bf^^ad^^c2^^a8^^e1^^bf^^81
 ^^cd^^be
 ^^cd^^be
+^^e1^^bc^^88^^ce^^a5^^ce^^a0^^ce^^9d^^ce^^8a^^ce^^91
+^^ce^^91^^ce^^ab^^ce^^a0^^ce^^9d^^ce^^99^^ce^^91
 ============================================================
 ============================================================
 TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index 25f0bb600..15182e2e6 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
 ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^c2^^a8῁
 ;
 ;
+^^e1^^bc^^88^^ce^^a5^^ce^^a0^^ce^^9d^^ce^^8a^^ce^^91
+^^ce^^91^^ce^^ab^^ce^^a0^^ce^^9d^^ce^^99^^ce^^91
 ============================================================
 ============================================================
 TEST 18: Turkish/Azeri
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 9a830d284..5cf750ace 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -229,6 +229,8 @@ TEST 17: Greek
 ͺ῀`^^b4῾᾽᾿῍῎῝῞῟῏῭^^a8῁
 ;
 ;
+ἈΥΠΝΊΑ
+ΑΫΠΝΙΑ
 ============================================================
 ============================================================
 TEST 18: Turkish/Azeri





More information about the latex3-commits mailing list.