[latex3-commits] [git/LaTeX3-latex3-latex3] l3text: Add case changing support for Greek (e523db139)

Joseph Wright joseph.wright at morningstar2.co.uk
Tue Dec 3 09:32:10 CET 2019


Repository : https://github.com/latex3/latex3
On branch  : l3text
Link       : https://github.com/latex3/latex3/commit/e523db1392d3103c48e2fceb81354cf91bfbc90a

>---------------------------------------------------------------

commit e523db1392d3103c48e2fceb81354cf91bfbc90a
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Tue Dec 3 08:32:10 2019 +0000

    Add case changing support for Greek


>---------------------------------------------------------------

e523db1392d3103c48e2fceb81354cf91bfbc90a
 l3kernel/l3text.dtx                     | 129 ++++++++++++++++++++++++++++++++
 l3kernel/testfiles/m3text002.luatex.tlg |  28 ++++---
 l3kernel/testfiles/m3text002.lvt        |   8 ++
 l3kernel/testfiles/m3text002.tlg        |  28 ++++---
 l3kernel/testfiles/m3text002.uptex.tlg  |  28 ++++---
 l3kernel/testfiles/m3text002.xetex.tlg  |  28 ++++---
 6 files changed, 209 insertions(+), 40 deletions(-)

diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 33e0d81cc..94a2360b7 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -1342,6 +1342,135 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnnN}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnnn}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el_aux:nnnN}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el_loop:nnw}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnN}
+% \begin{macro}[EXP]{\@@_change_case_if_greek:nTF}
+%   For Greek uppercasing, we need to know if characters \emph{in the Greek
+%   range} have accents. That means doing a \textsc{nfd} conversion first, then
+%   starting a search. As described by the Unicode \textsc{cldr}, Greek accents
+%   need to be found \emph{after} any U+0308 (diaeresis) and are done in two
+%   groups to allow for the canonical ordering.
+%    \begin{macrocode}
+\bool_lazy_or:nnT
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_change_case_upper_el:nnnN #1#2#3#4
+      {
+        \@@_change_case_if_greek:nTF { `#4 }
+          {
+            \exp_args:Ne \@@_change_case_upper_el:nnnn
+              { \char_to_nfd:N #4 } {#1} {#2} {#3}
+          }
+          { \@@_change_case_char:nnnN {#1} {#2} {#3} #4 }
+      }
+    \cs_new:Npn \@@_change_case_upper_el:nnnn #1#2#3#4
+      { \@@_change_case_upper_el_aux:nnnN {#2} {#3} {#4} #1 }
+    \cs_new:Npn \@@_change_case_upper_el_aux:nnnN #1#2#3#4
+      {
+        \use:c { char_ #1 case:N } #4
+        \@@_change_case_upper_el_loop:nnw {#2} {#3}
+      }
+    \cs_new:Npn \@@_change_case_upper_el_loop:nnw
+      #1#2#3 \q_recursion_stop
+      {
+        \tl_if_head_is_N_type:nTF {#3}
+          { \@@_change_case_upper_el:nnN }
+          { \@@_change_case_loop:nnw }
+            {#1} {#2} #3 \q_recursion_stop
+      }
+%    \end{macrocode}
+%   In addition to the Greek accents, we list three cases here where an
+%   accent outside the Greek range has a \textsc{nfd} that would make it
+%   equivalent. That includes U+0344, which has to insert U+0308.
+%    \begin{macrocode}
+    \cs_new:Npn \@@_change_case_upper_el:nnN #1#2#3
+      {
+        \token_if_cs:NTF #3
+          { \@@_change_case_loop:nnw {#1} {#2} #3 }
+          {
+            \int_compare:nNnTF { `#3 } = { "0308 }
+              {
+                \exp_not:n {#3}
+                \@@_change_case_upper_el_loop:nnw {#1} {#2}
+              }
+              {
+                \bool_lazy_any:nTF
+                  {
+                    { \int_compare_p:nNn { `#3 } = { "0300 } }
+                    { \int_compare_p:nNn { `#3 } = { "0301 } }
+                    { \int_compare_p:nNn { `#3 } = { "0304 } }
+                    { \int_compare_p:nNn { `#3 } = { "0306 } }
+                    { \int_compare_p:nNn { `#3 } = { "0308 } }
+                    { \int_compare_p:nNn { `#3 } = { "0313 } }
+                    { \int_compare_p:nNn { `#3 } = { "0314 } }
+                    { \int_compare_p:nNn { `#3 } = { "0342 } }
+                    { \int_compare_p:nNn { `#3 } = { "0340 } }
+                    { \int_compare_p:nNn { `#3 } = { "0341 } }
+                    { \int_compare_p:nNn { `#3 } = { "0343 } }
+                  }
+                  { \@@_change_case_upper_el_loop:nnw {#1} {#2} }
+                  {
+                    \int_compare:nNnTF { `#3 } = { "0344 }
+                      {
+                        \char_generate:nn { "0308 }
+                          { \@@_char_catcode:N #3 }
+                        \@@_change_case_upper_el_loop:nnw {#1} {#2}
+                      }
+                      {
+                        \int_compare:nNnTF { `#3 } = { "0345 }
+                          { \@@_change_case_loop:nnw {#1} {#2} }
+                          { \@@_change_case_loop:nnw {#1} {#2} #3 }
+                      }
+                  }
+              }
+          }
+      }
+    \prg_new_conditional:Npnn \@@_change_case_if_greek:n #1 { TF }
+      {
+        \if_int_compare:w #1 < "0370 \exp_stop_f:
+          \prg_return_false:
+        \else:
+          \if_int_compare:w #1 > "03FF \exp_stop_f:
+            \if_int_compare:w #1 < "1F00 \exp_stop_f:
+              \prg_return_false:
+            \else:
+              \if_int_compare:w #1 > "1FFF \exp_stop_f:
+                \prg_return_false:
+              \else:
+                \prg_return_true:
+              \fi:
+            \fi:
+          \else:
+            \prg_return_true:
+          \fi:
+        \fi:
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \begin{macro}[rEXP]{\@@_change_case_title_el:nnnN}
+%   Titlecasing retains accents, but to prevent the uppercasing code
+%   from kicking in, there has to be an explicit function here.
+%    \begin{macrocode}
+\bool_lazy_or:nnT
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_change_case_title_el:nnnN #1#2#3#4
+      { \@@_change_case_char:nnnN {#1} {#2} {#3} #4 }
+  }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}[rEXP]
 %   {
 %     \@@_change_cases_lower_lt:nnnN      ,
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index a33c00d04..d9ac41f25 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -78,7 +78,15 @@ FUSSBALL
 FUẞBALL
 ============================================================
 ============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ΟΔΥΣΣΕΥΣ
+Ὀδυσσεύς
+Ὀδυσσεύς
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
 ============================================================
 ragıp hul^^fbsi ^^f6zdem
 ragip hul^^fbsi̇ ^^f6zdem
@@ -90,7 +98,7 @@ Ip hul^^fbsi ^^f6zdem
 Ip hul^^fbsi ^^f6zdem
 ============================================================
 ============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
 ============================================================
 i̇̀i̇́i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 ^^ec^^edĩìíĩj̀j́j̃į̀į́į̃
@@ -102,7 +110,7 @@ Ìi̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 İ̀i̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 ============================================================
 ============================================================
-TEST 12: Dutch
+TEST 13: Dutch
 ============================================================
 ijsselmeer
 ijsselmeer
@@ -118,13 +126,13 @@ Im
 Im
 ============================================================
 ============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
 ============================================================
 Ssoo
 Dž!
 ============================================================
 ============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
 ============================================================
 foo \emph {BAR} {baz}
 FOO \emph {BAR} {BAZ}
@@ -134,7 +142,7 @@ Foo \emph {BAR} {baz}
 \emph {BAR} {Baz}
 ============================================================
 ============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
 ============================================================
 some text hello
 SOME TEXT HELLO
@@ -156,7 +164,7 @@ Some text \cs_tmp:w
 \cs_tmp:w  Sometext
 ============================================================
 ============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
 ============================================================
 some text $y = mx + c$
 SOME TEXT $y = mx + c$
@@ -169,7 +177,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
 Opps not close token in $y = mx + c
 ============================================================
 ============================================================
-TEST 17: Nesting
+TEST 18: Nesting
 ============================================================
 HELLO
 hello
@@ -177,14 +185,14 @@ FUSSBALL
 ὈΔΥΣΣΕΎΣ
 ============================================================
 ============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
 ============================================================
 \aa \aa \J \ae \dh \ss \l \o 
 \AA \AA \J \AE \DH \SS \L \O 
 \AA \aa \J \ae \dh \ss \l \o 
 ============================================================
 ============================================================
-TEST 19: Accents
+TEST 20: Accents
 ============================================================
 \"{a}\u {e}\H {i}\v {o}\.{u}
 \"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 601a36f53..db4f1974e 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -100,6 +100,14 @@
     \text_uppercase:nn { de-alt } { Fußball }
   }
 
+\TESTEXP { Greek }
+  {
+    \text_uppercase:n         { ὈΔΥΣΣΕΎΣ } \NEWLINE
+    \text_uppercase:nn { el } { ὈΔΥΣΣΕΎΣ } \NEWLINE
+    \text_titlecase:n         { ὈΔΥΣΣΕΎΣ } \NEWLINE
+    \text_titlecase:nn { el } { ὈΔΥΣΣΕΎΣ }
+  }
+
 \TESTEXP { Turkish/Azeri }
   {
     \text_lowercase:nn { tr } { RAGIP~HULÛSİ~ÖZDEM } \NEWLINE
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 43a7743e0..51b7e5f0d 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -75,7 +75,15 @@ FUSSBALL
 FUSSBALL
 ============================================================
 ============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
 ============================================================
 rag^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
 ragip hul^^c3^^bbs^^c4^^b0 ^^c3^^b6zdem
@@ -87,7 +95,7 @@ Rag^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
 ^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
 ============================================================
 ============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
 ============================================================
 ^^c3^^ac^^c3^^ad^^c4^^a9
 ^^c3^^ac^^c3^^ad^^c4^^a9
@@ -95,7 +103,7 @@ TEST 11: Lithuanian
 ^^c3^^8c^^c3^^ad^^c4^^a9
 ============================================================
 ============================================================
-TEST 12: Dutch
+TEST 13: Dutch
 ============================================================
 ijsselmeer
 ijsselmeer
@@ -111,13 +119,13 @@ Im
 Im
 ============================================================
 ============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
 ============================================================
 Ssoo
 ^^c7^^85!
 ============================================================
 ============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
 ============================================================
 foo \emph {BAR} {baz}
 FOO \emph {BAR} {BAZ}
@@ -127,7 +135,7 @@ Foo \emph {BAR} {baz}
 \emph {BAR} {Baz}
 ============================================================
 ============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
 ============================================================
 some text hello
 SOME TEXT HELLO
@@ -149,7 +157,7 @@ Some text \cs_tmp:w
 \cs_tmp:w  Sometext
 ============================================================
 ============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
 ============================================================
 some text $y = mx + c$
 SOME TEXT $y = mx + c$
@@ -162,7 +170,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
 Opps not close token in $y = mx + c
 ============================================================
 ============================================================
-TEST 17: Nesting
+TEST 18: Nesting
 ============================================================
 HELLO
 hello
@@ -170,14 +178,14 @@ FUSSBALL
 ^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
 ============================================================
 ============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
 ============================================================
 \aa \aa \J \ae \dh \ss \l \o 
 \AA \AA \J \AE \DH \SS \L \O 
 \AA \aa \J \ae \dh \ss \l \o 
 ============================================================
 ============================================================
-TEST 19: Accents
+TEST 20: Accents
 ============================================================
 \"{a}\u {e}\H {i}\v {o}\.{u}
 \"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index b6d02f566..99d9b4a90 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -78,7 +78,15 @@ FUSSBALL
 FUSSBALL
 ============================================================
 ============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
 ============================================================
 ragıp hul^^fbsi ^^f6zdem
 ragip hul^^fbsİ ^^f6zdem
@@ -90,7 +98,7 @@ Ragıp hul^^fbsi ^^f6zdem
 ıp hul^^fbsi ^^f6zdem
 ============================================================
 ============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
 ============================================================
 ^^ec^^edĩ
 ^^ec^^edĩ
@@ -98,7 +106,7 @@ TEST 11: Lithuanian
 ^^cc^^edĩ
 ============================================================
 ============================================================
-TEST 12: Dutch
+TEST 13: Dutch
 ============================================================
 ijsselmeer
 ijsselmeer
@@ -114,13 +122,13 @@ Im
 Im
 ============================================================
 ============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
 ============================================================
 Ssoo
 Dž!
 ============================================================
 ============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
 ============================================================
 foo \emph {BAR} {baz}
 FOO \emph {BAR} {BAZ}
@@ -130,7 +138,7 @@ Foo \emph {BAR} {baz}
 \emph {BAR} {Baz}
 ============================================================
 ============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
 ============================================================
 some text hello
 SOME TEXT HELLO
@@ -152,7 +160,7 @@ Some text \cs_tmp:w
 \cs_tmp:w  Sometext
 ============================================================
 ============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
 ============================================================
 some text $y = mx + c$
 SOME TEXT $y = mx + c$
@@ -165,7 +173,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
 Opps not close token in $y = mx + c
 ============================================================
 ============================================================
-TEST 17: Nesting
+TEST 18: Nesting
 ============================================================
 HELLO
 hello
@@ -173,14 +181,14 @@ FUSSBALL
 ὈΔΥΣΣΕΎΣ
 ============================================================
 ============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
 ============================================================
 \aa \aa \J \ae \dh \ss \l \o 
 \AA \AA \J \AE \DH \SS \L \O 
 \AA \aa \J \ae \dh \ss \l \o 
 ============================================================
 ============================================================
-TEST 19: Accents
+TEST 20: Accents
 ============================================================
 \"{a}\u {e}\H {i}\v {o}\.{u}
 \"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 0ca8e1600..f84a9adbf 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -75,7 +75,15 @@ FUSSBALL
 FUẞBALL
 ============================================================
 ============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ΟΔΥΣΣΕΥΣ
+Ὀδυσσεύς
+Ὀδυσσεύς
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
 ============================================================
 ragıp hul^^fbsi ^^f6zdem
 ragip hul^^fbsi̇ ^^f6zdem
@@ -87,7 +95,7 @@ Ip hul^^fbsi ^^f6zdem
 Ip hul^^fbsi ^^f6zdem
 ============================================================
 ============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
 ============================================================
 i̇̀i̇́i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 ^^ec^^edĩìíĩj̀j́j̃į̀į́į̃
@@ -99,7 +107,7 @@ Ìi̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 İ̀i̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
 ============================================================
 ============================================================
-TEST 12: Dutch
+TEST 13: Dutch
 ============================================================
 ijsselmeer
 ijsselmeer
@@ -115,13 +123,13 @@ Im
 Im
 ============================================================
 ============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
 ============================================================
 Ssoo
 Dž!
 ============================================================
 ============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
 ============================================================
 foo \emph {BAR} {baz}
 FOO \emph {BAR} {BAZ}
@@ -131,7 +139,7 @@ Foo \emph {BAR} {baz}
 \emph {BAR} {Baz}
 ============================================================
 ============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
 ============================================================
 some text hello
 SOME TEXT HELLO
@@ -153,7 +161,7 @@ Some text \cs_tmp:w
 \cs_tmp:w  Sometext
 ============================================================
 ============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
 ============================================================
 some text $y = mx + c$
 SOME TEXT $y = mx + c$
@@ -166,7 +174,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
 Opps not close token in $y = mx + c
 ============================================================
 ============================================================
-TEST 17: Nesting
+TEST 18: Nesting
 ============================================================
 HELLO
 hello
@@ -174,14 +182,14 @@ FUSSBALL
 ὈΔΥΣΣΕΎΣ
 ============================================================
 ============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
 ============================================================
 \aa \aa \J \ae \dh \ss \l \o 
 \AA \AA \J \AE \DH \SS \L \O 
 \AA \aa \J \ae \dh \ss \l \o 
 ============================================================
 ============================================================
-TEST 19: Accents
+TEST 20: Accents
 ============================================================
 \"{a}\u {e}\H {i}\v {o}\.{u}
 \"{A}\u {E}\H {I}\v {O}\.{U}





More information about the latex3-commits mailing list