[latex3-commits] [git/LaTeX3-latex3-latex3] l3text: Add case changing support for Greek (e523db139)
Joseph Wright
joseph.wright at morningstar2.co.uk
Tue Dec 3 09:32:10 CET 2019
Repository : https://github.com/latex3/latex3
On branch : l3text
Link : https://github.com/latex3/latex3/commit/e523db1392d3103c48e2fceb81354cf91bfbc90a
>---------------------------------------------------------------
commit e523db1392d3103c48e2fceb81354cf91bfbc90a
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Tue Dec 3 08:32:10 2019 +0000
Add case changing support for Greek
>---------------------------------------------------------------
e523db1392d3103c48e2fceb81354cf91bfbc90a
l3kernel/l3text.dtx | 129 ++++++++++++++++++++++++++++++++
l3kernel/testfiles/m3text002.luatex.tlg | 28 ++++---
l3kernel/testfiles/m3text002.lvt | 8 ++
l3kernel/testfiles/m3text002.tlg | 28 ++++---
l3kernel/testfiles/m3text002.uptex.tlg | 28 ++++---
l3kernel/testfiles/m3text002.xetex.tlg | 28 ++++---
6 files changed, 209 insertions(+), 40 deletions(-)
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 33e0d81cc..94a2360b7 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -1342,6 +1342,135 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnnN}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnnn}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el_aux:nnnN}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el_loop:nnw}
+% \begin{macro}[rEXP]{\@@_change_case_upper_el:nnN}
+% \begin{macro}[EXP]{\@@_change_case_if_greek:nTF}
+% For Greek uppercasing, we need to know if characters \emph{in the Greek
+% range} have accents. That means doing an \textsc{nfd} conversion first, then
+% starting a search. As described by the Unicode \textsc{cldr}, Greek accents
+% need to be found \emph{after} any U+0308 (diaeresis) and are done in two
+% groups to allow for the canonical ordering.
+% \begin{macrocode}
+\bool_lazy_or:nnT
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_new:Npn \@@_change_case_upper_el:nnnN #1#2#3#4 % #4 is the character token examined here
+ {
+ \@@_change_case_if_greek:nTF { `#4 } % branch on the character code of #4
+ {
+ \exp_args:Ne \@@_change_case_upper_el:nnnn % e-expand the first argument before the call
+ { \char_to_nfd:N #4 } {#1} {#2} {#3}
+ }
+ { \@@_change_case_char:nnnN {#1} {#2} {#3} #4 } % not in the Greek ranges: pass everything on unchanged
+ }
+ \cs_new:Npn \@@_change_case_upper_el:nnnn #1#2#3#4 % #1 is the expanded \char_to_nfd:N result from the :nnnN caller
+ { \@@_change_case_upper_el_aux:nnnN {#2} {#3} {#4} #1 } % #1 goes last and unbraced, so only its first token is taken as the N argument
+ \cs_new:Npn \@@_change_case_upper_el_aux:nnnN #1#2#3#4 % #4 is the single token following the three braced arguments
+ {
+ \use:c { char_ #1 case:N } #4 % build the name char_<#1>case:N and apply that function to #4
+ \@@_change_case_upper_el_loop:nnw {#2} {#3} % then inspect whatever tokens follow
+ }
+ \cs_new:Npn \@@_change_case_upper_el_loop:nnw
+ #1#2#3 \q_recursion_stop % #3 is everything up to \q_recursion_stop
+ {
+ \tl_if_head_is_N_type:nTF {#3} % is the first item of #3 an N-type token?
+ { \@@_change_case_upper_el:nnN } % yes: test that token against the accent list
+ { \@@_change_case_loop:nnw } % no: hand over to \@@_change_case_loop:nnw
+ {#1} {#2} #3 \q_recursion_stop % arguments and re-inserted input for whichever function was chosen
+ }
+% \end{macrocode}
+% In addition to the Greek accents, we list three cases here where an
+% accent outside the Greek range has an \textsc{nfd} that would make it
+% equivalent. The same applies to U+0344, which also has to insert U+0308.
+% \begin{macrocode}
+ \cs_new:Npn \@@_change_case_upper_el:nnN #1#2#3 % #3 is the N-type token selected by the _loop:nnw function
+   {
+     \token_if_cs:NTF #3
+       { \@@_change_case_loop:nnw {#1} {#2} #3 } % control sequence: put it back and leave the accent scan
+       {
+         \int_compare:nNnTF { `#3 } = { "0308 }
+           {
+             \exp_not:n {#3} % U+0308 is kept in the output
+             \@@_change_case_upper_el_loop:nnw {#1} {#2} % and the scan continues after it
+           }
+           {
+             \bool_lazy_any:nTF
+               {
+                 % U+0308 is not listed: it never gets here, being handled by the test above
+                 { \int_compare_p:nNn { `#3 } = { "0300 } }
+                 { \int_compare_p:nNn { `#3 } = { "0301 } }
+                 { \int_compare_p:nNn { `#3 } = { "0304 } }
+                 { \int_compare_p:nNn { `#3 } = { "0306 } }
+                 { \int_compare_p:nNn { `#3 } = { "0313 } }
+                 { \int_compare_p:nNn { `#3 } = { "0314 } }
+                 { \int_compare_p:nNn { `#3 } = { "0342 } }
+                 { \int_compare_p:nNn { `#3 } = { "0340 } }
+                 { \int_compare_p:nNn { `#3 } = { "0341 } }
+                 { \int_compare_p:nNn { `#3 } = { "0343 } }
+               }
+               { \@@_change_case_upper_el_loop:nnw {#1} {#2} } % listed accent: drop #3 and keep scanning
+               {
+                 \int_compare:nNnTF { `#3 } = { "0344 }
+                   {
+                     \char_generate:nn { "0308 } % U+0344 is replaced by U+0308
+                       { \@@_char_catcode:N #3 }
+                     \@@_change_case_upper_el_loop:nnw {#1} {#2}
+                   }
+                   {
+                     \int_compare:nNnTF { `#3 } = { "0345 }
+                       { \@@_change_case_loop:nnw {#1} {#2} } % U+0345 is dropped and the accent scan ends
+                       { \@@_change_case_loop:nnw {#1} {#2} #3 } % anything else: put #3 back and end the scan
+                   }
+               }
+           }
+       }
+   }
+ \prg_new_conditional:Npnn \@@_change_case_if_greek:n #1 { TF } % true for "0370-"03FF and "1F00-"1FFF
+   {
+     \if_int_compare:w #1 < "0370 \exp_stop_f: % below the first range
+       \prg_return_false:
+     \else:
+       \if_int_compare:w #1 < "0400 \exp_stop_f: % "0370 to "03FF
+         \prg_return_true:
+       \else:
+         \if_int_compare:w #1 < "1F00 \exp_stop_f: % "0400 to "1EFF, between the two ranges
+           \prg_return_false:
+         \else:
+           \if_int_compare:w #1 < "2000 \exp_stop_f: % "1F00 to "1FFF
+             \prg_return_true:
+           \else: % above "1FFF
+             \prg_return_false:
+           \fi:
+         \fi:
+       \fi:
+     \fi:
+   }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \begin{macro}[rEXP]{\@@_change_case_title_el:nnnN}
+% Titlecasing retains accents, but to prevent the uppercasing code
+% from kicking in, there has to be an explicit function here.
+% \begin{macrocode}
+\bool_lazy_or:nnT
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_new:Npn \@@_change_case_title_el:nnnN #1#2#3#4 % takes the same four arguments as the function called below
+ { \@@_change_case_char:nnnN {#1} {#2} {#3} #4 } % forwards them unchanged, with no accent scan
+ }
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macro}[rEXP]
% {
% \@@_change_cases_lower_lt:nnnN ,
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index a33c00d04..d9ac41f25 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -78,7 +78,15 @@ FUSSBALL
FUẞBALL
============================================================
============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ΟΔΥΣΣΕΥΣ
+Ὀδυσσεύς
+Ὀδυσσεύς
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
============================================================
ragıp hul^^fbsi ^^f6zdem
ragip hul^^fbsi̇ ^^f6zdem
@@ -90,7 +98,7 @@ Ip hul^^fbsi ^^f6zdem
Ip hul^^fbsi ^^f6zdem
============================================================
============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
============================================================
i̇̀i̇́i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
^^ec^^edĩìíĩj̀j́j̃į̀į́į̃
@@ -102,7 +110,7 @@ Ìi̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
İ̀i̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
============================================================
============================================================
-TEST 12: Dutch
+TEST 13: Dutch
============================================================
ijsselmeer
ijsselmeer
@@ -118,13 +126,13 @@ Im
Im
============================================================
============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
============================================================
Ssoo
Dž!
============================================================
============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
============================================================
foo \emph {BAR} {baz}
FOO \emph {BAR} {BAZ}
@@ -134,7 +142,7 @@ Foo \emph {BAR} {baz}
\emph {BAR} {Baz}
============================================================
============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
============================================================
some text hello
SOME TEXT HELLO
@@ -156,7 +164,7 @@ Some text \cs_tmp:w
\cs_tmp:w Sometext
============================================================
============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
============================================================
some text $y = mx + c$
SOME TEXT $y = mx + c$
@@ -169,7 +177,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
Opps not close token in $y = mx + c
============================================================
============================================================
-TEST 17: Nesting
+TEST 18: Nesting
============================================================
HELLO
hello
@@ -177,14 +185,14 @@ FUSSBALL
ὈΔΥΣΣΕΎΣ
============================================================
============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
============================================================
\aa \aa \J \ae \dh \ss \l \o
\AA \AA \J \AE \DH \SS \L \O
\AA \aa \J \ae \dh \ss \l \o
============================================================
============================================================
-TEST 19: Accents
+TEST 20: Accents
============================================================
\"{a}\u {e}\H {i}\v {o}\.{u}
\"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 601a36f53..db4f1974e 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -100,6 +100,14 @@
\text_uppercase:nn { de-alt } { Fußball }
}
+\TESTEXP { Greek }
+ {
+ \text_uppercase:n { ὈΔΥΣΣΕΎΣ } \NEWLINE % uppercase, no language argument
+ \text_uppercase:nn { el } { ὈΔΥΣΣΕΎΣ } \NEWLINE % uppercase with language el
+ \text_titlecase:n { ὈΔΥΣΣΕΎΣ } \NEWLINE % titlecase, no language argument
+ \text_titlecase:nn { el } { ὈΔΥΣΣΕΎΣ }
+ }
+
\TESTEXP { Turkish/Azeri }
{
\text_lowercase:nn { tr } { RAGIP~HULÛSİ~ÖZDEM } \NEWLINE
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 43a7743e0..51b7e5f0d 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -75,7 +75,15 @@ FUSSBALL
FUSSBALL
============================================================
============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
============================================================
rag^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
ragip hul^^c3^^bbs^^c4^^b0 ^^c3^^b6zdem
@@ -87,7 +95,7 @@ Rag^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
^^c4^^b1p hul^^c3^^bbsi ^^c3^^b6zdem
============================================================
============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
============================================================
^^c3^^ac^^c3^^ad^^c4^^a9
^^c3^^ac^^c3^^ad^^c4^^a9
@@ -95,7 +103,7 @@ TEST 11: Lithuanian
^^c3^^8c^^c3^^ad^^c4^^a9
============================================================
============================================================
-TEST 12: Dutch
+TEST 13: Dutch
============================================================
ijsselmeer
ijsselmeer
@@ -111,13 +119,13 @@ Im
Im
============================================================
============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
============================================================
Ssoo
^^c7^^85!
============================================================
============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
============================================================
foo \emph {BAR} {baz}
FOO \emph {BAR} {BAZ}
@@ -127,7 +135,7 @@ Foo \emph {BAR} {baz}
\emph {BAR} {Baz}
============================================================
============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
============================================================
some text hello
SOME TEXT HELLO
@@ -149,7 +157,7 @@ Some text \cs_tmp:w
\cs_tmp:w Sometext
============================================================
============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
============================================================
some text $y = mx + c$
SOME TEXT $y = mx + c$
@@ -162,7 +170,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
Opps not close token in $y = mx + c
============================================================
============================================================
-TEST 17: Nesting
+TEST 18: Nesting
============================================================
HELLO
hello
@@ -170,14 +178,14 @@ FUSSBALL
^^e1^^bd^^88^^ce^^94^^ce^^a5^^ce^^a3^^ce^^a3^^ce^^95^^ce^^8e^^ce^^a3
============================================================
============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
============================================================
\aa \aa \J \ae \dh \ss \l \o
\AA \AA \J \AE \DH \SS \L \O
\AA \aa \J \ae \dh \ss \l \o
============================================================
============================================================
-TEST 19: Accents
+TEST 20: Accents
============================================================
\"{a}\u {e}\H {i}\v {o}\.{u}
\"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index b6d02f566..99d9b4a90 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -78,7 +78,15 @@ FUSSBALL
FUSSBALL
============================================================
============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+ὈΔΥΣΣΕΎΣ
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
============================================================
ragıp hul^^fbsi ^^f6zdem
ragip hul^^fbsİ ^^f6zdem
@@ -90,7 +98,7 @@ Ragıp hul^^fbsi ^^f6zdem
ıp hul^^fbsi ^^f6zdem
============================================================
============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
============================================================
^^ec^^edĩ
^^ec^^edĩ
@@ -98,7 +106,7 @@ TEST 11: Lithuanian
^^cc^^edĩ
============================================================
============================================================
-TEST 12: Dutch
+TEST 13: Dutch
============================================================
ijsselmeer
ijsselmeer
@@ -114,13 +122,13 @@ Im
Im
============================================================
============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
============================================================
Ssoo
Dž!
============================================================
============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
============================================================
foo \emph {BAR} {baz}
FOO \emph {BAR} {BAZ}
@@ -130,7 +138,7 @@ Foo \emph {BAR} {baz}
\emph {BAR} {Baz}
============================================================
============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
============================================================
some text hello
SOME TEXT HELLO
@@ -152,7 +160,7 @@ Some text \cs_tmp:w
\cs_tmp:w Sometext
============================================================
============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
============================================================
some text $y = mx + c$
SOME TEXT $y = mx + c$
@@ -165,7 +173,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
Opps not close token in $y = mx + c
============================================================
============================================================
-TEST 17: Nesting
+TEST 18: Nesting
============================================================
HELLO
hello
@@ -173,14 +181,14 @@ FUSSBALL
ὈΔΥΣΣΕΎΣ
============================================================
============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
============================================================
\aa \aa \J \ae \dh \ss \l \o
\AA \AA \J \AE \DH \SS \L \O
\AA \aa \J \ae \dh \ss \l \o
============================================================
============================================================
-TEST 19: Accents
+TEST 20: Accents
============================================================
\"{a}\u {e}\H {i}\v {o}\.{u}
\"{A}\u {E}\H {I}\v {O}\.{U}
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 0ca8e1600..f84a9adbf 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -75,7 +75,15 @@ FUSSBALL
FUẞBALL
============================================================
============================================================
-TEST 10: Turkish/Azeri
+TEST 10: Greek
+============================================================
+ὈΔΥΣΣΕΎΣ
+ΟΔΥΣΣΕΥΣ
+Ὀδυσσεύς
+Ὀδυσσεύς
+============================================================
+============================================================
+TEST 11: Turkish/Azeri
============================================================
ragıp hul^^fbsi ^^f6zdem
ragip hul^^fbsi̇ ^^f6zdem
@@ -87,7 +95,7 @@ Ip hul^^fbsi ^^f6zdem
Ip hul^^fbsi ^^f6zdem
============================================================
============================================================
-TEST 11: Lithuanian
+TEST 12: Lithuanian
============================================================
i̇̀i̇́i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
^^ec^^edĩìíĩj̀j́j̃į̀į́į̃
@@ -99,7 +107,7 @@ Ìi̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
İ̀i̇̀i̇̃i̇̀i̇́i̇̃j̇̀j̇́j̇̃į̇̀į̇́į̇̃
============================================================
============================================================
-TEST 12: Dutch
+TEST 13: Dutch
============================================================
ijsselmeer
ijsselmeer
@@ -115,13 +123,13 @@ Im
Im
============================================================
============================================================
-TEST 13: Titlecase exceptions
+TEST 14: Titlecase exceptions
============================================================
Ssoo
Dž!
============================================================
============================================================
-TEST 14: Case changing braced arguments
+TEST 15: Case changing braced arguments
============================================================
foo \emph {BAR} {baz}
FOO \emph {BAR} {BAZ}
@@ -131,7 +139,7 @@ Foo \emph {BAR} {baz}
\emph {BAR} {Baz}
============================================================
============================================================
-TEST 15: Expanding content
+TEST 16: Expanding content
============================================================
some text hello
SOME TEXT HELLO
@@ -153,7 +161,7 @@ Some text \cs_tmp:w
\cs_tmp:w Sometext
============================================================
============================================================
-TEST 16: Math-mode escape
+TEST 17: Math-mode escape
============================================================
some text $y = mx + c$
SOME TEXT $y = mx + c$
@@ -166,7 +174,7 @@ OPPS NOT CLOSE TOKEN IN $y = mx + c
Opps not close token in $y = mx + c
============================================================
============================================================
-TEST 17: Nesting
+TEST 18: Nesting
============================================================
HELLO
hello
@@ -174,14 +182,14 @@ FUSSBALL
ὈΔΥΣΣΕΎΣ
============================================================
============================================================
-TEST 18: Letter-like commands
+TEST 19: Letter-like commands
============================================================
\aa \aa \J \ae \dh \ss \l \o
\AA \AA \J \AE \DH \SS \L \O
\AA \aa \J \ae \dh \ss \l \o
============================================================
============================================================
-TEST 19: Accents
+TEST 20: Accents
============================================================
\"{a}\u {e}\H {i}\v {o}\.{u}
\"{A}\u {E}\H {I}\v {O}\.{U}
More information about the latex3-commits
mailing list