[latex3-commits] [latex3/latex3] gh984-titlecase-char-class: Detect letters for titlecasing based on Unicode general category (3787c92f0)
github at latex-project.org
github at latex-project.org
Sun Oct 15 20:31:56 CEST 2023
Repository : https://github.com/latex3/latex3
On branch : gh984-titlecase-char-class
Link : https://github.com/latex3/latex3/commit/3787c92f01e59109b34fcc84d4359634172a8d72
>---------------------------------------------------------------
commit 3787c92f01e59109b34fcc84d4359634172a8d72
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Sun Oct 15 19:31:52 2023 +0100
Detect letters for titlecasing based on Unicode general category
Fixes #984
>---------------------------------------------------------------
3787c92f01e59109b34fcc84d4359634172a8d72
l3kernel/CHANGELOG.md | 1 +
l3kernel/l3text-case.dtx | 41 ++++++++++++++++++---------------
l3kernel/l3text.dtx | 11 ++++-----
l3kernel/testfiles/m3text002.luatex.tlg | 7 ++++++
l3kernel/testfiles/m3text002.lvt | 7 ++++++
l3kernel/testfiles/m3text002.ptex.tlg | 11 +++++++--
l3kernel/testfiles/m3text002.tlg | 7 ++++++
l3kernel/testfiles/m3text002.uptex.tlg | 17 ++++++++++----
l3kernel/testfiles/m3text002.xetex.tlg | 7 ++++++
l3kernel/testfiles/m3text005.ptex.tlg | 4 ++--
l3kernel/testfiles/m3text005.uptex.tlg | 4 ++--
11 files changed, 81 insertions(+), 36 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 62407ff15..e83c49d89 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -13,6 +13,7 @@ this project uses date-based 'snapshot' version identifiers.
### Changed
- Clarify action of `\text_titlecase_first:n(n)`
+- Detect letters for titlecasing based on Unicode general category
### Deprecated
- `\text_titlecase:n(n)` as ambiguous: replaced by `\text_titlecase_all:n(n)`
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index e99be423a..4813de17a 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -192,8 +192,12 @@
% \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnn}
% \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnw}
% \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnN}
+% \begin{macro}[EXP]
+% {
+% \@@_change_case_codepoint_title_auxi:nnnn ,
+% \@@_change_case_codepoint_title_auxii:nnnn
+% }
% \begin{macro}[EXP]{\@@_change_case_codepoint_title:nnn}
-% \begin{macro}[EXP]{\@@_change_case_codepoint_title:nnnnn}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nnnnn}
% \begin{macro}[EXP]{\@@_change_case_codepoint:nn}
% \begin{macro}[EXP]
@@ -692,40 +696,39 @@
\@@_change_case_loop:nnnw {#2} {#3} {#4} #5
}
% \end{macrocode}
-% For titlecasing, we need to fully expand the new character to see if it
-% is a letter (or active).
+% For titlecasing, we need to fully expand the new codepoint to see if it
+% is a letter.
% \begin{macrocode}
\cs_new:Npn \@@_change_case_codepoint_title:nnnn #1#2#3#4
{
\bool_if:NTF \l_text_titlecase_check_letter_bool
{
- \tl_if_single:nTF {#4}
+ \exp_args:Ne \@@_change_case_codepoint_title_auxi:nnnn
{
- \bool_lazy_or:nnTF
- { \token_if_letter_p:N #4 }
- {
- \bool_lazy_and_p:nn
- { \token_if_active_p:N #4 }
- { ! \int_compare_p:nNn {`#4} < { "80 } }
- }
- { \@@_change_case_codepoint_title:nnn }
- { \@@_change_case_codepoint_title:nnnnn { title } {#1} }
+ \codepoint_to_category:n
+ { \@@_codepoint_from_chars:Nw #4 }
}
- { \@@_change_case_codepoint_title:nnn }
}
{ \@@_change_case_codepoint_title:nnn }
{#2} {#3} {#4}
}
+\cs_new:Npn \@@_change_case_codepoint_title_auxi:nnnn #1#2#3#4
+ {
+ \tl_if_head_eq_charcode:nNTF {#1} { L }
+ { \@@_change_case_codepoint_title:nnn }
+ { \@@_change_case_codepoint_title_auxii:nnnn { title } }
+ {#2} {#3} {#4}
+ }
\cs_new:Npn \@@_change_case_codepoint_title:nnn #1#2#3
- { \@@_change_case_codepoint_title:nnnnn { title } { end } {#1} {#2} {#3} }
-\cs_new:Npn \@@_change_case_codepoint_title:nnnnn #1#2#3#4#5
+ { \@@_change_case_codepoint_title_auxii:nnnn { end } {#1} {#2} {#3} }
+\cs_new:Npn \@@_change_case_codepoint_title_auxii:nnnn #1#2#3#4
{
- \cs_if_exist_use:cF { @@_change_case_title_ #4 :nnnnn }
+ \cs_if_exist_use:cF { @@_change_case_title_ #3 :nnnnn }
{
- \cs_if_exist_use:cF { @@_change_case_upper_ #4 :nnnnn }
+ \cs_if_exist_use:cF { @@_change_case_upper_ #3 :nnnnn }
{ \@@_change_case_codepoint:nnnnn }
}
- {#1} {#2} {#3} {#4} {#5}
+ { title } {#1} {#2} {#3} {#4}
}
\cs_new:Npn \@@_change_case_codepoint:nnnnn #1#2#3#4#5
{
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 0fe4ed9ce..d969783d7 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -188,13 +188,12 @@
% \end{itemize}
%
% Determining whether non-letter characters at the start of text should count
-% as the uppercase element is controllable. When \cs{l_text_titlecase_check_letter_bool} is
-% \texttt{true}, characters which are not letters (category code~$11$) are
+% as the uppercase element is controllable. When
+% \cs{l_text_titlecase_check_letter_bool} is \texttt{true}, codepoints which are
+% not letters (that is, not in Unicode general category \texttt{L}) are
% \enquote{skipped}: the first \emph{letter} is uppercased.
-% (With $8$-bit engines, this is extended to active characters which form
-% part of a multi-byte letter codepoint.) When
-% \cs{l_text_titlecase_check_letter_bool} is \texttt{false}, the first
-% character is uppercased, irrespective of the category code of the character.
+% When \cs{l_text_titlecase_check_letter_bool} is \texttt{false}, the first
+% codepoint is uppercased, irrespective of the general category of the character.
%
% \begin{function}[added = 2022-07-04]
% {
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index bcb7da80f..c578e8961 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -420,3 +420,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
<recently read> }
l. ... }
============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ... }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 408136bce..e394877f8 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -428,4 +428,11 @@
}
}
+\TEST { Titlecase~catcode~handling }
+ {
+ \str_set:Nn \l_tmpa_str { abc }
+ \tl_show:e
+ { \text_titlecase_all:n { \l_tmpa_str } }
+ }
+
\END
diff --git a/l3kernel/testfiles/m3text002.ptex.tlg b/l3kernel/testfiles/m3text002.ptex.tlg
index 67172fd5c..cfa868693 100644
--- a/l3kernel/testfiles/m3text002.ptex.tlg
+++ b/l3kernel/testfiles/m3text002.ptex.tlg
@@ -132,8 +132,8 @@ TEST 13: Cyrillic
============================================================
Доклады Акад^^ea^^9f^^97мии наук
^^ea^^9e^^a4окл^^ea^^9f^^90ды ^^ea^^9e^^a0к^^ea^^9f^^90демии н^^ea^^9f^^90ук
-^^ea^^9e^^a4окл^^ea^^9f^^90ды ^^ea^^9e^^a0к^^ea^^9f^^90демии н^^ea^^9f^^90ук
-^^ea^^9e^^a4окл^^ea^^9f^^90ды Академии наук
+^^ea^^9e^^a4оклады ^^ea^^9e^^a0кадемии н^^ea^^9f^^90ук
+^^ea^^9e^^a4оклады Академии наук
============================================================
============================================================
TEST 14: BCP47 parts
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
<recently read> }
l. ... }
============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ... }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 9fc858202..a6d229a65 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
<recently read> }
l. ... }
============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ... }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index fcade24d7..d0343a1ee 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -132,8 +132,8 @@ TEST 13: Cyrillic
============================================================
^^d0^^b4оклады ^^d0^^b0кадемии наук
Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab А^^d0^^9a^^d0^^90^^d0^^94^^d0^^95^^d0^^9c^^d0^^98^^d0^^98 ^^d0^^9d^^d0^^90^^d0^^a3^^d0^^9a
-Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab А^^d0^^9a^^d0^^90^^d0^^94^^d0^^95^^d0^^9c^^d0^^98^^d0^^98 ^^d0^^9d^^d0^^90^^d0^^a3^^d0^^9a
-Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab Академии наук
+Доклады Академии ^^d0^^9dаук
+Доклады Академии наук
============================================================
============================================================
TEST 14: BCP47 parts
@@ -146,9 +146,9 @@ TEST 15: Armenian
Ե^^d5^^90^^d4^^b5^^d5^^92^^d4^^b1^^d5^^86
Ե^^d5^^90^^d4^^b5^^d5^^8e^^d4^^b1^^d5^^86
Ե^^d5^^90^^d4^^b5^^d5^^92^^d4^^b1^^d5^^86
-Ե^^d5^^90^^d4^^b5^^d6^^82^^d4^^b1^^d5^^86
-^^d4^^b5^^d5^^be^^d4^^b1^^d5^^86
-^^d4^^b5^^d6^^82^^d4^^b1^^d5^^86
+Երևան
+^^d4^^b5^^d5^^beան
+^^d4^^b5^^d6^^82ան
============================================================
============================================================
TEST 16: German-alternative
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
<recently read> }
l. ... }
============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ... }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index bcb7da80f..c578e8961 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -420,3 +420,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
<recently read> }
l. ... }
============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ... }
+============================================================
diff --git a/l3kernel/testfiles/m3text005.ptex.tlg b/l3kernel/testfiles/m3text005.ptex.tlg
index 118c7f8c4..a40abae9a 100644
--- a/l3kernel/testfiles/m3text005.ptex.tlg
+++ b/l3kernel/testfiles/m3text005.ptex.tlg
@@ -10,6 +10,6 @@ TEST 1: \@uclclist\ entries
^^ea^^9e^^a6\CYRYO
ё\cyryo
^^ea^^9f^^96\CYRYO
-^^ea^^9f^^96\CYRYO
-^^ea^^9f^^96\CYRYO
+^^ea^^9f^^96\cyryo
+^^ea^^9f^^96\cyryo
============================================================
diff --git a/l3kernel/testfiles/m3text005.uptex.tlg b/l3kernel/testfiles/m3text005.uptex.tlg
index 427380824..ddbaf4c8a 100644
--- a/l3kernel/testfiles/m3text005.uptex.tlg
+++ b/l3kernel/testfiles/m3text005.uptex.tlg
@@ -10,6 +10,6 @@ TEST 1: \@uclclist\ entries
Ё\CYRYO
ё\cyryo
^^d0^^81\CYRYO
-^^d0^^81\CYRYO
-^^d0^^81\CYRYO
+^^d0^^81\cyryo
+^^d0^^81\cyryo
============================================================
More information about the latex3-commits
mailing list.