[latex3-commits] [latex3/latex3] main: Detect letters for titlecasing based on Unicode general category (#1280) (9ffa1890a)

github at latex-project.org github at latex-project.org
Wed Oct 25 08:32:30 CEST 2023


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/9ffa1890a187212c1f78f5d6c2ea59ecc33323af

>---------------------------------------------------------------

commit 9ffa1890a187212c1f78f5d6c2ea59ecc33323af
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Wed Oct 25 07:32:30 2023 +0100

    Detect letters for titlecasing based on Unicode general category (#1280)


>---------------------------------------------------------------

9ffa1890a187212c1f78f5d6c2ea59ecc33323af
 l3kernel/CHANGELOG.md                   |  6 +++--
 l3kernel/l3text-case.dtx                | 41 ++++++++++++++++++---------------
 l3kernel/l3text.dtx                     | 13 +++++------
 l3kernel/testfiles/m3text002.luatex.tlg |  7 ++++++
 l3kernel/testfiles/m3text002.lvt        |  7 ++++++
 l3kernel/testfiles/m3text002.ptex.tlg   | 11 +++++++--
 l3kernel/testfiles/m3text002.tlg        |  7 ++++++
 l3kernel/testfiles/m3text002.uptex.tlg  | 17 ++++++++++----
 l3kernel/testfiles/m3text002.xetex.tlg  |  7 ++++++
 l3kernel/testfiles/m3text005.ptex.tlg   |  4 ++--
 l3kernel/testfiles/m3text005.uptex.tlg  |  4 ++--
 11 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 75d86c20d..744e32ba7 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -12,6 +12,8 @@ this project uses date-based 'snapshot' version identifiers.
 
 ### Changed
 - Documentation improvements
+- Refine action of `\text_titlecase_first:n(n)` to be focussed strictly on
+  first (relevant) codepoint in the input
 
 ## [2023-10-23]
 
@@ -24,8 +26,8 @@ this project uses date-based 'snapshot' version identifiers.
   `\fp_new_function:n`, `\fp_set_function:nnn` and `\fp_clear_function:n`
 
 ### Changed
-- Refine action of `\text_titlecase_first:n(n)` to be focussed strictly on
-  first (relevant) codepoint in the input
+- Clarify action of `\text_titlecase_first:n(n)`
+- Detect letters for titlecasing based on Unicode general category
 
 ### Deprecated
 - `\text_titlecase:n(n)` as ambiguous: replaced by `\text_titlecase_all:n(n)`
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index e34e9043a..5c44c7bf7 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -192,8 +192,12 @@
 % \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnn}
 % \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnw}
 % \begin{macro}[EXP]{\@@_change_case_lower_sigma:nnnnN}
+% \begin{macro}[EXP]
+%   {
+%     \@@_change_case_codepoint_title_auxi:nnnn ,
+%     \@@_change_case_codepoint_title_auxii:nnnn
+%   }
 % \begin{macro}[EXP]{\@@_change_case_codepoint_title:nnn}
-% \begin{macro}[EXP]{\@@_change_case_codepoint_title:nnnnn}
 % \begin{macro}[EXP]{\@@_change_case_codepoint:nnnnn}
 % \begin{macro}[EXP]{\@@_change_case_codepoint:nn}
 % \begin{macro}[EXP]
@@ -695,40 +699,39 @@
     \@@_change_case_loop:nnnw {#2} {#3} {#4} #5
   }
 %    \end{macrocode}
-%   For titlecasing, we need to fully expand the new character to see if it
-%   is a letter (or active).
+%   For titlecasing, we need to obtain the general category of the current
+%   codepoint.
 %    \begin{macrocode}
 \cs_new:Npn \@@_change_case_codepoint_title:nnnn #1#2#3#4
   {
     \bool_if:NTF \l_text_titlecase_check_letter_bool
       {
-        \tl_if_single:nTF {#4}
+        \exp_args:Ne \@@_change_case_codepoint_title_auxi:nnnn
           {
-            \bool_lazy_or:nnTF
-              { \token_if_letter_p:N #4 }
-              {
-                \bool_lazy_and_p:nn
-                  { \token_if_active_p:N #4 }
-                  { ! \int_compare_p:nNn {`#4} < { "80 } }
-              }
-              { \@@_change_case_codepoint_title:nnn }
-              { \@@_change_case_codepoint_title:nnnnn { title } {#1} }
+            \codepoint_to_category:n
+              { \@@_codepoint_from_chars:Nw #4 }
           }
-          { \@@_change_case_codepoint_title:nnn }
       }
       { \@@_change_case_codepoint_title:nnn }
         {#2} {#3} {#4}
   }
+\cs_new:Npn \@@_change_case_codepoint_title_auxi:nnnn #1#2#3#4
+  {
+    \tl_if_head_eq_charcode:nNTF {#1} { L }
+      { \@@_change_case_codepoint_title:nnn }
+      { \@@_change_case_codepoint_title_auxii:nnnn { title } }
+        {#2} {#3} {#4}
+  }
 \cs_new:Npn \@@_change_case_codepoint_title:nnn #1#2#3
-  { \@@_change_case_codepoint_title:nnnnn { title } { end } {#1} {#2} {#3} }
-\cs_new:Npn \@@_change_case_codepoint_title:nnnnn #1#2#3#4#5
+  { \@@_change_case_codepoint_title_auxii:nnnn { end } {#1} {#2} {#3} }
+\cs_new:Npn \@@_change_case_codepoint_title_auxii:nnnn #1#2#3#4
   {
-    \cs_if_exist_use:cF { @@_change_case_title_ #4 :nnnnn }
+    \cs_if_exist_use:cF { @@_change_case_title_ #3 :nnnnn }
       {
-        \cs_if_exist_use:cF { @@_change_case_upper_ #4 :nnnnn }
+        \cs_if_exist_use:cF { @@_change_case_upper_ #3 :nnnnn }
           { \@@_change_case_codepoint:nnnnn }
       }
-        {#1} {#2} {#3} {#4} {#5}
+        { title } {#1} {#2} {#3} {#4}
   }
 \cs_new:Npn \@@_change_case_codepoint:nnnnn #1#2#3#4#5
   {
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index ea33b5885..9706f505d 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -188,13 +188,12 @@
 %   \end{itemize}
 %
 %  Determining whether non-letter characters at the start of text should count
-%  as the uppercase element is controllable. When \cs{l_text_titlecase_check_letter_bool} is
-%  \texttt{true}, characters which are not letters (category code~$11$) are
-%  \enquote{skipped}: the first \emph{letter} is uppercased.
-%  (With $8$-bit engines, this is extended to active characters which form
-%  part of a multi-byte letter codepoint.) When
-%  \cs{l_text_titlecase_check_letter_bool} is \texttt{false}, the first
-%  character is uppercased, irrespective of the category code of the character.
+%  as the uppercase element is controllable. When
+%  \cs{l_text_titlecase_check_letter_bool} is \texttt{true}, codepoints which are
+%  not letters (Unicode general category \texttt{L}) are not changed, and only
+%  the first \emph{letter} is uppercased.
+%  When \cs{l_text_titlecase_check_letter_bool} is \texttt{false}, the first
+%  codepoint is uppercased, irrespective of the general category of the character.
 %
 % \begin{function}[added = 2022-07-04]
 %   {
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index 835f6d2f7..0ba163892 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -420,3 +420,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.lvt b/l3kernel/testfiles/m3text002.lvt
index 408136bce..e394877f8 100644
--- a/l3kernel/testfiles/m3text002.lvt
+++ b/l3kernel/testfiles/m3text002.lvt
@@ -428,4 +428,11 @@
       }
   }
 
+\TEST { Titlecase~catcode~handling }
+  {
+    \str_set:Nn \l_tmpa_str { abc }
+    \tl_show:e 
+      { \text_titlecase_all:n { \l_tmpa_str } }
+  }
+
 \END
diff --git a/l3kernel/testfiles/m3text002.ptex.tlg b/l3kernel/testfiles/m3text002.ptex.tlg
index 9ec8f2af1..a1caa2074 100644
--- a/l3kernel/testfiles/m3text002.ptex.tlg
+++ b/l3kernel/testfiles/m3text002.ptex.tlg
@@ -132,8 +132,8 @@ TEST 13: Cyrillic
 ============================================================
 Доклады Акад^^ea^^9f^^97мии наук
 ^^ea^^9e^^a4окл^^ea^^9f^^90ды ^^ea^^9e^^a0к^^ea^^9f^^90демии н^^ea^^9f^^90ук
-^^ea^^9e^^a4окл^^ea^^9f^^90ды ^^ea^^9e^^a0к^^ea^^9f^^90демии н^^ea^^9f^^90ук
-^^ea^^9e^^a4окл^^ea^^9f^^90ды Академии наук
+^^ea^^9e^^a4оклады ^^ea^^9e^^a0кадемии н^^ea^^9f^^90ук
+^^ea^^9e^^a4оклады Академии наук
 ============================================================
 ============================================================
 TEST 14: BCP47 parts
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index c62a05ce6..0b7b090e5 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.uptex.tlg b/l3kernel/testfiles/m3text002.uptex.tlg
index 0b57bf702..454cfff8f 100644
--- a/l3kernel/testfiles/m3text002.uptex.tlg
+++ b/l3kernel/testfiles/m3text002.uptex.tlg
@@ -132,8 +132,8 @@ TEST 13: Cyrillic
 ============================================================
 ^^d0^^b4оклады ^^d0^^b0кадемии наук
 Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab А^^d0^^9a^^d0^^90^^d0^^94^^d0^^95^^d0^^9c^^d0^^98^^d0^^98 ^^d0^^9d^^d0^^90^^d0^^a3^^d0^^9a
-Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab А^^d0^^9a^^d0^^90^^d0^^94^^d0^^95^^d0^^9c^^d0^^98^^d0^^98 ^^d0^^9d^^d0^^90^^d0^^a3^^d0^^9a
-Д^^d0^^9e^^d0^^9a^^d0^^9b^^d0^^90^^d0^^94^^d0^^ab Академии наук
+Доклады Академии ^^d0^^9dаук
+Доклады Академии наук
 ============================================================
 ============================================================
 TEST 14: BCP47 parts
@@ -146,9 +146,9 @@ TEST 15: Armenian
 Ե^^d5^^90^^d4^^b5^^d5^^92^^d4^^b1^^d5^^86
 Ե^^d5^^90^^d4^^b5^^d5^^8e^^d4^^b1^^d5^^86
 Ե^^d5^^90^^d4^^b5^^d5^^92^^d4^^b1^^d5^^86
-Ե^^d5^^90^^d4^^b5^^d6^^82^^d4^^b1^^d5^^86
-^^d4^^b5^^d5^^be^^d4^^b1^^d5^^86
-^^d4^^b5^^d6^^82^^d4^^b1^^d5^^86
+Երևան
+^^d4^^b5^^d5^^beան
+^^d4^^b5^^d6^^82ան
 ============================================================
 ============================================================
 TEST 16: German-alternative
@@ -419,3 +419,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 835f6d2f7..0ba163892 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -420,3 +420,10 @@ Defining \l__text_uppercase_special_la-x-new_tl on line ...
 <recently read> }
 l. ...  }
 ============================================================
+============================================================
+TEST 34: Titlecase catcode handling
+============================================================
+> Abc.
+<recently read> }
+l. ...  }
+============================================================
diff --git a/l3kernel/testfiles/m3text005.ptex.tlg b/l3kernel/testfiles/m3text005.ptex.tlg
index 118c7f8c4..a40abae9a 100644
--- a/l3kernel/testfiles/m3text005.ptex.tlg
+++ b/l3kernel/testfiles/m3text005.ptex.tlg
@@ -10,6 +10,6 @@ TEST 1: \@uclclist\ entries
 ^^ea^^9e^^a6\CYRYO 
 ё\cyryo 
 ^^ea^^9f^^96\CYRYO 
-^^ea^^9f^^96\CYRYO 
-^^ea^^9f^^96\CYRYO 
+^^ea^^9f^^96\cyryo 
+^^ea^^9f^^96\cyryo 
 ============================================================
diff --git a/l3kernel/testfiles/m3text005.uptex.tlg b/l3kernel/testfiles/m3text005.uptex.tlg
index 427380824..ddbaf4c8a 100644
--- a/l3kernel/testfiles/m3text005.uptex.tlg
+++ b/l3kernel/testfiles/m3text005.uptex.tlg
@@ -10,6 +10,6 @@ TEST 1: \@uclclist\ entries
 Ё\CYRYO 
 ё\cyryo 
 ^^d0^^81\CYRYO 
-^^d0^^81\CYRYO 
-^^d0^^81\CYRYO 
+^^d0^^81\cyryo 
+^^d0^^81\cyryo 
 ============================================================





More information about the latex3-commits mailing list.