[latex3-commits] [git/LaTeX3-latex3-latex3] unicode-data: Rename \char_to_nfd:n to \codepoint_to_nfd:n (9e19f852f)

Joseph Wright joseph.wright at morningstar2.co.uk
Sun Oct 9 19:35:35 CEST 2022


Repository : https://github.com/latex3/latex3
On branch  : unicode-data
Link       : https://github.com/latex3/latex3/commit/9e19f852fb069f375a83d6fc9d6d90589095265a

>---------------------------------------------------------------

commit 9e19f852fb069f375a83d6fc9d6d90589095265a
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sun Oct 9 18:34:11 2022 +0100

    Rename \char_to_nfd:n to \codepoint_to_nfd:n
    
    Also deprecate entirely the :N version.


>---------------------------------------------------------------

9e19f852fb069f375a83d6fc9d6d90589095265a
 l3kernel/CHANGELOG.md                      |  2 ++
 l3kernel/doc/l3obsolete.txt                |  1 +
 l3kernel/l3candidates.dtx                  | 22 ------------
 l3kernel/l3deprecation.dtx                 |  9 +++++
 l3kernel/l3kernel-functions.dtx            |  9 -----
 l3kernel/l3text-case.dtx                   |  2 +-
 l3kernel/l3token.dtx                       | 50 ---------------------------
 l3kernel/l3unicode.dtx                     | 54 ++++++++++++++++++++++++++++--
 l3kernel/testfiles/m3token001.luatex.tlg   | 10 ------
 l3kernel/testfiles/m3token001.lvt          | 30 -----------------
 l3kernel/testfiles/m3token001.tlg          |  8 -----
 l3kernel/testfiles/m3token001.xetex.tlg    | 10 ------
 l3kernel/testfiles/m3token006.luatex.tlg   | 10 ------
 l3kernel/testfiles/m3token006.lvt          | 25 --------------
 l3kernel/testfiles/m3token006.tlg          | 10 ------
 l3kernel/testfiles/m3token006.xetex.tlg    | 10 ------
 l3kernel/testfiles/m3unicode001.luatex.tlg |  7 ++++
 l3kernel/testfiles/m3unicode001.lvt        |  7 ++++
 l3kernel/testfiles/m3unicode001.tlg        |  7 ++++
 l3kernel/testfiles/m3unicode001.xetex.tlg  |  7 ++++
 20 files changed, 93 insertions(+), 197 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 0d32ba176..a8d47c9f9 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -9,6 +9,7 @@ this project uses date-based 'snapshot' version identifiers.
 
 ### Added
 - `\codepoint_to_bytes:n`
+- `\codepoint_to_nfd:n`
 - `\codepoint_str_generate:n`
 
 ### Changed
@@ -21,6 +22,7 @@ this project uses date-based 'snapshot' version identifiers.
   esoteric case (issue [\#1113](https://github.com/latex3/latex3/issues/1113))
 
 ### Deprecated
+- `\char_to_nfd:N`, `\char_to_nfd:n`
 - `\char_to_utfviii_bytes:n`
 
 ## [2022-09-28]
diff --git a/l3kernel/doc/l3obsolete.txt b/l3kernel/doc/l3obsolete.txt
index 2377bb985..0b229ddfe 100644
--- a/l3kernel/doc/l3obsolete.txt
+++ b/l3kernel/doc/l3obsolete.txt
@@ -23,6 +23,7 @@ Function                            Date deprecated
 \char_str_mixed_case:N                   2020-01-03
 \char_str_upper_case:N                   2020-01-03
 \char_to_utfviii_bytes:n                 2022-10-09
+\char_to_nfd:N                           2022-10-09
 \cs_argument_spec:N                      2022-06-24
 \l_keys_key_tl                           2020-02-08
 \l_keys_path_tl                          2020-02-08
diff --git a/l3kernel/l3candidates.dtx b/l3kernel/l3candidates.dtx
index 13a0e9b5c..f7a92049b 100644
--- a/l3kernel/l3candidates.dtx
+++ b/l3kernel/l3candidates.dtx
@@ -626,28 +626,6 @@
 %   (\enquote{active}), and character code $32$ (space).
 % \end{variable}
 %
-% \begin{function}[added = 2020-01-02, rEXP]{\char_to_nfd:N}
-%   \begin{syntax}
-%     \cs{char_to_nfd:N} \meta{char}
-%   \end{syntax}
-%   Converts the \meta{char} to the Unicode Normalization Form Canonical
-%   Decomposition. The category code of the \emph{first} generated character is
-%   the same as the \meta{char}; second and subsequent chars will have the
-%   current category code, as they would if typed in directly. For $8$-bit
-%   engines, no change will take place.
-% \end{function}
-%
-% \begin{function}[added = 2022-08-29, rEXP]{\char_to_nfd:n}
-%   \begin{syntax}
-%     \cs{char_to_nfd:n} \Arg{codepoint}
-%   \end{syntax}
-%   Converts the (Unicode) \meta{codepoint} to the Unicode Normalization
-%   Form Canonical Decomposition. The generated character(s) will have
-%   the current category code as they would if typed in directly. In contrast
-%   to \cs{char_to_nfd:N}, this function \emph{does} decompose codepoints
-%   with $8$-bit engines.
-% \end{function}
-%
 % \begin{function}[added = 2018-09-23]
 %   {
 %     \peek_catcode_collect_inline:Nn,
diff --git a/l3kernel/l3deprecation.dtx b/l3kernel/l3deprecation.dtx
index 85c532113..dc0c7c5dc 100644
--- a/l3kernel/l3deprecation.dtx
+++ b/l3kernel/l3deprecation.dtx
@@ -556,6 +556,15 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \begin{macro}[EXP]{\char_to_nfd:N, \char_to_nfd:n}
+%    \begin{macrocode}
+\__kernel_patch_deprecation:nnNNpn { 2022-10-09 } { \codepoint_to_nfd:n }
+\cs_gset:Npn \char_to_nfd:N #1 { \codepoint_to_nfd:n {`#1} }
+\__kernel_patch_deprecation:nnNNpn { 2022-10-09 } { \codepoint_to_nfd:n }
+\cs_gset:Npn \char_to_nfd:n { \codepoint_to_nfd:n }
+%    \end{macrocode}
+% \end{macro}
+%
 % \begin{macro}[EXP]
 %   {
 %     \char_lower_case:N, \char_upper_case:N,
diff --git a/l3kernel/l3kernel-functions.dtx b/l3kernel/l3kernel-functions.dtx
index fa88fd502..2d16d907e 100644
--- a/l3kernel/l3kernel-functions.dtx
+++ b/l3kernel/l3kernel-functions.dtx
@@ -512,15 +512,6 @@
 %   \end{itemize}
 % \end{function}
 %
-% \begin{function}[EXP]{\__kernel_codepoint_nfd:n}
-%   \begin{syntax}
-%     \cs{__kernel_unicode_nfd:nn} \Arg{mapping}
-%   \end{syntax}
-%   Expands to a list of two balanced text, of which at least the first
-%   will contain a codepoint. This list of one or two codepoints specifies
-%   the normal form decomposition of the input \meta{codepoint}.
-% \end{function}
-%
 % \subsection{Kernel backend functions}
 %
 % These functions are required to pass information to the backend. The nature
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index 18c2188e6..5191c8003 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -945,7 +945,7 @@
         \@@_change_case_if_greek:nTF { `#4 }
           {
             \exp_args:Ne \@@_change_case_upper_el:nnn
-              { \char_to_nfd:N #4 } {#2} {#3}
+              { \codepoint_to_nfd:n { `#4 } } {#2} {#3}
           }
           {
             \int_compare:nNnTF { `#4 } = { "0345 }
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 356d88d57..75f4f6a05 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -1690,56 +1690,6 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}[EXP]{\char_to_nfd:N}
-% \begin{macro}[EXP]{\char_to_nfd:n}
-% \begin{macro}[EXP]{\@@_to_nfd:nn}
-% \begin{macro}[EXP]{\@@_to_nfd:nnn}
-% \begin{macro}[EXP]{\@@_to_nfd:nnnn}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:nn}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:n}
-% \begin{macro}[EXP]{\@@_to_nfd_generate:nnnn}
-%   Converted to NFD is a potentially-recursive process: the key is to
-%   check if we get the input codepoint back again. As far as possible,
-%   we use the same path for all engines.
-%    \begin{macrocode}
-\bool_lazy_or:nnTF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
-  {
-    \cs_new:Npn \char_to_nfd:N #1
-      { \@@_to_nfd:nn {`#1} { \@@_change_case_catcode:N #1 } }
-  }
-  {
-    \cs_new:Npn \char_to_nfd:N #1 { \exp_not:n {#1} }
-  }
-\cs_new:Npn \char_to_nfd:n #1
-  { \@@_to_nfd:nn {#1} { \char_value_catcode:n {#1} } }
-\cs_new:Npn \@@_to_nfd:nn #1#2
-  {
-    \exp_args:Ne \@@_to_nfd:nnn
-      { \__kernel_codepoint_nfd:n {#1} } {#1} {#2}
-  }
-\cs_new:Npn \@@_to_nfd:nnn #1#2#3 { \@@_to_nfd:nnnn #1 {#2} {#3} }
-\cs_new:Npn \@@_to_nfd:nnnn #1#2#3#4
-  {
-    \int_compare:nNnTF {#1} = {#3}
-      { \codepoint_generate:nn {#1} {#4} }
-      {
-        \@@_to_nfd:nn {#1} {#4}
-        \tl_if_blank:nF {#2}
-          { \@@_to_nfd:nn {#2} {#4} }
-      }
-  }
-%    \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
 % \begin{macro}[EXP]
 %   {
 %     \char_lowercase:N, \char_uppercase:N,
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index 81b5c17bd..e8eb5c350 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -111,6 +111,17 @@
 %   and |#3| and |#4| empty.
 % \end{function}
 %
+% \begin{function}[added = 2022-10-09, EXP]{\codepoint_to_nfd:n}
+%   \begin{syntax}
+%     \cs{codepoint_to_nfd:n} \Arg{codepoint}
+%   \end{syntax}
+%   Converts the \meta{codepoint} to the Unicode Normalization
+%   Form Canonical Decomposition. The generated character(s) will have
+%   the current category code as they would if typed in directly for Unicode
+%   engines; for $8$-bit engines, active characters are used for all codepoints
+%   outside of the ASCII range.
+% \end{function}
+%
+%
 % \end{documentation}
 %
 % \begin{implementation}
@@ -340,6 +350,46 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
+%
+% \begin{macro}[EXP]{\codepoint_to_nfd:n}
+% \begin{macro}[EXP]{\@@_to_nfd:nn}
+% \begin{macro}[EXP]{\@@_to_nfd:nnn}
+% \begin{macro}[EXP]{\@@_to_nfd:nnnn}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:nn}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:n}
+% \begin{macro}[EXP]{\@@_to_nfd_generate:nnnn}
+%   Conversion to NFD is a potentially-recursive process: the key is to
+%   check if we get the input codepoint back again. As far as possible,
+%   we use the same path for all engines.
+%    \begin{macrocode}
+\cs_new:Npn \codepoint_to_nfd:n #1
+  { \@@_to_nfd:nn {#1} { \char_value_catcode:n {#1} } }
+\cs_new:Npn \@@_to_nfd:nn #1#2
+  {
+    \exp_args:Ne \@@_to_nfd:nnn
+      { \@@_nfd:n {#1} } {#1} {#2}
+  }
+\cs_new:Npn \@@_to_nfd:nnn #1#2#3 { \@@_to_nfd:nnnn #1 {#2} {#3} }
+\cs_new:Npn \@@_to_nfd:nnnn #1#2#3#4
+  {
+    \int_compare:nNnTF {#1} = {#3}
+      { \codepoint_generate:nn {#1} {#4} }
+      {
+        \@@_to_nfd:nn {#1} {#4}
+        \tl_if_blank:nF {#2}
+          { \@@_to_nfd:nn {#2} {#4} }
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
 %
 % \subsection{Data loader}
 %
@@ -842,11 +892,11 @@
 % \end{macro}
 % \end{macro}
 %
-% \begin{macro}[EXP]{\__kernel_codepoint_nfd:n}
+% \begin{macro}[EXP]{\@@_nfd:n}
 % \begin{macro}[EXP]{\@@_nfd:nn}
 %   A simple interface.
 %    \begin{macrocode}
-\cs_new:Npn \__kernel_codepoint_nfd:n #1
+\cs_new:Npn \@@_nfd:n #1
   { \exp_args:Ne \@@_nfd:nn { \codepoint_str_generate:n {#1} } {#1} }
 \cs_new:Npn \@@_nfd:nn #1#2
   {
diff --git a/l3kernel/testfiles/m3token001.luatex.tlg b/l3kernel/testfiles/m3token001.luatex.tlg
deleted file mode 100644
index bdf4776d3..000000000
--- a/l3kernel/testfiles/m3token001.luatex.tlg
+++ /dev/null
@@ -1,10 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Unicode NFD
-============================================================
-A
-á
-ῒ
-============================================================
diff --git a/l3kernel/testfiles/m3token001.lvt b/l3kernel/testfiles/m3token001.lvt
deleted file mode 100644
index ab4cc21e8..000000000
--- a/l3kernel/testfiles/m3token001.lvt
+++ /dev/null
@@ -1,30 +0,0 @@
-%
-% Copyright (C) 2020,2021 The LaTeX Project
-%
-
-\documentclass{minimal}
-\input{regression-test}
-
-\RequirePackage[enable-debug]{expl3}
-\ExplSyntaxOn
-\debug_on:n { check-declarations , deprecation , log-functions }
-\ExplSyntaxOff
-\makeatletter
-
-\begin{document}
-\START
-\AUTHOR{Joseph Wright}
-\ExplSyntaxOn
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\TESTEXP{Unicode~NFD}{
-  \char_to_nfd:N A \NEWLINE
-  \bool_lazy_or:nnT { \sys_if_engine_luatex_p: } { \sys_if_engine_xetex_p: }
-    {
-      \char_to_nfd:N á \NEWLINE
-      \char_to_nfd:N ῒ
-    }
-}
-
-\END
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3token001.tlg
deleted file mode 100644
index cd844648d..000000000
--- a/l3kernel/testfiles/m3token001.tlg
+++ /dev/null
@@ -1,8 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Unicode NFD
-============================================================
-A
-============================================================
diff --git a/l3kernel/testfiles/m3token001.xetex.tlg b/l3kernel/testfiles/m3token001.xetex.tlg
deleted file mode 100644
index bdf4776d3..000000000
--- a/l3kernel/testfiles/m3token001.xetex.tlg
+++ /dev/null
@@ -1,10 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Unicode NFD
-============================================================
-A
-á
-ῒ
-============================================================
diff --git a/l3kernel/testfiles/m3token006.luatex.tlg b/l3kernel/testfiles/m3token006.luatex.tlg
deleted file mode 100644
index f5a12fbb0..000000000
--- a/l3kernel/testfiles/m3token006.luatex.tlg
+++ /dev/null
@@ -1,10 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Character decomposition
-============================================================
-A
-Î
-Ή
-============================================================
diff --git a/l3kernel/testfiles/m3token006.lvt b/l3kernel/testfiles/m3token006.lvt
deleted file mode 100644
index cdb8df767..000000000
--- a/l3kernel/testfiles/m3token006.lvt
+++ /dev/null
@@ -1,25 +0,0 @@
-%
-% Copyright (C) 2022 The LaTeX Project
-%
-
-\documentclass{minimal}
-\input{regression-test}
-
-\RequirePackage[enable-debug]{expl3}
-\ExplSyntaxOn
-\debug_on:n { check-declarations , deprecation , log-functions }
-\ExplSyntaxOff
-
-\START
-\AUTHOR{Joseph Wright}
-
-\ExplSyntaxOn
-
-\TESTEXP { Character~decomposition }
-  {
-    \char_to_nfd:n { `A } \NEWLINE
-    \char_to_nfd:n { "00CE } \NEWLINE
-    \char_to_nfd:n { "0389 }
-  }
-
-\END
diff --git a/l3kernel/testfiles/m3token006.tlg b/l3kernel/testfiles/m3token006.tlg
deleted file mode 100644
index 7ab6b11e2..000000000
--- a/l3kernel/testfiles/m3token006.tlg
+++ /dev/null
@@ -1,10 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Character decomposition
-============================================================
-A
-I^^cc^^82
-^^ce^^97^^cc^^81
-============================================================
diff --git a/l3kernel/testfiles/m3token006.xetex.tlg b/l3kernel/testfiles/m3token006.xetex.tlg
deleted file mode 100644
index f5a12fbb0..000000000
--- a/l3kernel/testfiles/m3token006.xetex.tlg
+++ /dev/null
@@ -1,10 +0,0 @@
-This is a generated file for the LaTeX (2e + expl3) validation system.
-Don't change this file in any respect.
-Author: Joseph Wright
-============================================================
-TEST 1: Character decomposition
-============================================================
-A
-Î
-Ή
-============================================================
diff --git a/l3kernel/testfiles/m3unicode001.luatex.tlg b/l3kernel/testfiles/m3unicode001.luatex.tlg
index 0e243b747..ace132b9a 100644
--- a/l3kernel/testfiles/m3unicode001.luatex.tlg
+++ b/l3kernel/testfiles/m3unicode001.luatex.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+Î
+Ή
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.lvt b/l3kernel/testfiles/m3unicode001.lvt
index 73e6a41df..931cf2063 100644
--- a/l3kernel/testfiles/m3unicode001.lvt
+++ b/l3kernel/testfiles/m3unicode001.lvt
@@ -52,4 +52,11 @@
     \codepoint_to_bytes:n { "10000 }
   }
 
+\TESTEXP { Character~decomposition }
+  {
+    \codepoint_to_nfd:n { `A } \NEWLINE
+    \codepoint_to_nfd:n { "00CE } \NEWLINE
+    \codepoint_to_nfd:n { "0389 }
+  }
+
 \END
\ No newline at end of file
diff --git a/l3kernel/testfiles/m3unicode001.tlg b/l3kernel/testfiles/m3unicode001.tlg
index 21244eb8b..3c38d26ef 100644
--- a/l3kernel/testfiles/m3unicode001.tlg
+++ b/l3kernel/testfiles/m3unicode001.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+I^^cc^^82
+^^ce^^97^^cc^^81
+============================================================
diff --git a/l3kernel/testfiles/m3unicode001.xetex.tlg b/l3kernel/testfiles/m3unicode001.xetex.tlg
index 0e243b747..ace132b9a 100644
--- a/l3kernel/testfiles/m3unicode001.xetex.tlg
+++ b/l3kernel/testfiles/m3unicode001.xetex.tlg
@@ -31,3 +31,10 @@ TEST 3: Byte decomposition
 {239}{191}{189}{}
 {240}{144}{128}{128}
 ============================================================
+============================================================
+TEST 4: Character decomposition
+============================================================
+A
+Î
+Ή
+============================================================





More information about the latex3-commits mailing list.