[latex3-commits] [git/LaTeX3-latex3-latex3] l3text: Add \char_to_nfd:N (3d9cc0e80)
Joseph Wright
joseph.wright at morningstar2.co.uk
Mon Dec 2 21:43:17 CET 2019
Repository : https://github.com/latex3/latex3
On branch : l3text
Link : https://github.com/latex3/latex3/commit/3d9cc0e80453854e2f58d96c940c3f7d7216eecd
>---------------------------------------------------------------
commit 3d9cc0e80453854e2f58d96c940c3f7d7216eecd
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Mon Dec 2 20:42:48 2019 +0000
Add \char_to_nfd:N
This is needed for Greek case changing, but is potentially
more general.
>---------------------------------------------------------------
3d9cc0e80453854e2f58d96c940c3f7d7216eecd
l3kernel/CHANGELOG.md | 1 +
l3kernel/l3token.dtx | 42 ++++++++++++++++++++++
l3kernel/l3unicode.dtx | 30 ++++++++++++----
...{m3str-convert003.tlg => m3token001.luatex.tlg} | 8 +++--
l3kernel/testfiles/m3token001.lvt | 30 ++++++++++++++++
.../{m3str-convert003.tlg => m3token001.tlg} | 6 ++--
.../{m3str-convert003.tlg => m3token001.xetex.tlg} | 8 +++--
7 files changed, 110 insertions(+), 15 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 510b57f19..a711ed31f 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -8,6 +8,7 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
### Added
+- `\char_to_nfd:N`
- `\file_hex_dump:n(nn)` and `\file_get_hex_dump:n(nn)N(TF)`
- `\str_foldcase:n`
- `\str_lowercase:n`
diff --git a/l3kernel/l3token.dtx b/l3kernel/l3token.dtx
index 854dd177d..05e239c2d 100644
--- a/l3kernel/l3token.dtx
+++ b/l3kernel/l3token.dtx
@@ -433,6 +433,16 @@
% the character code changes).
% \end{function}
%
+% \begin{function}[added = 2019-12-02, rEXP]{\char_to_nfd:N}
+% \begin{syntax}
+% \cs{char_to_nfd:N} \meta{char}
+% \end{syntax}
+% Converts the \meta{char} to its Unicode Normalization Form Canonical
+% Decomposition (\textsc{nfd}). The first generated character has the same
+% category code as the \meta{char}; any further characters have their current
+% category code. With $8$-bit engines, no change is made to the character.
+% \end{function}
+%
% \section{Generic tokens}
%
% \begin{variable}
@@ -1843,6 +1853,38 @@
% \end{macro}
% \end{macro}
%
+% \begin{macro}[rEXP]{\char_to_nfd:N}
+% \begin{macro}[rEXP]{\@@_to_nfd:n}
+% \begin{macro}[rEXP]{\@@_to_nfd:Nw}
+% Look up any \textsc{nfd} mapping and recursively decompose the result.
+% \begin{macrocode}
+\cs_new:Npn \char_to_nfd:N #1 % #1: the single character to decompose
+ {
+ \cs_if_exist:cTF { c_@@_nfd_ \token_to_str:N #1 _ tl } % is a mapping stored for #1?
+ {
+ \exp_after:wN \exp_after:wN \exp_after:wN \@@_to_nfd:Nw
+ \exp_after:wN \exp_after:wN \exp_after:wN #1
+ \cs:w c_@@_nfd_ \token_to_str:N #1 _ tl \cs_end: % expanded twice: name built, then tl contents
+ \q_stop
+ }
+ { \exp_not:n {#1} } % no mapping stored: leave #1 unchanged
+ }
+\cs_new_eq:NN \@@_to_nfd:n \char_to_nfd:N % recursion entry point; "new" reports a name clash
+\cs_new:Npn \@@_to_nfd:Nw #1#2#3 \q_stop % #1: original, #2: first mapped char, #3: rest (may be empty)
+ {
+ \exp_args:Ne \@@_to_nfd:n
+ { \char_generate:nn { `#2 } { \@@_change_case_catcode:N #1 } }
+ \tl_if_blank:nF {#3}
+ {
+ \exp_args:Ne \@@_to_nfd:n
+ { \char_generate:nn { `#3 } { \char_value_catcode:n { `#3 } } }
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
% \begin{macro}{\c_catcode_other_space_tl}
% Create a space with category code $12$: an \enquote{other} space.
% \begin{macrocode}
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index 31f51b812..8ae7cf37b 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -116,16 +116,34 @@
}
}
% \end{macrocode}
-% Parse the main Unicode data file for title case exceptions (the one-to-one
-% lower- and uppercase mappings it contains are all be covered by the \TeX{}
-% data). There are no comments in the main data file so this can be done using
-% a standard mapping and no checks.
+% Parse the main Unicode data file for two things. First, we want the titlecase
+% exceptions: the one-to-one lower- and uppercase mappings it contains are all
+% covered by the \TeX{} data. Second, we need normalization data: at present,
+% just the canonical \textsc{nfd} mappings. Those all yield either one or two
+% codepoints, so the split is relatively easy.
% \begin{macrocode}
\ior_open:Nn \g_@@_data_ior { UnicodeData.txt }
\cs_set_protected:Npn \@@_data_auxi:w
#1 ; #2 ; #3 ; #4 ; #5 ; #6 ; #7 ; #8 ; #9 ;
- { \@@_data_auxii:w #1 ; }
- \cs_set_protected:Npn \@@_data_auxii:w
+ {
+ \tl_if_blank:nF {#6}
+ {
+ \tl_if_head_eq_charcode:nNF {#6} < % >
+ { \@@_data_auxii:w #1 ; #6 ~ \q_stop }
+ }
+ \@@_data_auxiii:w #1 ;
+ }
+ \cs_set_protected:Npn \@@_data_auxii:w #1 ; #2 ~ #3 \q_stop
+ {
+ \tl_const:cx
+ { c_@@_nfd_ \@@_generate_char:n {#1} _tl }
+ {
+ \@@_generate:n { "#2 }
+ \tl_if_blank:nF {#3}
+ { \@@_generate:n { "#3 } }
+ }
+ }
+ \cs_set_protected:Npn \@@_data_auxiii:w
#1 ; #2 ; #3 ; #4 ; #5 ; #6 ; #7 ~ \q_stop
{
\cs_set_nopar:Npn \l_@@_tmpa_tl {#7}
diff --git a/l3kernel/testfiles/m3str-convert003.tlg b/l3kernel/testfiles/m3token001.luatex.tlg
similarity index 78%
copy from l3kernel/testfiles/m3str-convert003.tlg
copy to l3kernel/testfiles/m3token001.luatex.tlg
index c89276a04..bdf4776d3 100644
--- a/l3kernel/testfiles/m3str-convert003.tlg
+++ b/l3kernel/testfiles/m3token001.luatex.tlg
@@ -1,8 +1,10 @@
This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
-Author: Bruno Le Floch
+Author: Joseph Wright
============================================================
-TEST 1: str if contains char
+TEST 1: Unicode NFD
============================================================
-FALSE TRUE FALSE FALSE TRUE
+A
+á
+ῒ
============================================================
diff --git a/l3kernel/testfiles/m3token001.lvt b/l3kernel/testfiles/m3token001.lvt
new file mode 100644
index 000000000..8f2a3b630
--- /dev/null
+++ b/l3kernel/testfiles/m3token001.lvt
@@ -0,0 +1,30 @@
+%
+% Copyright (C) 2019 The LaTeX Project
+%
+
+\documentclass{minimal}
+\input{regression-test} % presumably supplies \START, \AUTHOR, \TESTEXP, \NEWLINE, \END -- confirm
+
+\RequirePackage[enable-debug]{expl3} % load expl3 with its debugging code enabled
+\ExplSyntaxOn
+\debug_on:n { check-declarations , deprecation , log-functions } % switch on the listed checks
+\ExplSyntaxOff
+\makeatletter
+
+\begin{document}
+\START
+\AUTHOR{Joseph Wright}
+\ExplSyntaxOn
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\TESTEXP{Unicode~NFD}{
+ \char_to_nfd:N A \NEWLINE % ASCII letter: the reference output is an unchanged "A"
+ \bool_lazy_or:nnT { \sys_if_engine_luatex_p: } { \sys_if_engine_xetex_p: } % LuaTeX or XeTeX only
+ {
+ \char_to_nfd:N á \NEWLINE % accented Latin letter; result recorded in the engine-specific .tlg
+ \char_to_nfd:N ῒ % Greek letter with two diacritics; result recorded likewise
+ }
+}
+
+\END
diff --git a/l3kernel/testfiles/m3str-convert003.tlg b/l3kernel/testfiles/m3token001.tlg
similarity index 78%
copy from l3kernel/testfiles/m3str-convert003.tlg
copy to l3kernel/testfiles/m3token001.tlg
index c89276a04..cd844648d 100644
--- a/l3kernel/testfiles/m3str-convert003.tlg
+++ b/l3kernel/testfiles/m3token001.tlg
@@ -1,8 +1,8 @@
This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
-Author: Bruno Le Floch
+Author: Joseph Wright
============================================================
-TEST 1: str if contains char
+TEST 1: Unicode NFD
============================================================
-FALSE TRUE FALSE FALSE TRUE
+A
============================================================
diff --git a/l3kernel/testfiles/m3str-convert003.tlg b/l3kernel/testfiles/m3token001.xetex.tlg
similarity index 78%
copy from l3kernel/testfiles/m3str-convert003.tlg
copy to l3kernel/testfiles/m3token001.xetex.tlg
index c89276a04..bdf4776d3 100644
--- a/l3kernel/testfiles/m3str-convert003.tlg
+++ b/l3kernel/testfiles/m3token001.xetex.tlg
@@ -1,8 +1,10 @@
This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
-Author: Bruno Le Floch
+Author: Joseph Wright
============================================================
-TEST 1: str if contains char
+TEST 1: Unicode NFD
============================================================
-FALSE TRUE FALSE FALSE TRUE
+A
+á
+ῒ
============================================================
More information about the latex3-commits
mailing list