[latex3-commits] [git/LaTeX3-latex3-latex3] main: Expand letter-like commands in \text_expand:n (1c6d0275c)
Joseph Wright
joseph.wright at morningstar2.co.uk
Sun Feb 5 12:24:40 CET 2023
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/1c6d0275c15cb3075e38da858b689e1a3b230b64
>---------------------------------------------------------------
commit 1c6d0275c15cb3075e38da858b689e1a3b230b64
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Sun Feb 5 11:23:39 2023 +0000
Expand letter-like commands in \text_expand:n
The issues with Greek case changing highlight that
leaving 'letter like' commands in text is an issue.
Thus whilst this is formally a breaking change, it
will make longer-term work easier.
Further commits will address related issues, most
obviously accents, before Greek and Cyrillic
letter commands are added to the support.
>---------------------------------------------------------------
1c6d0275c15cb3075e38da858b689e1a3b230b64
l3kernel/CHANGELOG.md | 7 ++
l3kernel/doc/l3obsolete.txt | 1 +
l3kernel/l3text-purify.dtx | 74 --------------------
l3kernel/l3text.dtx | 119 ++++++++++++++++----------------
l3kernel/testfiles/m3text001.tlg | 2 +-
l3kernel/testfiles/m3text002.luatex.tlg | 8 +--
l3kernel/testfiles/m3text002.tlg | 8 +--
l3kernel/testfiles/m3text002.xetex.tlg | 8 +--
8 files changed, 82 insertions(+), 145 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 9024ea39c..a2c668ce7 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -8,9 +8,16 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
### Changed
+- `\text_expand:n` now converts letter-like commands,
+ e.g. `\ae`, to the UTF-8 equivalent: breaking change
+ also removes `\l_text_letterlike_tl` (unused outside of
+ the `expl3` kernel in TeX Live)
- Swap meaning of `el` and `el-x-iota` when case changing
Greek: match traditional LaTeX approach
+### Removed
+- `\l_text_letterlike_tl` (breaking change)
+
## [2023-02-02]
### Fixed
diff --git a/l3kernel/doc/l3obsolete.txt b/l3kernel/doc/l3obsolete.txt
index 722f807a6..b1a258a4d 100644
--- a/l3kernel/doc/l3obsolete.txt
+++ b/l3kernel/doc/l3obsolete.txt
@@ -342,6 +342,7 @@ Function Date removed
\KV_process_space_removal_sanitize:NNn 2011-09-08
\l_iow_line_length_int 2013-01-08
\l_last_box 2012-05-11
+\l_text_letterlike_tl 2023-02-05
\l_tl_replace_toks 2011-09-08
\l_tmpa_toks 2011-09-08
\l_tmpb_toks 2011-09-08
diff --git a/l3kernel/l3text-purify.dtx b/l3kernel/l3text-purify.dtx
index a28b73fe5..a086dc060 100644
--- a/l3kernel/l3text-purify.dtx
+++ b/l3kernel/l3text-purify.dtx
@@ -454,80 +454,6 @@
%
% \subsection{Accent and letter-like data for purifying text}
%
-% In contrast to case changing, both $8$-bit and Unicode engines need
-% information for text purification to handle accents and letter-like
-% functions: these all need to be removed. However, the results are
-% of course engine-dependent.
-%
-% For the letter-like commands, life is relatively easy: they are all
-% simply added as standard exceptions. The only oddity is \tn{SS}, which
-% gets converted to two letters. (At some stage an alternative version
-% can presumably be added to \pkg{babel} or similar.)
-% \begin{macrocode}
-\bool_lazy_or:nnTF
- { \sys_if_engine_luatex_p: }
- { \sys_if_engine_xetex_p: }
- {
- \cs_set_protected:Npn \@@_loop:Nn #1#2
- {
- \quark_if_recursion_tail_stop:N #1
- \text_declare_purify_equivalent:Nx #1
- {
- \char_generate:nn { "#2 }
- { \char_value_catcode:n { "#2 } }
- }
- \@@_loop:Nn
- }
- }
- {
- \cs_set_protected:Npn \@@_loop:Nn #1#2
- {
- \quark_if_recursion_tail_stop:N #1
- \text_declare_purify_equivalent:Nx #1
- {
- \exp_args:Ne \@@_tmp:n
- { \__kernel_codepoint_to_bytes:n { "#2 } }
- }
- \@@_loop:Nn
- }
- \cs_set:Npn \@@_tmp:n #1 { \@@_tmp:nnnn #1 }
- \cs_set:Npn \@@_tmp:nnnn #1#2#3#4
- {
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_not:N \char_generate:nn {#1} { 13 }
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_not:N \char_generate:nn {#2} { 13 }
- }
- }
-\@@_loop:Nn
- \AA { 00C5 }
- \AE { 00C6 }
- \DH { 00D0 }
- \DJ { 0110 }
- \IJ { 0132 }
- \L { 0141 }
- \NG { 014A }
- \O { 00D8 }
- \OE { 0152 }
- \TH { 00DE }
- \aa { 00E5 }
- \ae { 00E6 }
- \dh { 00F0 }
- \dj { 0111 }
- \i { 0131 }
- \j { 0237 }
- \ij { 0132 }
- \l { 0142 }
- \ng { 014B }
- \o { 00F8 }
- \oe { 0153 }
- \ss { 00DF }
- \th { 00FE }
- \q_recursion_tail ?
- \q_recursion_stop
-\text_declare_purify_equivalent:Nn \SS { SS }
-% \end{macrocode}
-%
% \begin{macro}[rEXP]{\@@_purify_accent:NN}
% Accent \textsc{licr} handling is a little more complex. Accents may exist
% as pre-composed codepoints or as independent glyphs. The former are all
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 6bd33f190..c111068bc 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -58,7 +58,8 @@
%
% \section{Expanding text}
%
-% \begin{function}[EXP, added = 2020-01-02]{\text_expand:n}
+% \begin{function}[EXP, added = 2020-01-02, updated = 2023-02-05]
+% {\text_expand:n}
% \begin{syntax}
% \cs{text_expand:n} \Arg{text}
% \end{syntax}
@@ -69,15 +70,17 @@
% \cs{l_text_math_delims_tl} or as the argument to commands listed
% in \cs{l_text_math_arg_tl}). Commands which are neither engine-
% nor \LaTeX{} protected are expanded exhaustively.
-% Any commands listed in \cs{l_text_expand_exclude_tl},
-% \cs{l_text_accents_tl} and \cs{l_text_letterlike_tl} are excluded from
-% expansion.
+% Any commands listed in \cs{l_text_expand_exclude_tl} and
+% \cs{l_text_accents_tl} are excluded from expansion. Letter-like
+% commands, e.g.~\cs{ae}, are converted to the UTF-8 equivalent.
% \end{function}
%
-% \begin{function}[added = 2020-01-22]
+% \begin{function}[added = 2020-01-22, updated = 2023-02-05]
% {
% \text_declare_expand_equivalent:Nn ,
-% \text_declare_expand_equivalent:cn
+% \text_declare_expand_equivalent:Nx ,
+% \text_declare_expand_equivalent:cn ,
+% \text_declare_expand_equivalent:cx
% }
% \begin{syntax}
% \cs{text_declare_expand_equivalent:Nn} \meta{cmd} \Arg{replacement}
@@ -261,11 +264,6 @@
% by expansion. (Defined only for the \LaTeXe{} package.)
% \end{variable}
%
-% \begin{variable}{\l_text_letterlike_tl}
-% Lists commands which represent letters; these are left unchanged by
-% expansion. (Defined only for the \LaTeXe{} package.)
-% \end{variable}
-%
% \begin{variable}{\l_text_math_arg_tl}
% Lists commands present in the \meta{text} where the argument of the
% command should be treated as math mode material. The treatment here is
@@ -794,28 +792,13 @@
%
% \subsection{Configuration variables}
%
-% \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}
+% \begin{variable}{\l_text_accents_tl}
% Special cases for accents and letter-like symbols, which in some cases will
% need to be converted further.
% \begin{macrocode}
\tl_new:N \l_text_accents_tl
\tl_set:Nn \l_text_accents_tl
{ \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
-\tl_new:N \l_text_letterlike_tl
-\tl_set:Nn \l_text_letterlike_tl
- {
- \AA \aa
- \AE \ae
- \DH \dh
- \DJ \dj
- \IJ \ij
- \L \l
- \NG \ng
- \O \o
- \OE \oe
- \SS \ss
- \TH \th
- }
% \end{macrocode}
% \end{variable}
%
@@ -953,8 +936,6 @@
% \begin{macro}[EXP]{\@@_expand_exclude:Nnn}
% \begin{macro}[EXP]{\@@_expand_accent:N}
% \begin{macro}[EXP]{\@@_expand_accent:NN}
-% \begin{macro}[EXP]{\@@_expand_letterlike:N}
-% \begin{macro}[EXP]{\@@_expand_letterlike:NN}
% \begin{macro}[EXP]{\@@_expand_cs:N}
% \begin{macro}[EXP]{\@@_expand_protect:w}
% \begin{macro}[EXP]{\@@_expand_protect:N}
@@ -1264,30 +1245,6 @@
\q_@@_recursion_tail \q_@@_recursion_stop
}
\cs_new:Npn \@@_expand_accent:NN #1#2
- {
- \@@_if_q_recursion_tail_stop_do:Nn #2
- { \@@_expand_letterlike:N #1 }
- \cs_if_eq:NNTF #2 #1
- {
- \@@_use_i_delimit_by_q_recursion_stop:nw
- {
- \@@_expand_store:n {#1}
- \@@_expand_loop:w
- }
- }
- { \@@_expand_accent:NN #1 }
- }
-% \end{macrocode}
-% Another list of exceptions: these ones take no arguments so are
-% easier to handle.
-% \begin{macrocode}
-\cs_new:Npn \@@_expand_letterlike:N #1
- {
- \exp_after:wN \@@_expand_letterlike:NN \exp_after:wN
- #1 \l_text_letterlike_tl
- \q_@@_recursion_tail \q_@@_recursion_stop
- }
-\cs_new:Npn \@@_expand_letterlike:NN #1#2
{
\@@_if_q_recursion_tail_stop_do:Nn #2
{ \@@_expand_cs:N #1 }
@@ -1299,7 +1256,7 @@
\@@_expand_loop:w
}
}
- { \@@_expand_letterlike:NN #1 }
+ { \@@_expand_accent:NN #1 }
}
% \end{macrocode}
% \LaTeXe{}'s \cs{protect} makes life interesting. Where possible, we
@@ -1495,13 +1452,13 @@
% \end{macro}
% \end{macro}
% \end{macro}
-% \end{macro}
-% \end{macro}
%
% \begin{macro}
% {
% \text_declare_expand_equivalent:Nn ,
-% \text_declare_expand_equivalent:cn
+% \text_declare_expand_equivalent:cn ,
+% \text_declare_expand_equivalent:Nx ,
+% \text_declare_expand_equivalent:cx
% }
% Create equivalents to allow replacement.
% \begin{macrocode}
@@ -1510,10 +1467,56 @@
\tl_clear_new:c { l_@@_expand_ \token_to_str:N #1 _tl }
\tl_set:cn { l_@@_expand_ \token_to_str:N #1 _tl } {#2}
}
-\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c }
+\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { Nx }
+\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c , cx }
% \end{macrocode}
% \end{macro}
%
+% \subsection{Accent and letter-like data for expansion}
+%
+% For the letter-like commands, life is relatively easy: they are all
+% simply added as standard exceptions. The only oddity is \tn{SS}, which
+% gets converted to two letters.
+% \begin{macrocode}
+\cs_set_protected:Npn \@@_loop:Nn #1#2
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \text_declare_expand_equivalent:Nx #1
+ {
+ \codepoint_generate:nn {"#2}
+ { \char_value_catcode:n {"#2} }
+ }
+ \@@_loop:Nn
+ }
+\@@_loop:Nn
+ \AA { 00C5 }
+ \AE { 00C6 }
+ \DH { 00D0 }
+ \DJ { 0110 }
+ \IJ { 0132 }
+ \L { 0141 }
+ \NG { 014A }
+ \O { 00D8 }
+ \OE { 0152 }
+ \TH { 00DE }
+ \aa { 00E5 }
+ \ae { 00E6 }
+ \dh { 00F0 }
+ \dj { 0111 }
+ \i { 0131 }
+ \j { 0237 }
+ \ij { 0133 }
+ \l { 0142 }
+ \ng { 014B }
+ \o { 00F8 }
+ \oe { 0153 }
+ \ss { 00DF }
+ \th { 00FE }
+ \q_recursion_tail ?
+ \q_recursion_stop
+\text_declare_expand_equivalent:Nn \SS { SS }
+% \end{macrocode}
+%
% \begin{macrocode}
%</package>
% \end{macrocode}
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text001.tlg
index aca51bd87..0077f55ca 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text001.tlg
@@ -25,7 +25,7 @@ Opps not close token in $y = \sin \theta
============================================================
TEST 4: Letter-like commands
============================================================
-\AA \aa \ae \dh \ss \l \O
+^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
============================================================
============================================================
TEST 5: Accents
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index 6256fe866..b026ee684 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-\aa \aa \ae \dh \ss \l \o
-\AA \AA \AE \DH \SS \L \O
-\AA \aa \ae \dh \ss \l \o
-\AA \aa \ae \dh \ss \l \O
+^^e5^^e5^^e6^^f0^^dfł^^f8
+^^c5^^c5^^c6^^d0SSŁ^^d8
+^^c5^^c5^^c6^^d0SsŁ^^d8
+^^c5^^c5^^c6^^d0SsŁ^^d8
============================================================
============================================================
TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 5177226a9..5cbe50b00 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-\aa \aa \ae \dh \ss \l \o
-\AA \AA \AE \DH \SS \L \O
-\AA \aa \ae \dh \ss \l \o
-\AA \aa \ae \dh \ss \l \O
+^^c3^^a5^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
+^^c3^^85^^c3^^85^^c3^^86^^c3^^90SS^^c5^^81^^c3^^98
+^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
+^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
============================================================
============================================================
TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 6256fe866..b026ee684 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-\aa \aa \ae \dh \ss \l \o
-\AA \AA \AE \DH \SS \L \O
-\AA \aa \ae \dh \ss \l \o
-\AA \aa \ae \dh \ss \l \O
+^^e5^^e5^^e6^^f0^^dfł^^f8
+^^c5^^c5^^c6^^d0SSŁ^^d8
+^^c5^^c5^^c6^^d0SsŁ^^d8
+^^c5^^c5^^c6^^d0SsŁ^^d8
============================================================
============================================================
TEST 28: Accents
More information about the latex3-commits
mailing list.