[latex3-commits] [git/LaTeX3-latex3-latex3] main: Revert "Expand letter-like commands in \text_expand:n" (5627f4335)
Joseph Wright
joseph.wright at morningstar2.co.uk
Mon Feb 6 20:07:49 CET 2023
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/5627f4335f205d992857d024095e02f3c1a39c96
>---------------------------------------------------------------
commit 5627f4335f205d992857d024095e02f3c1a39c96
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Sun Feb 5 11:23:39 2023 +0000
Revert "Expand letter-like commands in \text_expand:n"
This reverts commit 1c6d0275c15cb3075e38da858b689e1a3b230b64.
>---------------------------------------------------------------
5627f4335f205d992857d024095e02f3c1a39c96
l3kernel/CHANGELOG.md | 7 --
l3kernel/doc/l3obsolete.txt | 1 -
l3kernel/l3text-purify.dtx | 74 ++++++++++++++++++++
l3kernel/l3text.dtx | 119 ++++++++++++++++----------------
l3kernel/testfiles/m3text001.tlg | 2 +-
l3kernel/testfiles/m3text002.luatex.tlg | 8 +--
l3kernel/testfiles/m3text002.tlg | 8 +--
l3kernel/testfiles/m3text002.xetex.tlg | 8 +--
8 files changed, 145 insertions(+), 82 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index c1b2d4e0e..2c6b3d478 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -8,16 +8,9 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
### Changed
-- `\text_expand:n` now converts letter-like commands,
- e.g. `\ae`, to the UTF-8 equivalent: breaking change
- also removes `\l_text_letterlike_tl` (unused outside of
- the `expl3` kernel in TeX Live)
- Swap meaning of `el` and `el-x-iota` when case changing
Greek: match traditional LaTeX approach
-### Removed
-- `\l_text_letterlike_tl` (breaking change)
-
## [2023-02-02]
### Fixed
diff --git a/l3kernel/doc/l3obsolete.txt b/l3kernel/doc/l3obsolete.txt
index b1a258a4d..722f807a6 100644
--- a/l3kernel/doc/l3obsolete.txt
+++ b/l3kernel/doc/l3obsolete.txt
@@ -342,7 +342,6 @@ Function Date removed
\KV_process_space_removal_sanitize:NNn 2011-09-08
\l_iow_line_length_int 2013-01-08
\l_last_box 2012-05-11
-\l_text_letterlike_tl 2023-02-05
\l_tl_replace_toks 2011-09-08
\l_tmpa_toks 2011-09-08
\l_tmpb_toks 2011-09-08
diff --git a/l3kernel/l3text-purify.dtx b/l3kernel/l3text-purify.dtx
index a086dc060..a28b73fe5 100644
--- a/l3kernel/l3text-purify.dtx
+++ b/l3kernel/l3text-purify.dtx
@@ -454,6 +454,80 @@
%
% \subsection{Accent and letter-like data for purifying text}
%
+% In contrast to case changing, both $8$-bit and Unicode engines need
+% information for text purification to handle accents and letter-like
+% functions: these all need to be removed. However, the results are
+% of course engine-dependent.
+%
+% For the letter-like commands, life is relatively easy: they are all
+% simply added as standard exceptions. The only oddity is \tn{SS}, which
+% gets converted to two letters. (At some stage an alternative version
+% can presumably be added to \pkg{babel} or similar.)
+% \begin{macrocode}
+\bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ { % LuaTeX or XeTeX: the equivalent is a single generated character
+ \cs_set_protected:Npn \@@_loop:Nn #1#2
+ { % #1: command, #2: hexadecimal codepoint (read with a leading ")
+ \quark_if_recursion_tail_stop:N #1
+ \text_declare_purify_equivalent:Nx #1
+ {
+ \char_generate:nn { "#2 }
+ { \char_value_catcode:n { "#2 } }
+ }
+ \@@_loop:Nn
+ }
+ }
+ { % any other engine: the equivalent is built by \@@_tmp:n
+ \cs_set_protected:Npn \@@_loop:Nn #1#2
+ { % #1: command, #2: hexadecimal codepoint (read with a leading ")
+ \quark_if_recursion_tail_stop:N #1
+ \text_declare_purify_equivalent:Nx #1
+ {
+ \exp_args:Ne \@@_tmp:n
+ { \__kernel_codepoint_to_bytes:n { "#2 } }
+ }
+ \@@_loop:Nn
+ }
+ \cs_set:Npn \@@_tmp:n #1 { \@@_tmp:nnnn #1 }
+ \cs_set:Npn \@@_tmp:nnnn #1#2#3#4
+ { % only #1 and #2 become catcode-13 characters; #3 and #4 are dropped
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#1} { 13 }
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#2} { 13 }
+ }
+ }
+\@@_loop:Nn % pairs of: letter-like command, hexadecimal codepoint of its equivalent
+ \AA { 00C5 }
+ \AE { 00C6 }
+ \DH { 00D0 }
+ \DJ { 0110 }
+ \IJ { 0132 }
+ \L { 0141 }
+ \NG { 014A }
+ \O { 00D8 }
+ \OE { 0152 }
+ \TH { 00DE }
+ \aa { 00E5 }
+ \ae { 00E6 }
+ \dh { 00F0 }
+ \dj { 0111 }
+ \i { 0131 }
+ \j { 0237 }
+ \ij { 0133 } % U+0133 is the small ligature; U+0132 is the capital used by \IJ
+ \l { 0142 }
+ \ng { 014B }
+ \o { 00F8 }
+ \oe { 0153 }
+ \ss { 00DF }
+ \th { 00FE }
+ \q_recursion_tail ?
+ \q_recursion_stop
+\text_declare_purify_equivalent:Nn \SS { SS }
+% \end{macrocode}
+%
% \begin{macro}[rEXP]{\@@_purify_accent:NN}
% Accent \textsc{licr} handling is a little more complex. Accents may exist
% as pre-composed codepoints or as independent glyphs. The former are all
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index c111068bc..6bd33f190 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -58,8 +58,7 @@
%
% \section{Expanding text}
%
-% \begin{function}[EXP, added = 2020-01-02, updated = 2023-02-05]
-% {\text_expand:n}
+% \begin{function}[EXP, added = 2020-01-02]{\text_expand:n}
% \begin{syntax}
% \cs{text_expand:n} \Arg{text}
% \end{syntax}
@@ -70,17 +69,15 @@
% \cs{l_text_math_delims_tl} or as the argument to commands listed
% in \cs{l_text_math_arg_tl}). Commands which are neither engine-
% nor \LaTeX{} protected are expanded exhaustively.
-% Any commands listed in \cs{l_text_expand_exclude_tl} and
-% \cs{l_text_accents_tl} are excluded from expansion. Letter-like
-% commands, e.g.~\cs{ae}, are converted to the UTF-8 equivalent.
+% Any commands listed in \cs{l_text_expand_exclude_tl},
+% \cs{l_text_accents_tl} and \cs{l_text_letterlike_tl} are excluded from
+% expansion.
% \end{function}
%
-% \begin{function}[added = 2020-01-22, updated = 2023-02-05]
+% \begin{function}[added = 2020-01-22]
% {
% \text_declare_expand_equivalent:Nn ,
-% \text_declare_expand_equivalent:Nx ,
-% \text_declare_expand_equivalent:cn ,
-% \text_declare_expand_equivalent:cx
+% \text_declare_expand_equivalent:cn
% }
% \begin{syntax}
% \cs{text_declare_expand_equivalent:Nn} \meta{cmd} \Arg{replacement}
@@ -264,6 +261,11 @@
% by expansion. (Defined only for the \LaTeXe{} package.)
% \end{variable}
%
+% \begin{variable}{\l_text_letterlike_tl}
+% Lists commands which represent letters; these are left unchanged by
+% expansion. (Defined only for the \LaTeXe{} package.)
+% \end{variable}
+%
% \begin{variable}{\l_text_math_arg_tl}
% Lists commands present in the \meta{text} where the argument of the
% command should be treated as math mode material. The treatment here is
@@ -792,13 +794,28 @@
%
% \subsection{Configuration variables}
%
-% \begin{variable}{\l_text_accents_tl}
+% \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}
% Special cases for accents and letter-like symbols, which in some cases will
% need to be converted further.
% \begin{macrocode}
\tl_new:N \l_text_accents_tl
\tl_set:Nn \l_text_accents_tl
{ \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
+\tl_new:N \l_text_letterlike_tl % list scanned by \@@_expand_letterlike:N
+\tl_set:Nn \l_text_letterlike_tl
+ { % one upper-/lower-case command pair per line
+ \AA \aa
+ \AE \ae
+ \DH \dh
+ \DJ \dj
+ \IJ \ij
+ \L \l
+ \NG \ng
+ \O \o
+ \OE \oe
+ \SS \ss
+ \TH \th
+ }
% \end{macrocode}
% \end{variable}
%
@@ -936,6 +953,8 @@
% \begin{macro}[EXP]{\@@_expand_exclude:Nnn}
% \begin{macro}[EXP]{\@@_expand_accent:N}
% \begin{macro}[EXP]{\@@_expand_accent:NN}
+% \begin{macro}[EXP]{\@@_expand_letterlike:N}
+% \begin{macro}[EXP]{\@@_expand_letterlike:NN}
% \begin{macro}[EXP]{\@@_expand_cs:N}
% \begin{macro}[EXP]{\@@_expand_protect:w}
% \begin{macro}[EXP]{\@@_expand_protect:N}
@@ -1247,7 +1266,7 @@
\cs_new:Npn \@@_expand_accent:NN #1#2
{
\@@_if_q_recursion_tail_stop_do:Nn #2
- { \@@_expand_cs:N #1 }
+ { \@@_expand_letterlike:N #1 }
\cs_if_eq:NNTF #2 #1
{
\@@_use_i_delimit_by_q_recursion_stop:nw
@@ -1259,6 +1278,30 @@
{ \@@_expand_accent:NN #1 }
}
% \end{macrocode}
+% Another list of exceptions: these ones take no arguments so are
+% easier to handle.
+% \begin{macrocode}
+\cs_new:Npn \@@_expand_letterlike:N #1
+ { % #1: the command to look up in \l_text_letterlike_tl
+ \exp_after:wN \@@_expand_letterlike:NN \exp_after:wN
+ #1 \l_text_letterlike_tl % list is expanded once, so its entries follow #1
+ \q_@@_recursion_tail \q_@@_recursion_stop
+ }
+\cs_new:Npn \@@_expand_letterlike:NN #1#2
+ { % #1: command being tested, #2: next entry taken from the list
+ \@@_if_q_recursion_tail_stop_do:Nn #2
+ { \@@_expand_cs:N #1 } % list exhausted: hand #1 on to \@@_expand_cs:N
+ \cs_if_eq:NNTF #2 #1
+ {
+ \@@_use_i_delimit_by_q_recursion_stop:nw
+ { % match: #1 itself goes to \@@_expand_store:n, then the main loop resumes
+ \@@_expand_store:n {#1}
+ \@@_expand_loop:w
+ }
+ }
+ { \@@_expand_letterlike:NN #1 } % no match: compare with the next entry
+ }
+% \end{macrocode}
% \LaTeXe{}'s \cs{protect} makes life interesting. Where possible, we
% simply remove it and replace with the \enquote{parent} command; of course,
% the \cs{protect} might be explicit, in which case we need to leave it alone.
@@ -1452,13 +1495,13 @@
% \end{macro}
% \end{macro}
% \end{macro}
+% \end{macro}
+% \end{macro}
%
% \begin{macro}
% {
% \text_declare_expand_equivalent:Nn ,
-% \text_declare_expand_equivalent:cn ,
-% \text_declare_expand_equivalent:Nx ,
-% \text_declare_expand_equivalent:cx
+% \text_declare_expand_equivalent:cn
% }
% Create equivalents to allow replacement.
% \begin{macrocode}
@@ -1467,56 +1510,10 @@
\tl_clear_new:c { l_@@_expand_ \token_to_str:N #1 _tl }
\tl_set:cn { l_@@_expand_ \token_to_str:N #1 _tl } {#2}
}
-\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { Nx }
-\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c , cx }
+\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c }
% \end{macrocode}
% \end{macro}
%
-% \subsection{Accent and letter-like data for expandsion}
-%
-% For the letter-like commands, life is relatively easy: they are all
-% simply added as standard exceptions. The only oddity is \tn{SS}, which
-% gets converted to two letters.
-% \begin{macrocode}
-\cs_set_protected:Npn \@@_loop:Nn #1#2
- {
- \quark_if_recursion_tail_stop:N #1
- \text_declare_expand_equivalent:Nx #1
- {
- \codepoint_generate:nn {"#2}
- { \char_value_catcode:n {"#2} }
- }
- \@@_loop:Nn
- }
-\@@_loop:Nn
- \AA { 00C5 }
- \AE { 00C6 }
- \DH { 00D0 }
- \DJ { 0110 }
- \IJ { 0132 }
- \L { 0141 }
- \NG { 014A }
- \O { 00D8 }
- \OE { 0152 }
- \TH { 00DE }
- \aa { 00E5 }
- \ae { 00E6 }
- \dh { 00F0 }
- \dj { 0111 }
- \i { 0131 }
- \j { 0237 }
- \ij { 0132 }
- \l { 0142 }
- \ng { 014B }
- \o { 00F8 }
- \oe { 0153 }
- \ss { 00DF }
- \th { 00FE }
- \q_recursion_tail ?
- \q_recursion_stop
-\text_declare_expand_equivalent:Nn \SS { SS }
-% \end{macrocode}
-%
% \begin{macrocode}
%</package>
% \end{macrocode}
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text001.tlg
index 0077f55ca..aca51bd87 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text001.tlg
@@ -25,7 +25,7 @@ Opps not close token in $y = \sin \theta
============================================================
TEST 4: Letter-like commands
============================================================
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
+\AA \aa \ae \dh \ss \l \O
============================================================
============================================================
TEST 5: Accents
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index b026ee684..6256fe866 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-^^e5^^e5^^e6^^f0^^dfł^^f8
-^^c5^^c5^^c6^^d0SSŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
+\aa \aa \ae \dh \ss \l \o
+\AA \AA \AE \DH \SS \L \O
+\AA \aa \ae \dh \ss \l \o
+\AA \aa \ae \dh \ss \l \O
============================================================
============================================================
TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 5cbe50b00..5177226a9 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-^^c3^^a5^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
-^^c3^^85^^c3^^85^^c3^^86^^c3^^90SS^^c5^^81^^c3^^98
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
+\aa \aa \ae \dh \ss \l \o
+\AA \AA \AE \DH \SS \L \O
+\AA \aa \ae \dh \ss \l \o
+\AA \aa \ae \dh \ss \l \O
============================================================
============================================================
TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index b026ee684..6256fe866 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
============================================================
TEST 27: Letter-like commands
============================================================
-^^e5^^e5^^e6^^f0^^dfł^^f8
-^^c5^^c5^^c6^^d0SSŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
+\aa \aa \ae \dh \ss \l \o
+\AA \AA \AE \DH \SS \L \O
+\AA \aa \ae \dh \ss \l \o
+\AA \aa \ae \dh \ss \l \O
============================================================
============================================================
TEST 28: Accents
More information about the latex3-commits
mailing list.