[latex3-commits] [git/LaTeX3-latex3-latex3] main: Revert "Expand letter-like commands in \text_expand:n" (5627f4335)

Joseph Wright joseph.wright at morningstar2.co.uk
Mon Feb 6 20:07:49 CET 2023


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/5627f4335f205d992857d024095e02f3c1a39c96

>---------------------------------------------------------------

commit 5627f4335f205d992857d024095e02f3c1a39c96
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sun Feb 5 11:23:39 2023 +0000

    Revert "Expand letter-like commands in \text_expand:n"
    
    This reverts commit 1c6d0275c15cb3075e38da858b689e1a3b230b64.


>---------------------------------------------------------------

5627f4335f205d992857d024095e02f3c1a39c96
 l3kernel/CHANGELOG.md                   |   7 --
 l3kernel/doc/l3obsolete.txt             |   1 -
 l3kernel/l3text-purify.dtx              |  74 ++++++++++++++++++++
 l3kernel/l3text.dtx                     | 119 ++++++++++++++++----------------
 l3kernel/testfiles/m3text001.tlg        |   2 +-
 l3kernel/testfiles/m3text002.luatex.tlg |   8 +--
 l3kernel/testfiles/m3text002.tlg        |   8 +--
 l3kernel/testfiles/m3text002.xetex.tlg  |   8 +--
 8 files changed, 145 insertions(+), 82 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index c1b2d4e0e..2c6b3d478 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -8,16 +8,9 @@ this project uses date-based 'snapshot' version identifiers.
 ## [Unreleased]
 
 ### Changed
-- `\text_expand:n` now converts letter-like commands,
-  e.g. `\ae`, to the UTF-8 equivalent: breaking change
-  also removes `\l_text_letterlike_tl` (unused outside of
-  the `expl3` kernel in TeX Live)
 - Swap meaning of `el` and `el-x-iota` when case changing
   Greek: match traditional LaTeX approach
 
-### Removed
-- `\l_text_letterlike_tl` (breaking change)
-
 ## [2023-02-02]
 
 ### Fixed
diff --git a/l3kernel/doc/l3obsolete.txt b/l3kernel/doc/l3obsolete.txt
index b1a258a4d..722f807a6 100644
--- a/l3kernel/doc/l3obsolete.txt
+++ b/l3kernel/doc/l3obsolete.txt
@@ -342,7 +342,6 @@ Function                                      Date removed
 \KV_process_space_removal_sanitize:NNn          2011-09-08
 \l_iow_line_length_int                          2013-01-08
 \l_last_box                                     2012-05-11
-\l_text_letterlike_tl                           2023-02-05
 \l_tl_replace_toks                              2011-09-08
 \l_tmpa_toks                                    2011-09-08
 \l_tmpb_toks                                    2011-09-08
diff --git a/l3kernel/l3text-purify.dtx b/l3kernel/l3text-purify.dtx
index a086dc060..a28b73fe5 100644
--- a/l3kernel/l3text-purify.dtx
+++ b/l3kernel/l3text-purify.dtx
@@ -454,6 +454,80 @@
 %
 % \subsection{Accent and letter-like data for purifying text}
 %
+% In contrast to case changing, both $8$-bit and Unicode engines need
+% information for text purification to handle accents and letter-like
+% functions: these all need to be removed. However, the results are
+% of course engine-dependent.
+%
+% For the letter-like commands, life is relatively easy: they are all
+% simply added as standard exceptions. The only oddity is \tn{SS}, which
+% gets converted to two letters. (At some stage an alternative version
+% can presumably be added to \pkg{babel} or similar.)
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  { % LuaTeX or XeTeX: the equivalent is a single generated character
+    \cs_set_protected:Npn \@@_loop:Nn #1#2
+      {
+        \quark_if_recursion_tail_stop:N #1
+        \text_declare_purify_equivalent:Nx #1
+          {
+            \char_generate:nn { "#2 }
+              { \char_value_catcode:n { "#2 } } % reuse the current catcode
+          }
+        \@@_loop:Nn % carry on with the next command/hex pair
+      }
+  }
+  { % other engines: the equivalent is built from two catcode-13 chars
+    \cs_set_protected:Npn \@@_loop:Nn #1#2
+      {
+        \quark_if_recursion_tail_stop:N #1
+        \text_declare_purify_equivalent:Nx #1
+          {
+            \exp_args:Ne \@@_tmp:n
+              { \__kernel_codepoint_to_bytes:n { "#2 } }
+          }
+        \@@_loop:Nn % carry on with the next command/hex pair
+      }
+    \cs_set:Npn \@@_tmp:n #1 { \@@_tmp:nnnn #1 } % pass the result unbraced
+    \cs_set:Npn \@@_tmp:nnnn #1#2#3#4
+      { % only the first two of the four arguments are used
+        \exp_after:wN \exp_after:wN \exp_after:wN
+          \exp_not:N \char_generate:nn {#1} { 13 }
+        \exp_after:wN \exp_after:wN \exp_after:wN
+          \exp_not:N \char_generate:nn {#2} { 13 }
+      }
+  }
+\@@_loop:Nn % entries are pairs: command, then hex code point in braces
+  \AA { 00C5 }
+  \AE { 00C6 }
+  \DH { 00D0 }
+  \DJ { 0110 }
+  \IJ { 0132 }
+  \L  { 0141 }
+  \NG { 014A }
+  \O  { 00D8 }
+  \OE { 0152 }
+  \TH { 00DE }
+  \aa { 00E5 }
+  \ae { 00E6 }
+  \dh { 00F0 }
+  \dj { 0111 }
+  \i  { 0131 }
+  \j  { 0237 }
+  \ij { 0133 } % small ligature; 0132 is the capital form used by \IJ
+  \l  { 0142 }
+  \ng { 014B }
+  \o  { 00F8 }
+  \oe { 0153 }
+  \ss { 00DF }
+  \th { 00FE }
+  \q_recursion_tail ? % the ? fills the second argument of the end marker
+  \q_recursion_stop
+\text_declare_purify_equivalent:Nn \SS { SS } % \SS becomes two letters
+%    \end{macrocode}
+%
 % \begin{macro}[rEXP]{\@@_purify_accent:NN}
 %   Accent \textsc{licr} handling is a little more complex. Accents may exist
 %   as pre-composed codepoints or as independent glyphs. The former are all
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index c111068bc..6bd33f190 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -58,8 +58,7 @@
 %
 % \section{Expanding text}
 %
-% \begin{function}[EXP, added = 2020-01-02, updated = 2023-02-05]
-%   {\text_expand:n}
+% \begin{function}[EXP, added = 2020-01-02]{\text_expand:n}
 %   \begin{syntax}
 %     \cs{text_expand:n} \Arg{text}
 %   \end{syntax}
@@ -70,17 +69,15 @@
 %   \cs{l_text_math_delims_tl} or as the argument to commands listed
 %   in \cs{l_text_math_arg_tl}). Commands which are neither engine-
 %   nor \LaTeX{} protected are expanded exhaustively.
-%   Any commands listed in \cs{l_text_expand_exclude_tl} and
-%   \cs{l_text_accents_tl} are excluded from expansion. Letter-like
-%   commands, e.g.~\cs{ae}, are converted to the UTF-8 equivalent.
+%   Any commands listed in \cs{l_text_expand_exclude_tl},
+%   \cs{l_text_accents_tl} and \cs{l_text_letterlike_tl} are excluded from
+%   expansion.
 % \end{function}
 %
-% \begin{function}[added = 2020-01-22, updated = 2023-02-05]
+% \begin{function}[added = 2020-01-22]
 %   {
 %     \text_declare_expand_equivalent:Nn ,
-%     \text_declare_expand_equivalent:Nx ,
-%     \text_declare_expand_equivalent:cn ,
-%     \text_declare_expand_equivalent:cx
+%     \text_declare_expand_equivalent:cn
 %   }
 %   \begin{syntax}
 %     \cs{text_declare_expand_equivalent:Nn} \meta{cmd} \Arg{replacement}
@@ -264,6 +261,11 @@
 %   by expansion. (Defined only for the \LaTeXe{} package.)
 % \end{variable}
 %
+% \begin{variable}{\l_text_letterlike_tl}
+%   Lists commands which represent letters; these are left unchanged by
+%   expansion. (Defined only for the \LaTeXe{} package.)
+% \end{variable}
+%
 % \begin{variable}{\l_text_math_arg_tl}
 %   Lists commands present in the \meta{text} where the argument of the
 %   command should be treated as math mode material. The treatment here is
@@ -792,13 +794,28 @@
 %
 % \subsection{Configuration variables}
 %
-% \begin{variable}{\l_text_accents_tl}
+% \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}
 %   Special cases for accents and letter-like symbols, which in some cases will
 %   need to be converted further.
 %    \begin{macrocode}
 \tl_new:N \l_text_accents_tl
 \tl_set:Nn \l_text_accents_tl
   { \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
+\tl_new:N \l_text_letterlike_tl
+\tl_set:Nn \l_text_letterlike_tl
+  { % one uppercase/lowercase pair of letter-like commands per line
+    \AA \aa
+    \AE \ae
+    \DH \dh
+    \DJ \dj
+    \IJ \ij
+    \L  \l
+    \NG \ng
+    \O  \o
+    \OE \oe
+    \SS \ss
+    \TH \th
+  }
 %    \end{macrocode}
 % \end{variable}
 %
@@ -936,6 +953,8 @@
 % \begin{macro}[EXP]{\@@_expand_exclude:Nnn}
 % \begin{macro}[EXP]{\@@_expand_accent:N}
 % \begin{macro}[EXP]{\@@_expand_accent:NN}
+% \begin{macro}[EXP]{\@@_expand_letterlike:N}
+% \begin{macro}[EXP]{\@@_expand_letterlike:NN}
 % \begin{macro}[EXP]{\@@_expand_cs:N}
 % \begin{macro}[EXP]{\@@_expand_protect:w}
 % \begin{macro}[EXP]{\@@_expand_protect:N}
@@ -1247,7 +1266,7 @@
 \cs_new:Npn \@@_expand_accent:NN #1#2
   {
     \@@_if_q_recursion_tail_stop_do:Nn #2
-      { \@@_expand_cs:N #1 }
+      { \@@_expand_letterlike:N #1 }
     \cs_if_eq:NNTF #2 #1
       {
         \@@_use_i_delimit_by_q_recursion_stop:nw
@@ -1259,6 +1278,30 @@
       { \@@_expand_accent:NN #1 }
   }
 %    \end{macrocode}
+%   Another list of exceptions: these ones take no arguments so are
+%   easier to handle.
+%    \begin{macrocode}
+\cs_new:Npn \@@_expand_letterlike:N #1
+  { % compare the command with each entry of \l_text_letterlike_tl
+    \exp_after:wN \@@_expand_letterlike:NN \exp_after:wN
+      #1 \l_text_letterlike_tl % the list is expanded once before the call
+      \q_@@_recursion_tail \q_@@_recursion_stop
+  }
+\cs_new:Npn \@@_expand_letterlike:NN #1#2
+  { % first argument: command under test; second: current list entry
+    \@@_if_q_recursion_tail_stop_do:Nn #2
+      { \@@_expand_cs:N #1 } % end of list reached: pass the command on
+    \cs_if_eq:NNTF #2 #1
+      {
+        \@@_use_i_delimit_by_q_recursion_stop:nw
+          { % match: drop the rest of the list and store the command
+            \@@_expand_store:n {#1}
+            \@@_expand_loop:w
+          }
+      }
+      { \@@_expand_letterlike:NN #1 } % no match: try the next entry
+  }
+%    \end{macrocode}
 %   \LaTeXe{}'s \cs{protect} makes life interesting. Where possible, we
 %   simply remove it and replace with the \enquote{parent} command; of course,
 %   the \cs{protect} might be explicit, in which case we need to leave it alone.
@@ -1452,13 +1495,13 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
+% \end{macro}
+% \end{macro}
 %
 % \begin{macro}
 %   {
 %     \text_declare_expand_equivalent:Nn ,
-%     \text_declare_expand_equivalent:cn ,
-%     \text_declare_expand_equivalent:Nx ,
-%     \text_declare_expand_equivalent:cx
+%     \text_declare_expand_equivalent:cn
 %   }
 %  Create equivalents to allow replacement.
 %    \begin{macrocode}
@@ -1467,56 +1510,10 @@
     \tl_clear_new:c { l_@@_expand_ \token_to_str:N #1 _tl }
     \tl_set:cn { l_@@_expand_ \token_to_str:N #1 _tl } {#2}
   }
-\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { Nx }
-\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c , cx }
+\cs_generate_variant:Nn \text_declare_expand_equivalent:Nn { c }
 %    \end{macrocode}
 % \end{macro}
 %
-% \subsection{Accent and letter-like data for expandsion}
-%
-% For the letter-like commands, life is relatively easy: they are all
-% simply added as standard exceptions. The only oddity is \tn{SS}, which
-% gets converted to two letters.
-%    \begin{macrocode}
-\cs_set_protected:Npn \@@_loop:Nn #1#2
-  {
-    \quark_if_recursion_tail_stop:N #1
-    \text_declare_expand_equivalent:Nx #1
-      {
-        \codepoint_generate:nn {"#2}
-          { \char_value_catcode:n {"#2} }
-      }
-    \@@_loop:Nn
-  }
-\@@_loop:Nn
-  \AA { 00C5 }
-  \AE { 00C6 }
-  \DH { 00D0 }
-  \DJ { 0110 }
-  \IJ { 0132 }
-  \L  { 0141 }
-  \NG { 014A }
-  \O  { 00D8 }
-  \OE { 0152 }
-  \TH { 00DE }
-  \aa { 00E5 }
-  \ae { 00E6 }
-  \dh { 00F0 }
-  \dj { 0111 }
-  \i  { 0131 }
-  \j  { 0237 }
-  \ij { 0132 }
-  \l  { 0142 }
-  \ng { 014B }
-  \o  { 00F8 }
-  \oe { 0153 }
-  \ss { 00DF }
-  \th { 00FE }
-  \q_recursion_tail ?
-  \q_recursion_stop
-\text_declare_expand_equivalent:Nn \SS { SS }
-%    \end{macrocode}
-%
 %    \begin{macrocode}
 %</package>
 %    \end{macrocode}
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text001.tlg
index 0077f55ca..aca51bd87 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text001.tlg
@@ -25,7 +25,7 @@ Opps not close token in $y = \sin \theta
 ============================================================
 TEST 4: Letter-like commands
 ============================================================
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
+\AA \aa \ae \dh \ss \l \O 
 ============================================================
 ============================================================
 TEST 5: Accents
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index b026ee684..6256fe866 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
 ============================================================
 TEST 27: Letter-like commands
 ============================================================
-^^e5^^e5^^e6^^f0^^dfł^^f8
-^^c5^^c5^^c6^^d0SSŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
+\aa \aa \ae \dh \ss \l \o 
+\AA \AA \AE \DH \SS \L \O 
+\AA \aa \ae \dh \ss \l \o 
+\AA \aa \ae \dh \ss \l \O 
 ============================================================
 ============================================================
 TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.tlg b/l3kernel/testfiles/m3text002.tlg
index 5cbe50b00..5177226a9 100644
--- a/l3kernel/testfiles/m3text002.tlg
+++ b/l3kernel/testfiles/m3text002.tlg
@@ -339,10 +339,10 @@ FUSSBALL
 ============================================================
 TEST 27: Letter-like commands
 ============================================================
-^^c3^^a5^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
-^^c3^^85^^c3^^85^^c3^^86^^c3^^90SS^^c5^^81^^c3^^98
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^b8
-^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
+\aa \aa \ae \dh \ss \l \o 
+\AA \AA \AE \DH \SS \L \O 
+\AA \aa \ae \dh \ss \l \o 
+\AA \aa \ae \dh \ss \l \O 
 ============================================================
 ============================================================
 TEST 28: Accents
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index b026ee684..6256fe866 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -339,10 +339,10 @@ FUSSBALL
 ============================================================
 TEST 27: Letter-like commands
 ============================================================
-^^e5^^e5^^e6^^f0^^dfł^^f8
-^^c5^^c5^^c6^^d0SSŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
-^^c5^^c5^^c6^^d0SsŁ^^d8
+\aa \aa \ae \dh \ss \l \o 
+\AA \AA \AE \DH \SS \L \O 
+\AA \aa \ae \dh \ss \l \o 
+\AA \aa \ae \dh \ss \l \O 
 ============================================================
 ============================================================
 TEST 28: Accents





More information about the latex3-commits mailing list.