[latex3-commits] [git/LaTeX3-latex3-latex3] main: Revert "Convert all accents to Unicode in supporting engines" (e9ca80fcd)

Mon Feb 6 20:07:29 CET 2023

Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/e9ca80fcd881e67d32eb028ad472e61b34d50026

>---------------------------------------------------------------

commit e9ca80fcd881e67d32eb028ad472e61b34d50026
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Sun Feb 5 17:02:31 2023 +0000

    Revert "Convert all accents to Unicode in supporting engines"
    
    This reverts commit 60e284652c21844303f80df31eee59180795eb74.


>---------------------------------------------------------------

e9ca80fcd881e67d32eb028ad472e61b34d50026
 l3kernel/l3text-purify.dtx              | 83 ++++++++++++++++-----------------
 l3kernel/l3text.dtx                     | 57 +++-------------------
 l3kernel/testfiles/m3text001.luatex.tlg |  2 +-
 l3kernel/testfiles/m3text001.xetex.tlg  |  2 +-
 l3kernel/testfiles/m3text002.luatex.tlg |  8 ++--
 l3kernel/testfiles/m3text002.xetex.tlg  |  8 ++--
 6 files changed, 55 insertions(+), 105 deletions(-)

diff --git a/l3kernel/l3text-purify.dtx b/l3kernel/l3text-purify.dtx
index 0599f7f8c..c547cd14b 100644
--- a/l3kernel/l3text-purify.dtx
+++ b/l3kernel/l3text-purify.dtx
@@ -459,56 +459,51 @@
 %   the $8$-bit engines need these leaving as commands for typesetting.
 %   However, we now need to make those into real Unicode codepoints: they
 %   come after the letter, so there is a shuffle as well as a change of
-%   data. This only applies to non-Unicode engines.
+%   data.
 %    \begin{macrocode}
-\bool_lazy_or:nnF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
+\cs_new:Npn \@@_purify_accent:NN #1#2
   {
-    \cs_new:Npn \@@_purify_accent:NN #1#2
+    \cs_if_exist:cTF
+      { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
       {
-        \cs_if_exist:cTF
+        \exp_not:v
           { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
-          {
-            \exp_not:v
-              { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
-          }
-          {
-            \exp_not:n {#2}
-            \exp_not:v { c_@@_purify_ \token_to_str:N #1 _tl }
-          }
       }
-    \tl_map_inline:nn { \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
-      { \text_declare_purify_equivalent:Nn #1 { \@@_purify_accent:NN #1 } }
-    \group_begin:
-      \cs_set_protected:Npn \@@_loop:Nn #1#2
-        {
-          \quark_if_recursion_tail_stop:N #1
-          \tl_const:cx { c_@@_purify_ \token_to_str:N #1 _tl }
-            { \codepoint_generate:nn {"#2} { \char_value_catcode:n { "#2 } } }
-          \@@_loop:Nn
-        }
-      \@@_loop:Nn
-        \` { 0300 }
-        \' { 0301 }
-        \^ { 0302 }
-        \~ { 0303 }
-        \= { 0304 }
-        \u { 0306 }
-        \. { 0307 }
-        \" { 0308 }
-        \r { 030A }
-        \H { 030B }
-        \v { 030C }
-        \d { 0323 }
-        \c { 0327 }
-        \k { 0328 }
-        \b { 0331 }
-        \t { 0361 }
-        \q_recursion_tail { }
-        \q_recursion_stop
-    \group_end:
+      {
+        \exp_not:n {#2}
+        \exp_not:v { c_@@_purify_ \token_to_str:N #1 _tl }
+      }
   }
+\tl_map_inline:nn { \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
+  { \text_declare_purify_equivalent:Nn #1 { \@@_purify_accent:NN #1 } }
+\group_begin:
+  \cs_set_protected:Npn \@@_loop:Nn #1#2
+    {
+      \quark_if_recursion_tail_stop:N #1
+      \tl_const:cx { c_@@_purify_ \token_to_str:N #1 _tl }
+        { \codepoint_generate:nn {"#2} { \char_value_catcode:n { "#2 } } }
+      \@@_loop:Nn
+    }
+  \@@_loop:Nn
+    \` { 0300 }
+    \' { 0301 }
+    \^ { 0302 }
+    \~ { 0303 }
+    \= { 0304 }
+    \u { 0306 }
+    \. { 0307 }
+    \" { 0308 }
+    \r { 030A }
+    \H { 030B }
+    \v { 030C }
+    \d { 0323 }
+    \c { 0327 }
+    \k { 0328 }
+    \b { 0331 }
+    \t { 0361 }
+    \q_recursion_tail { }
+    \q_recursion_stop
+\group_end:
 %    \end{macrocode}
 % \end{macro}
 %
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 9655809f7..4d7d2f2a1 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -73,8 +73,6 @@
 %   Any commands listed in \cs{l_text_expand_exclude_tl} are excluded from
 %   expansion. Letter-like commands, e.g.~\cs{ae}, and accent commands creating
 %   pre-composed UTF-8 codepoints are converted to the UTF-8 equivalents.
-%   For Unicode engines, all accents are converted to the appropriate combining
-%   characters.
 % \end{function}
 %
 % \begin{function}[added = 2020-01-22, updated = 2023-02-05]
@@ -1434,16 +1432,15 @@
 %
 % \subsection{Accent and letter-like data for expandsion}
 %
-% \begin{macro}[rEXP]{\@@_expand_accent:Nn, \@@_expand_accent_aux:Nn}
+% \begin{macro}[rEXP]{\@@_expand_accent:NN}
 %   Accent \textsc{licr} handling is a little more complex. Accents may exist
 %   as pre-composed codepoints or as independent glyphs. At the expansion
 %   stage, only pre-composed glyphs can be processed: for \emph{ad hoc} accents,
-%   we have to retain the command version for $8$-bit typesetting. On the
-%   other hand, some text processing will not work properly with such
-%   command-based accents, so we convert to combining characters if there is
-%   engine support.
+%   we have to retain the command version for $8$-bit typesetting. So at this
+%   stage we do not remove everything (see the purification code for the
+%   action once we are definitely past typesetting).
 %    \begin{macrocode}
-\cs_new:Npn \@@_expand_accent:Nn #1#2
+\cs_new:Npn \@@_expand_accent:NN #1#2
   {
     \cs_if_exist:cTF
       { c_@@_expand_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
@@ -1451,52 +1448,10 @@
         \exp_not:v
           { c_@@_expand_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
       }
-      { \@@_expand_accent_aux:Nn #1 {#2} }
-  }
-\bool_lazy_or:nnTF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
-  {
-    \cs_new:Npn \@@_expand_accent_aux:Nn #1#2
-      {
-        #2
-        \use:c { c_@@_expand_ \token_to_str:N #1 _tl }
-      }
-    \group_begin:
-      \cs_set_protected:Npn \@@_loop:Nn #1#2
-        {
-          \quark_if_recursion_tail_stop:N #1
-          \tl_const:cx { c_@@_expand_ \token_to_str:N #1 _tl }
-            { \codepoint_generate:nn {"#2} { \char_value_catcode:n { "#2 } } }
-          \@@_loop:Nn
-        }
-      \@@_loop:Nn
-        \` { 0300 }
-        \' { 0301 }
-        \^ { 0302 }
-        \~ { 0303 }
-        \= { 0304 }
-        \u { 0306 }
-        \. { 0307 }
-        \" { 0308 }
-        \r { 030A }
-        \H { 030B }
-        \v { 030C }
-        \d { 0323 }
-        \c { 0327 }
-        \k { 0328 }
-        \b { 0331 }
-        \t { 0361 }
-        \q_recursion_tail { }
-        \q_recursion_stop
-    \group_end:
-  }
-  {
-    \cs_new:Npn \@@_expand_accent_aux:Nn #1#2
       { \exp_not:n {#1} {#2} }
   }
 \tl_map_inline:nn { \` \' \^ \~ \= \u \. \" \r \H \v \d \c \k \b \t }
-  { \text_declare_expand_equivalent:Nn #1 { \@@_expand_accent:Nn #1 } }
+  { \text_declare_expand_equivalent:Nn #1 { \@@_expand_accent:NN #1 } }
 %    \end{macrocode}
 %   The list of pre-composed accent characters here is taken from
 %   \texttt{puenc.def}. All of the pre-composed cases take a single letter
diff --git a/l3kernel/testfiles/m3text001.luatex.tlg b/l3kernel/testfiles/m3text001.luatex.tlg
index 67ae745ae..aa4607de4 100644
--- a/l3kernel/testfiles/m3text001.luatex.tlg
+++ b/l3kernel/testfiles/m3text001.luatex.tlg
@@ -30,7 +30,7 @@ TEST 4: Letter-like commands
 ============================================================
 TEST 5: Accents
 ============================================================
-^^e4ĕi̋ǒu̇
+^^e4ĕ\H {i}ǒ\.{u}
 ============================================================
 ============================================================
 TEST 6: Implicit tokens
diff --git a/l3kernel/testfiles/m3text001.xetex.tlg b/l3kernel/testfiles/m3text001.xetex.tlg
index 67ae745ae..aa4607de4 100644
--- a/l3kernel/testfiles/m3text001.xetex.tlg
+++ b/l3kernel/testfiles/m3text001.xetex.tlg
@@ -30,7 +30,7 @@ TEST 4: Letter-like commands
 ============================================================
 TEST 5: Accents
 ============================================================
-^^e4ĕi̋ǒu̇
+^^e4ĕ\H {i}ǒ\.{u}
 ============================================================
 ============================================================
 TEST 6: Implicit tokens
diff --git a/l3kernel/testfiles/m3text002.luatex.tlg b/l3kernel/testfiles/m3text002.luatex.tlg
index 6f4570b0c..10708fb9b 100644
--- a/l3kernel/testfiles/m3text002.luatex.tlg
+++ b/l3kernel/testfiles/m3text002.luatex.tlg
@@ -347,10 +347,10 @@ TEST 27: Letter-like commands
 ============================================================
 TEST 28: Accents
 ============================================================
-^^e4ĕi̋ǒu̇
-^^c4ĔI̋ǑU̇
-^^c4ĔI̋ǒu̇
-^^c4ĔI̋ǒu̇
+^^e4ĕ\H {i}ǒ\.{u}
+^^c4Ĕ\H {I}Ǒ\.{U}
+^^c4Ĕ\H {I}ǒ\.{u}
+^^c4Ĕ\H {I}ǒ\.{u}
 ============================================================
 ============================================================
 TEST 29: Active chars
diff --git a/l3kernel/testfiles/m3text002.xetex.tlg b/l3kernel/testfiles/m3text002.xetex.tlg
index 6f4570b0c..10708fb9b 100644
--- a/l3kernel/testfiles/m3text002.xetex.tlg
+++ b/l3kernel/testfiles/m3text002.xetex.tlg
@@ -347,10 +347,10 @@ TEST 27: Letter-like commands
 ============================================================
 TEST 28: Accents
 ============================================================
-^^e4ĕi̋ǒu̇
-^^c4ĔI̋ǑU̇
-^^c4ĔI̋ǒu̇
-^^c4ĔI̋ǒu̇
+^^e4ĕ\H {i}ǒ\.{u}
+^^c4Ĕ\H {I}Ǒ\.{U}
+^^c4Ĕ\H {I}ǒ\.{u}
+^^c4Ĕ\H {I}ǒ\.{u}
 ============================================================
 ============================================================
 TEST 29: Active chars