[latex3-commits] [git/LaTeX3-latex3-latex3] main: Preserve catcode when case changing 8-bit chars < "80 (a6aeb66e7)

Joseph Wright joseph.wright at morningstar2.co.uk
Wed Nov 9 09:51:17 CET 2022


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/a6aeb66e75ae448d1d2927e9c20a7f5812d21932

>---------------------------------------------------------------

commit a6aeb66e75ae448d1d2927e9c20a7f5812d21932
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Wed Nov 9 08:48:18 2022 +0000

    Preserve catcode when case changing 8-bit chars < "80
    
    As discussed with the team.
    Note that active chars already have independent handling.


>---------------------------------------------------------------

a6aeb66e75ae448d1d2927e9c20a7f5812d21932
 l3kernel/l3text-case.dtx | 44 +++++++++++++++++++++++++++++++++-----------
 l3kernel/l3unicode.dtx   | 16 ++++++++--------
 2 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index 12c18e2f9..ff8c2640d 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -704,21 +704,43 @@
   {
     \use:e { \@@_change_case_codepoint_aux:nnnn #1 {#2} }
   }
-\cs_new:Npn \@@_change_case_codepoint_aux:nnnn #1#2#3#4
+%    \end{macrocode}
+%   We need to ensure that only valid catcode-extraction is attempted. That's
+%   fine with Unicode engines but needs a bit of work with 8-bit ones. The
+%   logic is that if the original codepoint was in the ASCII range, we keep
+%   the catcode. Otherwise, if the target is in the ASCII range, we use
+%   the standard catcode. If neither is true, we use 13 on the grounds that
+%   this will be what is used anyway!
+%    \begin{macrocode}
+\cs_new:Npx \@@_change_case_codepoint_aux:nnnn #1#2#3#4
   {
-    \@@_codepoint_compare:nNnTF {#4} = {#1}
-      { \exp_not:n {#4} }
+    \exp_not:N \@@_codepoint_compare:nNnTF {#4} = {#1}
+      { \exp_not:N \exp_not:n {#4} }
       {
-        \codepoint_generate:nn {#1}
-          { \@@_char_catcode:N #4 }
-        \tl_if_blank:nF {#2}
+        \exp_not:N \codepoint_generate:nn {#1}
+          {
+            \bool_lazy_or:nnTF
+              { \sys_if_engine_luatex_p: }
+              { \sys_if_engine_xetex_p: }
+              { \exp_not:N  \@@_char_catcode:N #4 }
+              {
+                \exp_not:N \@@_codepoint_compare:nNnTF {#4} < { "80 }
+                  { \exp_not:N \@@_char_catcode:N #4 }
+                  {
+                    \exp_not:N \int_compare:nNnTF {#1} < { "80 }
+                      { \exp_not:N \char_value_catcode:n {#1} }
+                      { 13 }
+                  }
+              }
+          }
+        \exp_not:N \tl_if_blank:nF {#2}
           {
-            \codepoint_generate:nn {#2}
-              { \char_value_catcode:n {#2} }
-            \tl_if_blank:nF {#3}
+            \exp_not:N \codepoint_generate:nn {#2}
+              { \exp_not:N \char_value_catcode:n {#2} }
+            \exp_not:N \tl_if_blank:nF {#3}
              {
-               \codepoint_generate:nn {#3}
-                 { \char_value_catcode:n {#3} }
+               \exp_not:N \codepoint_generate:nn {#3}
+                 { \exp_not:N \char_value_catcode:n {#3} }
              }
           }
       }
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index e7466d9a6..90f6ccaa1 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -55,7 +55,7 @@
 % Unicode \meta{codepoints} and are designed to give useable results with
 % both Unicode-aware and $8$-bit engines.
 %
-% \begin{function}[EXP, added = 2022-10-09]
+% \begin{function}[EXP, added = 2022-10-09, updated = 2022-11-09]
 %   {\codepoint_generate:nn}
 %   \begin{syntax}
 %      \cs{codepoint_generate:nn} \Arg{codepoint} \Arg{catcode}
@@ -80,8 +80,8 @@
 %   produced: these will be the bytes of the UTF-8 representation of the
 %   \meta{codepoint}. For all codepoints outside of the classical ASCII
 %   range, the generated character tokens will be active (category code
-%   $13$); for codepoints in the ASCII range, the prevailing category code of
-%   the character is used. To allow the result of this function to be used
+%   $13$); for codepoints in the ASCII range, the given \meta{catcode}
+%   will be used. To allow the result of this function to be used
 %   inside an expansion context, the result is protected by \cs{exp_not:n}.
 % \end{function}
 %
@@ -191,7 +191,11 @@
         \int_compare:nNnTF {#1} = { `\  }
           { ~ }
           {
-            \int_compare:nNnTF {#1} > { "80 }
+            \int_compare:nNnTF {#1} < { "80 }
+              {
+                \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
+                  { \char_generate:nn {#1} {#2} }
+              }
               {
                 \use:e
                   {
@@ -199,10 +203,6 @@
                       \__kernel_codepoint_to_bytes:n {#1}
                   }
               }
-              {
-                \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
-                  { \char_generate:nn {#1} { \char_value_catcode:n {#1} } }
-              }
           }
       }
     \cs_new:Npn \@@_generate:nnnn #1#2#3#4





More information about the latex3-commits mailing list.