[latex3-commits] [git/LaTeX3-latex3-latex3] main: Preserve catcode when case changing 8-bit chars < "80 (0c3da10e9)

Joseph Wright joseph.wright at morningstar2.co.uk
Wed Nov 9 15:30:20 CET 2022


Repository : https://github.com/latex3/latex3
On branch  : main
Link       : https://github.com/latex3/latex3/commit/0c3da10e9a7b0d6b4d82d6afeec3336573fb7908

>---------------------------------------------------------------

commit 0c3da10e9a7b0d6b4d82d6afeec3336573fb7908
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Wed Nov 9 14:30:20 2022 +0000

    Preserve catcode when case changing 8-bit chars < "80
    
    This time working!


>---------------------------------------------------------------

0c3da10e9a7b0d6b4d82d6afeec3336573fb7908
 l3kernel/l3text-case.dtx | 124 ++++++++++++++++++++++++++++++++++++-----------
 l3kernel/l3unicode.dtx   |  16 +++---
 2 files changed, 103 insertions(+), 37 deletions(-)

diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index 12c18e2f9..861d702cb 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -710,7 +710,7 @@
       { \exp_not:n {#4} }
       {
         \codepoint_generate:nn {#1}
-          { \@@_char_catcode:N #4 }
+          { \@@_change_case_catcode:nn {#4} {#1} }
         \tl_if_blank:nF {#2}
           {
             \codepoint_generate:nn {#2}
@@ -723,6 +723,33 @@
           }
       }
   }
+%    \end{macrocode}
+%   We need to ensure that only valid catcode-extraction is attempted. That's
+%   fine with Unicode engines but needs a bit of work with 8-bit ones. The
+%   logic is that if the original codepoint was in the ASCII range, we keep
+%   the catcode. Otherwise, if the target is in the ASCII range, we use
+%   the current catcode of the target. If neither is true, we use 13 on the
+%   grounds that this will be what is used anyway!
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_change_case_catcode:nn #1#2
+      { \@@_char_catcode:N #1 }
+  }
+  {
+    \cs_new:Npn \@@_change_case_catcode:nn #1#2
+      {
+        \@@_codepoint_compare:nNnTF {#1} < { "80 }
+          { \@@_char_catcode:N #1 }
+          {
+            \int_compare:nNnTF {#2} < { "80 }
+              { \char_value_catcode:n {#2} }
+              { 13 }
+          }
+      }
+  }
 \cs_new:Npn \@@_change_case_next_lower:nn #1#2
   { \@@_change_case_loop:nnw {#1} {#2} }
 \cs_new_eq:NN \@@_change_case_next_upper:nn
@@ -810,7 +837,10 @@
     \@@_codepoint_compare:nNnTF {#4} = { "00DF }
       {
         \@@_change_case_store:e
-         { \codepoint_generate:nn { "1E9E } { \@@_char_catcode:N #4 } }
+         {
+           \codepoint_generate:nn { "1E9E }
+             { \@@_change_case_catcode:nn {#4} { "1E9E } }
+         }
         \use:c { @@_change_case_next_ #2 :nn }
           {#2} {#3}
       }
@@ -1038,8 +1068,14 @@
         \bool_lazy_or:nnTF
           { \@@_codepoint_compare_p:nNn {#1} = { "0399 } }
           { \@@_codepoint_compare_p:nNn {#1} = { "03B9 } }
-          { \codepoint_generate:nn { "03AA } { \@@_char_catcode:N #1 } }
-          { \codepoint_generate:nn { "03AB } { \@@_char_catcode:N #1 } }
+          {
+            \codepoint_generate:nn { "03AA }
+              { \@@_change_case_catcode:nn {#1} { "03AA } }
+          }
+          {
+            \codepoint_generate:nn { "03AB }
+              { \@@_change_case_catcode:nn {#1} { "03AB } }
+          }
       }
   }
 %    \end{macrocode}
@@ -1318,7 +1354,10 @@
       { \@@_change_case_boundary_upper_el:nnnN {#3} }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn { "0389 } { \@@_char_catcode:N #3 } }
+          {
+            \codepoint_generate:nn { "0389 }
+              { \@@_change_case_catcode:nn {#3} { "0389 } }
+          }
         \@@_change_case_loop:nnw
       }
         {#1} {#2} #4 \q_@@_recursion_stop
@@ -1336,7 +1375,10 @@
       { \@@_change_case_loop:nnw {#2} {#3} #1#4 }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn { "0389 } { \@@_char_catcode:N #1 } }
+          {
+            \codepoint_generate:nn { "0389 }
+              { \@@_change_case_catcode:nn {#1} { "0389 } }
+          }
         \@@_change_case_loop:nnw {#2} {#3} #4
       }
   }
@@ -1370,8 +1412,10 @@
       {
         \@@_change_case_store:e
           {
-            \codepoint_generate:nn { "0535 } { \@@_char_catcode:N #4 }
-            \codepoint_generate:nn { "054E } { \@@_char_catcode:N #4 }
+            \codepoint_generate:nn { "0535 }
+              { \@@_change_case_catcode:nn {#4} { "0535 } }
+            \codepoint_generate:nn { "054E }
+              { \@@_change_case_catcode:nn {#4} { "054E } }
           }
         \use:c { @@_change_case_next_ #2 :nn }
           {#2} {#3}
@@ -1384,8 +1428,10 @@
       {
         \@@_change_case_store:e
           {
-            \codepoint_generate:nn { "0535 } { \@@_char_catcode:N #4 }
-            \codepoint_generate:nn { "057E } { \@@_char_catcode:N #4 }
+            \codepoint_generate:nn { "0535 }
+              { \@@_change_case_catcode:nn {#4} { "0535 } }
+            \codepoint_generate:nn { "057E }
+              { \@@_change_case_catcode:nn {#4} { "057E } }
           }
         \use:c { @@_change_case_next_ #2 :nn }
           {#2} {#3}
@@ -1405,12 +1451,10 @@
 %    \begin{macrocode}
 \cs_new:cpn { @@_change_case_lower_la-x-medieval:nnnn } #1#2#3#4
   {
-    \@@_codepoint_compare:nNnTF {#4} = { `V }
+    \@@_codepoint_compare:nNnTF {#4} = { "0056 }
       {
         \@@_change_case_store:e
-          {
-            \char_generate:nn { `u } { \@@_char_catcode:N #4 }
-          }
+          { \char_generate:nn { "0075 } { \@@_char_catcode:N #4 } }
         \use:c { @@_change_case_next_ #2 :nn }
           {#2} {#3}
       }
@@ -1418,12 +1462,10 @@
   }
 \cs_new:cpn { @@_change_case_upper_la-x-medieval:nnnn } #1#2#3#4
   {
-    \@@_codepoint_compare:nNnTF {#4} = { `u }
+    \@@_codepoint_compare:nNnTF {#4} = { "0075 }
       {
         \@@_change_case_store:e
-          {
-            \char_generate:nn { `V } { \@@_char_catcode:N #4 }
-          }
+          { \char_generate:nn { "0056 } { \@@_char_catcode:N #4 } }
         \use:c { @@_change_case_next_ #2 :nn }
           {#2} {#3}
       }
@@ -1483,9 +1525,12 @@
       {
         \@@_change_case_store:e
           {
-            \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #4 }
-            \codepoint_generate:nn { "0307 } { \@@_char_catcode:N #4 }
-            \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 }
+            \codepoint_generate:nn { "0069 }
+              { \@@_change_case_catcode:nn {#4} { "0069 } }
+            \codepoint_generate:nn { "0307 }
+              { \@@_change_case_catcode:nn {#4} { "0307 } }
+            \codepoint_generate:nn {#1}
+              { \@@_change_case_catcode:nn {#4} {#1} }
           }
         \@@_change_case_loop:nnw {#2} {#3}
       }
@@ -1501,7 +1546,10 @@
       { \@@_change_case_codepoint:nnnn {#2} {#2} {#3} {#4} }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 } }
+          {
+            \codepoint_generate:nn {#1}
+              { \@@_change_case_catcode:nn {#4} {#1} }
+          }
         \@@_change_case_lower_lt:nnw {#2} {#3}
       }
   }
@@ -1535,7 +1583,10 @@
       }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn { "0307 } { \@@_char_catcode:N #3 } }
+          {
+            \codepoint_generate:nn { "0307 }
+              { \@@_change_case_catcode:nn {#3} { "0307 } }
+          }
       }
     \@@_change_case_loop:nnw {#1} {#2} #3
   }
@@ -1574,7 +1625,10 @@
       { \@@_change_case_codepoint:nnnn { upper } {#2} {#3} {#4} }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 } }
+          {
+            \codepoint_generate:nn {#1}
+              { \@@_change_case_catcode:nn {#4} {#1} }
+          }
         \@@_change_case_upper_lt:nnw {#2} {#3}
       }
   }
@@ -1680,7 +1734,10 @@
         \@@_codepoint_compare:nNnTF {#4} = { "0130 }
           {
             \@@_change_case_store:e
-              { \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #4 } }
+              {
+                \codepoint_generate:nn { "0069 }
+                  { \@@_change_case_catcode:nn {#4} { "0069 } }
+              }
             \@@_change_case_loop:nnw {#1} {#3}
           }
           { \@@_change_case_codepoint:nnnn {#1} {#2} {#3} {#4} }
@@ -1698,7 +1755,10 @@
       { \@@_change_case_lower_tr:NnnN  #3 {#1} {#2} }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn { "0131 } { \@@_char_catcode:N #3 } }
+          {
+            \codepoint_generate:nn { "0131 }
+              { \@@_change_case_catcode:nn {#3} { "0131 } }
+          }
         \@@_change_case_loop:nnw {#1} {#2}
       }
         #4 \q_@@_recursion_stop
@@ -1719,12 +1779,18 @@
       { ! \@@_codepoint_compare_p:nNn {#4} = { "0307 } }
       {
         \@@_change_case_store:e 
-          { \codepoint_generate:nn { "0131 } { \@@_char_catcode:N #1 } }
+          {
+            \codepoint_generate:nn { "0131 }
+              { \@@_change_case_catcode:nn {#1} { "0131 } }
+          }
         \@@_change_case_loop:nnw {#2} {#3} #4
       }
       {
         \@@_change_case_store:e
-          { \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #1 } }
+          {
+            \codepoint_generate:nn { "0069 }
+              { \@@_change_case_catcode:nn {#1} { "0069 } }
+          }
         \@@_change_case_loop:nnw {#2} {#3}
       }
   }
@@ -1743,7 +1809,7 @@
         \@@_change_case_store:e
           {
             \codepoint_generate:nn { "0130 }
-              { \@@_char_catcode:N #4 }
+              { \@@_change_case_catcode:nn {#4} { "0130 } }
           }
         \use:c { @@_change_case_next_ #2 :nn } {#2} {#3}
       }
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index e7466d9a6..90f6ccaa1 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -55,7 +55,7 @@
 % Unicode \meta{codepoints} and are designed to give useable results with
 % both Unicode-aware and $8$-bit engines.
 %
-% \begin{function}[EXP, added = 2022-10-09]
+% \begin{function}[EXP, added = 2022-10-09, updated = 2022-11-09]
 %   {\codepoint_generate:nn}
 %   \begin{syntax}
 %      \cs{codepoint_generate:nn} \Arg{codepoint} \Arg{catcode}
@@ -80,8 +80,8 @@
 %   produced: these will be the bytes of the UTF-8 representation of the
 %   \meta{codepoint}. For all codepoints outside of the classical ASCII
 %   range, the generated character tokens will be active (category code
-%   $13$); for codepoints in the ASCII range, the prevailing category code of
-%   the character is used. To allow the result of this function to be used
+%   $13$); for codepoints in the ASCII range, the given \meta{catcode}
+%   will be used. To allow the result of this function to be used
 %   inside a expansion context, the result is protected by \cs{exp_not:n}.
 % \end{function}
 %
@@ -191,7 +191,11 @@
         \int_compare:nNnTF {#1} = { `\  }
           { ~ }
           {
-            \int_compare:nNnTF {#1} > { "80 }
+            \int_compare:nNnTF {#1} < { "80 }
+              {
+                \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
+                  { \char_generate:nn {#1} {#2} }
+              }
               {
                 \use:e
                   {
@@ -199,10 +203,6 @@
                       \__kernel_codepoint_to_bytes:n {#1}
                   }
               }
-              {
-                \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
-                  { \char_generate:nn {#1} { \char_value_catcode:n {#1} } }
-              }
           }
       }
     \cs_new:Npn \@@_generate:nnnn #1#2#3#4





More information about the latex3-commits mailing list.