[latex3-commits] [git/LaTeX3-latex3-latex3] text-map: Move codepoint handling to l3text core (86fd81a70)

Joseph Wright joseph.wright at morningstar2.co.uk
Mon Aug 8 13:51:42 CEST 2022


Repository : https://github.com/latex3/latex3
On branch  : text-map
Link       : https://github.com/latex3/latex3/commit/86fd81a702504e19b9890e781dff32fc0ca398be

>---------------------------------------------------------------

commit 86fd81a702504e19b9890e781dff32fc0ca398be
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Mon Aug 8 12:49:24 2022 +0100

    Move codepoint handling to l3text core
    
    This will allow the same ideas to be used in other
    parts of the module, most obviously the case changer.


>---------------------------------------------------------------

86fd81a702504e19b9890e781dff32fc0ca398be
 l3kernel/l3text-map.dtx | 133 ++++--------------------------------------------
 l3kernel/l3text.dtx     | 115 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 122 deletions(-)

diff --git a/l3kernel/l3text-map.dtx b/l3kernel/l3text-map.dtx
index 918721e39..c461568f4 100644
--- a/l3kernel/l3text-map.dtx
+++ b/l3kernel/l3text-map.dtx
@@ -71,10 +71,6 @@
 % \begin{macro}[EXP]{\@@_map_group:Nnn}
 % \begin{macro}[EXP]{\@@_map_space:Nnw}
 % \begin{macro}[EXP]{\@@_map_N_type:NnN}
-% \begin{macro}[EXP]{\@@_map_char:NnN}
-% \begin{macro}[EXP]{\@@_map_char:NnNN}
-% \begin{macro}[EXP]{\@@_map_char:NnNNN}
-% \begin{macro}[EXP]{\@@_map_char:NnNNNN}
 % \begin{macro}[EXP]{\@@_map_codepoint:Nnn}
 % \begin{macro}[EXP]{\@@_map_CR:Nnw}
 % \begin{macro}[EXP]{\@@_map_CR:NnN}
@@ -184,59 +180,25 @@
         #1 {#3}
         \@@_map_loop:Nnw #1 { }
       }
-      { \@@_map_char:NnN #1 {#2} #3 }
-  }
-%    \end{macrocode}
-%  We want to keep common code paths, so collect up one Unicode codepoint
-%  as a single argument in an engine-independent way.
-%    \begin{macrocode}
-\bool_lazy_or:nnTF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
-  {
-    \cs_new:Npn \@@_map_char:NnN #1#2#3
-      { \@@_map_codepoint:Nnn #1 {#2} #3 }
-  }
-  {
-    \cs_new:Npn \@@_map_char:NnN #1#2#3
       {
-        \int_compare:nNnTF { `#3 } > { "80 }
-          {
-            \int_compare:nNnTF { `#3 } < { "E0 }
-              { \@@_map_char:NnNN }
-              {
-                 \int_compare:nNnTF { `#3 } < { "F0 }
-                   { \@@_map_char:NnNNN }
-                   { \@@_map_char:NnNNNN }
-              }
-          }
-          { \@@_map_codepoint:Nnn }
-            #1 {#2} #3
+        \@@_codepoint_process:nN
+          { \@@_map_codepoint:Nnn #1 {#2} } #3
       }
-    \cs_new:Npn \@@_map_char:NnNN #1#2#3#4
-      { \@@_map_codepoint:Nnn #1 {#2} {#3#4} }
-    \cs_new:Npn \@@_map_char:NnNNN #1#2#3#4#5
-      { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5} }
-    \cs_new:Npn \@@_map_char:NnNNNN #1#2#3#4#5#6
-      { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5#6} }
   }
 %    \end{macrocode}
-%  We want to keep common code paths, so collect up one Unicode codepoint
-%  as a single argument in an engine-independent way. We can then pull out
-%  the special cases: hard-coded for speed so not actually using the
-%  grapheme data. The carriage return case needs a bit of context handling
-%  so has an auxiliary. Codepoint U+200D is the zero-width joiner, which has
-%  no context to concern us: just don't break.
+%  We pull out a few special cases here. The carriage return case needs a bit
+%  of context handling so has an auxiliary. Codepoint U+200D is the zero-width
+%  joiner, which has no context to concern us: just don't break.
 %    \begin{macrocode}
 \cs_new:Npn \@@_map_codepoint:Nnn #1#2#3
   {
-    \@@_map_codepoint_compare:nNnTF {#3} =  { "0D }
+    \@@_codepoint_compare:nNnTF {#3} = { "0D }
       {
         \@@_map_output:Nn #1 {#2}
         \@@_map_CR:Nnw #1 {#3}
       }
       {
-        \@@_map_codepoint_compare:nNnTF {#3} = { "200D }
+        \@@_codepoint_compare:nNnTF {#3} = { "200D }
           { \@@_map_loop:Nnw #1 {#2#3} }
           { \@@_map_class:Nnnn #1 {#2} {#3} { Control } }
       }
@@ -292,13 +254,13 @@
   {
     \@@_if_q_recursion_tail_stop_do:nn {#5}
       { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
-    \@@_map_codepoint_compare:nNnTF {#3} < { "#5 }
+    \@@_codepoint_compare:nNnTF {#3} < { "#5 }
       {
         \@@_map_class_end:nw
           { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
       }
       {
-        \@@_map_codepoint_compare:nNnTF {#3} > { "#6 }
+        \@@_codepoint_compare:nNnTF {#3} > { "#6 }
           { \@@_map_class_loop:Nnnnw #1 {#2} {#3} {#4} }
           {
             \@@_map_class_end:nw
@@ -445,10 +407,10 @@
   {
     \@@_if_q_recursion_tail_stop_do:nn {#6}
       { \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
-    \@@_map_codepoint_compare:nNnTF {#3} < { "#6 }
+    \@@_codepoint_compare:nNnTF {#3} < { "#6 }
       { \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
       {
-        \@@_map_codepoint_compare:nNnTF {#3} > { "#7 }
+        \@@_codepoint_compare:nNnTF {#3} > { "#7 }
           { \@@_map_hangul_loop:Nnnnnw #1 {#2} {#3} {#4} {#5} }
           {
             \@@_map_hangul_end:nw
@@ -521,79 +483,6 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[EXP, TF]{\@@_map_codepoint_compare:nNn}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:N, \@@_map_codepoint_compare_aux:N}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NN}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNN}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNNN}
-%   Allows comparison for all engines using a first \enquote{character} followed
-%   by a codepoint.
-%    \begin{macrocode}
-\bool_lazy_or:nnTF
-  { \sys_if_engine_luatex_p: }
-  { \sys_if_engine_xetex_p: }
-  {
-    \prg_new_conditional:Npnn
-      \@@_map_codepoint_compare:nNn #1#2#3 { TF }
-      {
-        \int_compare:nNnTF { `#1 } #2 {#3}
-          \prg_return_true: \prg_return_false:
-      }
-  }
-  {
-    \prg_new_conditional:Npnn
-      \@@_map_codepoint_compare:nNn #1#2#3 { TF }
-      {
-        \int_compare:nNnTF { \@@_map_codepoint_compare:N #1 }
-            #2 {#3}
-          \prg_return_true: \prg_return_false:
-      }
-    \cs_new:Npn \@@_map_codepoint_compare:N #1
-      {
-        \if_int_compare:w `#1 > "80 \exp_stop_f:
-          \if_int_compare:w `#1 < "E0 \exp_stop_f:
-            \exp_after:wN \exp_after:wN \exp_after:wN
-              \@@_map_codepoint_compare:NN
-          \else:
-            \if_int_compare:w `#1 < "F0 \exp_stop_f:
-              \exp_after:wN \exp_after:wN \exp_after:wN
-              \exp_after:wN \exp_after:wN \exp_after:wN
-              \exp_after:wN \@@_map_codepoint_compare:NNN
-            \else:
-              \exp_after:wN \exp_after:wN \exp_after:wN
-              \exp_after:wN \exp_after:wN \exp_after:wN
-              \exp_after:wN \@@_map_codepoint_compare:NNNN
-            \fi:
-          \fi:
-        \else:
-          \exp_after:wN \@@_map_codepoint_compare_aux:N
-        \fi:
-          #1
-      }
-    \cs_new:Npn \@@_map_codepoint_compare_aux:N #1 { `#1 }
-    \cs_new:Npn \@@_map_codepoint_compare:NN #1#2
-      { (`#1 - "C0) * "40 + `#2 - "80 }
-    \cs_new:Npn \@@_map_codepoint_compare:NNN #1#2#3
-      { (`#1 - "E0) * "1000 + (`#2 - "80) * "40 + `#3 - "80 }
-    \cs_new:Npn \@@_map_codepoint_compare:NNNN #1#2#3#4
-      {
-          (`#1 - "F0) * "40000 
-        + (`#2 - "80) * "1000
-        + (`#3 - "80) * "40
-        + `#4 - "80
-      }
-  }
-%    \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
 %
 % \begin{macro}{\text_map_inline:nn}
 %   The standard non-expandable inline version.
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 99f4a5364..570cf8942 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -630,6 +630,121 @@
 %    \end{macrocode}
 % \end{macro}
 %
+% \subsection{Codepoint utilities}
+%
+% For working with codepoints in an engine-neutral way.
+%
+% \begin{macro}[EXP]{\@@_codepoint_process:nN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNNN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNNNN}
+%   Pass one codepoint to the code |#1| as a single argument. Outside Lua\TeX{}
+%   and \XeTeX{} it spans one to four tokens, chosen from the first char code.
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_codepoint_process:nN #1#2 { #1 {#2} } % #2 passed straight on
+  }
+  {
+    \cs_new:Npn \@@_codepoint_process:nN #1#2
+      {
+        \int_compare:nNnTF { `#2 } > { "80 } % above "80: more tokens are grabbed
+          {
+            \int_compare:nNnTF { `#2 } < { "E0 }
+              { \@@_codepoint_process:nNN } % below "E0: two tokens in total
+              {
+                 \int_compare:nNnTF { `#2 } < { "F0 }
+                   { \@@_codepoint_process:nNNN } % below "F0: three tokens
+                   { \@@_codepoint_process:nNNNN } % otherwise: four tokens
+              }
+          }
+          { \use:n } % not above "80: #2 alone follows #1, unbraced
+            {#1} #2 % these become the first arguments of the function chosen
+      }
+    \cs_new:Npn \@@_codepoint_process:nNN #1#2#3
+      { #1 {#2#3} }
+    \cs_new:Npn \@@_codepoint_process:nNNN #1#2#3#4
+      { #1 {#2#3#4} }
+    \cs_new:Npn \@@_codepoint_process:nNNNN #1#2#3#4#5
+      { #1 {#2#3#4#5} }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[EXP, TF]{\@@_codepoint_compare:nNn}
+% \begin{macro}[EXP]{\@@_codepoint_compare:N, \@@_codepoint_compare_aux:N}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NN}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NNN}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NNNN}
+%   Compares the codepoint of the \enquote{character} |#1| with the integer
+%   expression |#3| using |#2|, decoding a multi-token |#1| where needed.
+%    \begin{macrocode}
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \prg_new_conditional:Npnn
+      \@@_codepoint_compare:nNn #1#2#3 { TF }
+      {
+        \int_compare:nNnTF { `#1 } #2 {#3} % char code of #1 used directly
+          \prg_return_true: \prg_return_false:
+      }
+  }
+  {
+    \prg_new_conditional:Npnn
+      \@@_codepoint_compare:nNn #1#2#3 { TF }
+      {
+        \int_compare:nNnTF { \@@_codepoint_compare:N #1 } % #1 decoded first
+            #2 {#3}
+          \prg_return_true: \prg_return_false:
+      }
+    \cs_new:Npn \@@_codepoint_compare:N #1
+      {
+        \if_int_compare:w `#1 > "80 \exp_stop_f:
+          \if_int_compare:w `#1 < "E0 \exp_stop_f:
+            \exp_after:wN \exp_after:wN \exp_after:wN % 3: two open conditionals
+              \@@_codepoint_compare:NN
+          \else:
+            \if_int_compare:w `#1 < "F0 \exp_stop_f:
+              \exp_after:wN \exp_after:wN \exp_after:wN
+              \exp_after:wN \exp_after:wN \exp_after:wN
+              \exp_after:wN \@@_codepoint_compare:NNN % 7: three conditionals
+            \else:
+              \exp_after:wN \exp_after:wN \exp_after:wN
+              \exp_after:wN \exp_after:wN \exp_after:wN
+              \exp_after:wN \@@_codepoint_compare:NNNN % 7: three conditionals
+            \fi:
+          \fi:
+        \else:
+          \exp_after:wN \@@_codepoint_compare_aux:N % 1: one open conditional
+        \fi:
+          #1 % tokens read by the decoder chosen above
+      }
+    \cs_new:Npn \@@_codepoint_compare_aux:N #1 { `#1 }
+    \cs_new:Npn \@@_codepoint_compare:NN #1#2
+      { (`#1 - "C0) * "40 + `#2 - "80 } % "40 = 2^6; matches 2-byte UTF-8
+    \cs_new:Npn \@@_codepoint_compare:NNN #1#2#3
+      { (`#1 - "E0) * "1000 + (`#2 - "80) * "40 + `#3 - "80 }
+    \cs_new:Npn \@@_codepoint_compare:NNNN #1#2#3#4
+      {
+          (`#1 - "F0) * "40000 % "40000 = 2^18
+        + (`#2 - "80) * "1000 % "1000 = 2^12
+        + (`#3 - "80) * "40
+        + `#4 - "80
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
 % \subsection{Configuration variables}
 %
 % \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}





More information about the latex3-commits mailing list.