[latex3-commits] [git/LaTeX3-latex3-latex3] text-map: Move codepoint handling to l3text core (86fd81a70)
Joseph Wright
joseph.wright at morningstar2.co.uk
Mon Aug 8 13:51:42 CEST 2022
Repository : https://github.com/latex3/latex3
On branch : text-map
Link : https://github.com/latex3/latex3/commit/86fd81a702504e19b9890e781dff32fc0ca398be
>---------------------------------------------------------------
commit 86fd81a702504e19b9890e781dff32fc0ca398be
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Mon Aug 8 12:49:24 2022 +0100
Move codepoint handling to l3text core
This will allow the same ideas to be used in other
parts of the module, most obviously the case changer.
>---------------------------------------------------------------
86fd81a702504e19b9890e781dff32fc0ca398be
l3kernel/l3text-map.dtx | 133 ++++--------------------------------------------
l3kernel/l3text.dtx | 115 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 126 insertions(+), 122 deletions(-)
diff --git a/l3kernel/l3text-map.dtx b/l3kernel/l3text-map.dtx
index 918721e39..c461568f4 100644
--- a/l3kernel/l3text-map.dtx
+++ b/l3kernel/l3text-map.dtx
@@ -71,10 +71,6 @@
% \begin{macro}[EXP]{\@@_map_group:Nnn}
% \begin{macro}[EXP]{\@@_map_space:Nnw}
% \begin{macro}[EXP]{\@@_map_N_type:NnN}
-% \begin{macro}[EXP]{\@@_map_char:NnN}
-% \begin{macro}[EXP]{\@@_map_char:NnNN}
-% \begin{macro}[EXP]{\@@_map_char:NnNNN}
-% \begin{macro}[EXP]{\@@_map_char:NnNNNN}
% \begin{macro}[EXP]{\@@_map_codepoint:Nnn}
% \begin{macro}[EXP]{\@@_map_CR:Nnw}
% \begin{macro}[EXP]{\@@_map_CR:NnN}
@@ -184,59 +180,25 @@
#1 {#3}
\@@_map_loop:Nnw #1 { }
}
- { \@@_map_char:NnN #1 {#2} #3 }
- }
-% \end{macrocode}
-% We want to keep common code paths, so collect up one Unicode codepoint
-% as a single argument in an engine-independent way.
-% \begin{macrocode}
-\bool_lazy_or:nnTF
- { \sys_if_engine_luatex_p: }
- { \sys_if_engine_xetex_p: }
- {
- \cs_new:Npn \@@_map_char:NnN #1#2#3
- { \@@_map_codepoint:Nnn #1 {#2} #3 }
- }
- {
- \cs_new:Npn \@@_map_char:NnN #1#2#3
{
- \int_compare:nNnTF { `#3 } > { "80 }
- {
- \int_compare:nNnTF { `#3 } < { "E0 }
- { \@@_map_char:NnNN }
- {
- \int_compare:nNnTF { `#3 } < { "F0 }
- { \@@_map_char:NnNNN }
- { \@@_map_char:NnNNNN }
- }
- }
- { \@@_map_codepoint:Nnn }
- #1 {#2} #3
+ \@@_codepoint_process:nN
+ { \@@_map_codepoint:Nnn #1 {#2} } #3
}
- \cs_new:Npn \@@_map_char:NnNN #1#2#3#4
- { \@@_map_codepoint:Nnn #1 {#2} {#3#4} }
- \cs_new:Npn \@@_map_char:NnNNN #1#2#3#4#5
- { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5} }
- \cs_new:Npn \@@_map_char:NnNNNN #1#2#3#4#5#6
- { \@@_map_codepoint:Nnn #1 {#2} {#3#4#5#6} }
}
% \end{macrocode}
-% We want to keep common code paths, so collect up one Unicode codepoint
-% as a single argument in an engine-independent way. We can then pull out
-% the special cases: hard-coded for speed so not actually using the
-% grapheme data. The carriage return case needs a bit of context handling
-% so has an auxiliary. Codepoint U+200D is the zero-width joiner, which has
-% no context to concern us: just don't break.
+% We pull out a few special cases here. The carriage return case needs a bit
+% of context handling so has an auxiliary. Codepoint U+200D is the zero-width
+% joiner, which has no context to concern us: just don't break.
% \begin{macrocode}
\cs_new:Npn \@@_map_codepoint:Nnn #1#2#3
{
- \@@_map_codepoint_compare:nNnTF {#3} = { "0D }
+ \@@_codepoint_compare:nNnTF {#3} = { "0D }
{
\@@_map_output:Nn #1 {#2}
\@@_map_CR:Nnw #1 {#3}
}
{
- \@@_map_codepoint_compare:nNnTF {#3} = { "200D }
+ \@@_codepoint_compare:nNnTF {#3} = { "200D }
{ \@@_map_loop:Nnw #1 {#2#3} }
{ \@@_map_class:Nnnn #1 {#2} {#3} { Control } }
}
@@ -292,13 +254,13 @@
{
\@@_if_q_recursion_tail_stop_do:nn {#5}
{ \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
- \@@_map_codepoint_compare:nNnTF {#3} < { "#5 }
+ \@@_codepoint_compare:nNnTF {#3} < { "#5 }
{
\@@_map_class_end:nw
{ \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
}
{
- \@@_map_codepoint_compare:nNnTF {#3} > { "#6 }
+ \@@_codepoint_compare:nNnTF {#3} > { "#6 }
{ \@@_map_class_loop:Nnnnw #1 {#2} {#3} {#4} }
{
\@@_map_class_end:nw
@@ -445,10 +407,10 @@
{
\@@_if_q_recursion_tail_stop_do:nn {#6}
{ \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
- \@@_map_codepoint_compare:nNnTF {#3} < { "#6 }
+ \@@_codepoint_compare:nNnTF {#3} < { "#6 }
{ \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
{
- \@@_map_codepoint_compare:nNnTF {#3} > { "#7 }
+ \@@_codepoint_compare:nNnTF {#3} > { "#7 }
{ \@@_map_hangul_loop:Nnnnnw #1 {#2} {#3} {#4} {#5} }
{
\@@_map_hangul_end:nw
@@ -521,79 +483,6 @@
% \end{macro}
% \end{macro}
% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-%
-% \begin{macro}[EXP, TF]{\@@_map_codepoint_compare:nNn}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:N, \@@_map_codepoint_compare_aux:N}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NN}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNN}
-% \begin{macro}[EXP]{\@@_map_codepoint_compare:NNNN}
-% Allows comparison for all engines using a first \enquote{character} followed
-% by a codepoint.
-% \begin{macrocode}
-\bool_lazy_or:nnTF
- { \sys_if_engine_luatex_p: }
- { \sys_if_engine_xetex_p: }
- {
- \prg_new_conditional:Npnn
- \@@_map_codepoint_compare:nNn #1#2#3 { TF }
- {
- \int_compare:nNnTF { `#1 } #2 {#3}
- \prg_return_true: \prg_return_false:
- }
- }
- {
- \prg_new_conditional:Npnn
- \@@_map_codepoint_compare:nNn #1#2#3 { TF }
- {
- \int_compare:nNnTF { \@@_map_codepoint_compare:N #1 }
- #2 {#3}
- \prg_return_true: \prg_return_false:
- }
- \cs_new:Npn \@@_map_codepoint_compare:N #1
- {
- \if_int_compare:w `#1 > "80 \exp_stop_f:
- \if_int_compare:w `#1 < "E0 \exp_stop_f:
- \exp_after:wN \exp_after:wN \exp_after:wN
- \@@_map_codepoint_compare:NN
- \else:
- \if_int_compare:w `#1 < "F0 \exp_stop_f:
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_after:wN \@@_map_codepoint_compare:NNN
- \else:
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_after:wN \exp_after:wN \exp_after:wN
- \exp_after:wN \@@_map_codepoint_compare:NNNN
- \fi:
- \fi:
- \else:
- \exp_after:wN \@@_map_codepoint_compare_aux:N
- \fi:
- #1
- }
- \cs_new:Npn \@@_map_codepoint_compare_aux:N #1 { `#1 }
- \cs_new:Npn \@@_map_codepoint_compare:NN #1#2
- { (`#1 - "C0) * "40 + `#2 - "80 }
- \cs_new:Npn \@@_map_codepoint_compare:NNN #1#2#3
- { (`#1 - "E0) * "1000 + (`#2 - "80) * "40 + `#3 - "80 }
- \cs_new:Npn \@@_map_codepoint_compare:NNNN #1#2#3#4
- {
- (`#1 - "F0) * "40000
- + (`#2 - "80) * "1000
- + (`#3 - "80) * "40
- + `#4 - "80
- }
- }
-% \end{macrocode}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
-% \end{macro}
%
% \begin{macro}{\text_map_inline:nn}
% The standard non-expandable inline version.
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 99f4a5364..570cf8942 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -630,6 +630,121 @@
% \end{macrocode}
% \end{macro}
%
+% \subsection{Codepoint utilities}
+%
+% For working with codepoints in an engine-neutral way.
+%
+% \begin{macro}[EXP]{\@@_codepoint_process:nN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNNN}
+% \begin{macro}[EXP]{\@@_codepoint_process:nNNNN}
+% Grab one codepoint (the token |#2| plus, outside LuaTeX/XeTeX, any UTF-8 trailing
+% bytes) and hand it to the code |#1| as one \emph{braced} argument on every path.
+% \begin{macrocode}
+\bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ { % LuaTeX or XeTeX: one token is one codepoint, so just brace it
+ \cs_new:Npn \@@_codepoint_process:nN #1#2 { #1 {#2} }
+ }
+ { % other engines: the first token's character code decides how many to grab
+ \cs_new:Npn \@@_codepoint_process:nN #1#2
+ {
+ \int_compare:nNnTF { `#2 } > { "80 } % above "80: further tokens belong to it
+ {
+ \int_compare:nNnTF { `#2 } < { "E0 } % below "E0: two tokens in total
+ { \@@_codepoint_process:nNN }
+ {
+ \int_compare:nNnTF { `#2 } < { "F0 } % below "F0: three tokens, else four
+ { \@@_codepoint_process:nNNN }
+ { \@@_codepoint_process:nNNNN }
+ }
+ {#1} #2 % arguments for whichever auxiliary was selected above
+ }
+ { #1 {#2} } % single token: braced, exactly as in the Unicode-engine definition
+ }
+ \cs_new:Npn \@@_codepoint_process:nNN #1#2#3
+ { #1 {#2#3} }
+ \cs_new:Npn \@@_codepoint_process:nNNN #1#2#3#4
+ { #1 {#2#3#4} }
+ \cs_new:Npn \@@_codepoint_process:nNNNN #1#2#3#4#5
+ { #1 {#2#3#4#5} }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}[EXP, TF]{\@@_codepoint_compare:nNn}
+% \begin{macro}[EXP]{\@@_codepoint_compare:N, \@@_codepoint_compare_aux:N}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NN}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NNN}
+% \begin{macro}[EXP]{\@@_codepoint_compare:NNNN}
+% Compares the codepoint of the \enquote{character} |#1| (one token in LuaTeX/XeTeX,
+% else one to four tokens decoded arithmetically) with |#3| using the relation |#2|.
+% \begin{macrocode}
+\bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ { % LuaTeX or XeTeX: the character code of the first argument is used directly
+ \prg_new_conditional:Npnn
+ \@@_codepoint_compare:nNn #1#2#3 { TF }
+ {
+ \int_compare:nNnTF { `#1 } #2 {#3}
+ \prg_return_true: \prg_return_false:
+ }
+ }
+ { % other engines: the tokens of the first argument are first turned into a number
+ \prg_new_conditional:Npnn
+ \@@_codepoint_compare:nNn #1#2#3 { TF }
+ {
+ \int_compare:nNnTF { \@@_codepoint_compare:N #1 }
+ #2 {#3}
+ \prg_return_true: \prg_return_false:
+ }
+ \cs_new:Npn \@@_codepoint_compare:N #1
+ {
+ \if_int_compare:w `#1 > "80 \exp_stop_f: % above "80: more tokens follow
+ \if_int_compare:w `#1 < "E0 \exp_stop_f: % below "E0: two tokens in total
+ \exp_after:wN \exp_after:wN \exp_after:wN % close both open conditionals first
+ \@@_codepoint_compare:NN
+ \else:
+ \if_int_compare:w `#1 < "F0 \exp_stop_f: % below "F0: three tokens, else four
+ \exp_after:wN \exp_after:wN \exp_after:wN % three conditionals to close here
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_after:wN \@@_codepoint_compare:NNN
+ \else:
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_after:wN \@@_codepoint_compare:NNNN
+ \fi:
+ \fi:
+ \else:
+ \exp_after:wN \@@_codepoint_compare_aux:N % single token: nothing to combine
+ \fi:
+ #1
+ }
+ \cs_new:Npn \@@_codepoint_compare_aux:N #1 { `#1 } % character code of the one token
+ \cs_new:Npn \@@_codepoint_compare:NN #1#2
+ { (`#1 - "C0) * "40 + `#2 - "80 } % lead offset "C0, trailing offset "80
+ \cs_new:Npn \@@_codepoint_compare:NNN #1#2#3
+ { (`#1 - "E0) * "1000 + (`#2 - "80) * "40 + `#3 - "80 } % lead offset "E0
+ \cs_new:Npn \@@_codepoint_compare:NNNN #1#2#3#4
+ {
+ (`#1 - "F0) * "40000 % lead offset "F0
+ + (`#2 - "80) * "1000
+ + (`#3 - "80) * "40
+ + `#4 - "80
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
% \subsection{Configuration variables}
%
% \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}
More information about the latex3-commits
mailing list.