[latex3-commits] [git/LaTeX3-latex3-latex3] main: Preserve catcode when case changing 8-bit chars < "80 (0c3da10e9)
Joseph Wright
joseph.wright at morningstar2.co.uk
Wed Nov 9 15:30:20 CET 2022
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/0c3da10e9a7b0d6b4d82d6afeec3336573fb7908
>---------------------------------------------------------------
commit 0c3da10e9a7b0d6b4d82d6afeec3336573fb7908
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Wed Nov 9 14:30:20 2022 +0000
Preserve catcode when case changing 8-bit chars < "80
This time working!
>---------------------------------------------------------------
0c3da10e9a7b0d6b4d82d6afeec3336573fb7908
l3kernel/l3text-case.dtx | 124 ++++++++++++++++++++++++++++++++++++-----------
l3kernel/l3unicode.dtx | 16 +++---
2 files changed, 103 insertions(+), 37 deletions(-)
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index 12c18e2f9..861d702cb 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -710,7 +710,7 @@
{ \exp_not:n {#4} }
{
\codepoint_generate:nn {#1}
- { \@@_char_catcode:N #4 }
+ { \@@_change_case_catcode:nn {#4} {#1} }
\tl_if_blank:nF {#2}
{
\codepoint_generate:nn {#2}
@@ -723,6 +723,33 @@
}
}
}
+% \end{macrocode}
+% We need to ensure that catcode extraction is only attempted where it is
+% valid. That is fine with Unicode engines but needs a bit of work with 8-bit
+% ones. The logic is: if the original codepoint is in the ASCII range, we keep
+% its catcode. Otherwise, if the target is in the ASCII range, we use the
+% target's current catcode. If neither is true, we use $13$ (active), on the
+% grounds that this is what will be used anyway!
+% \begin{macrocode}
+\bool_lazy_or:nnTF % pick one of the two definitions below, depending on the engine
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_new:Npn \@@_change_case_catcode:nn #1#2 % #1 = original char, #2 = target codepoint (unused in this branch)
+ { \@@_char_catcode:N #1 } % LuaTeX/XeTeX: always reuse the catcode of #1
+ }
+ {
+ \cs_new:Npn \@@_change_case_catcode:nn #1#2 % all other engines: #1 = original char, #2 = target codepoint
+ {
+ \@@_codepoint_compare:nNnTF {#1} < { "80 } % is the original below "80 (ASCII range)?
+ { \@@_char_catcode:N #1 } % yes: keep the catcode of #1
+ {
+ \int_compare:nNnTF {#2} < { "80 } % no: is the target below "80?
+ { \char_value_catcode:n {#2} } % yes: catcode currently assigned to the target
+ { 13 } % neither is below "80: use catcode 13 (active)
+ }
+ }
+ }
\cs_new:Npn \@@_change_case_next_lower:nn #1#2
{ \@@_change_case_loop:nnw {#1} {#2} } % forward #1 and #2 unchanged to \@@_change_case_loop:nnw
\cs_new_eq:NN \@@_change_case_next_upper:nn
@@ -810,7 +837,10 @@
\@@_codepoint_compare:nNnTF {#4} = { "00DF }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "1E9E } { \@@_char_catcode:N #4 } }
+ {
+ \codepoint_generate:nn { "1E9E }
+ { \@@_change_case_catcode:nn {#4} { "1E9E } }
+ }
\use:c { @@_change_case_next_ #2 :nn }
{#2} {#3}
}
@@ -1038,8 +1068,14 @@
\bool_lazy_or:nnTF
{ \@@_codepoint_compare_p:nNn {#1} = { "0399 } }
{ \@@_codepoint_compare_p:nNn {#1} = { "03B9 } }
- { \codepoint_generate:nn { "03AA } { \@@_char_catcode:N #1 } }
- { \codepoint_generate:nn { "03AB } { \@@_char_catcode:N #1 } }
+ {
+ \codepoint_generate:nn { "03AA }
+ { \@@_change_case_catcode:nn {#1} { "03AA } }
+ }
+ {
+ \codepoint_generate:nn { "03AB }
+ { \@@_change_case_catcode:nn {#1} { "03AB } }
+ }
}
}
% \end{macrocode}
@@ -1318,7 +1354,10 @@
{ \@@_change_case_boundary_upper_el:nnnN {#3} }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0389 } { \@@_char_catcode:N #3 } }
+ {
+ \codepoint_generate:nn { "0389 }
+ { \@@_change_case_catcode:nn {#3} { "0389 } }
+ }
\@@_change_case_loop:nnw
}
{#1} {#2} #4 \q_@@_recursion_stop
@@ -1336,7 +1375,10 @@
{ \@@_change_case_loop:nnw {#2} {#3} #1#4 }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0389 } { \@@_char_catcode:N #1 } }
+ {
+ \codepoint_generate:nn { "0389 }
+ { \@@_change_case_catcode:nn {#1} { "0389 } }
+ }
\@@_change_case_loop:nnw {#2} {#3} #4
}
}
@@ -1370,8 +1412,10 @@
{
\@@_change_case_store:e
{
- \codepoint_generate:nn { "0535 } { \@@_char_catcode:N #4 }
- \codepoint_generate:nn { "054E } { \@@_char_catcode:N #4 }
+ \codepoint_generate:nn { "0535 }
+ { \@@_change_case_catcode:nn {#4} { "0535 } }
+ \codepoint_generate:nn { "054E }
+ { \@@_change_case_catcode:nn {#4} { "054E } }
}
\use:c { @@_change_case_next_ #2 :nn }
{#2} {#3}
@@ -1384,8 +1428,10 @@
{
\@@_change_case_store:e
{
- \codepoint_generate:nn { "0535 } { \@@_char_catcode:N #4 }
- \codepoint_generate:nn { "057E } { \@@_char_catcode:N #4 }
+ \codepoint_generate:nn { "0535 }
+ { \@@_change_case_catcode:nn {#4} { "0535 } }
+ \codepoint_generate:nn { "057E }
+ { \@@_change_case_catcode:nn {#4} { "057E } }
}
\use:c { @@_change_case_next_ #2 :nn }
{#2} {#3}
@@ -1405,12 +1451,10 @@
% \begin{macrocode}
\cs_new:cpn { @@_change_case_lower_la-x-medieval:nnnn } #1#2#3#4
{
- \@@_codepoint_compare:nNnTF {#4} = { `V }
+ \@@_codepoint_compare:nNnTF {#4} = { "0056 }
{
\@@_change_case_store:e
- {
- \char_generate:nn { `u } { \@@_char_catcode:N #4 }
- }
+ { \char_generate:nn { "0075 } { \@@_char_catcode:N #4 } }
\use:c { @@_change_case_next_ #2 :nn }
{#2} {#3}
}
@@ -1418,12 +1462,10 @@
}
\cs_new:cpn { @@_change_case_upper_la-x-medieval:nnnn } #1#2#3#4
{
- \@@_codepoint_compare:nNnTF {#4} = { `u }
+ \@@_codepoint_compare:nNnTF {#4} = { "0075 }
{
\@@_change_case_store:e
- {
- \char_generate:nn { `V } { \@@_char_catcode:N #4 }
- }
+ { \char_generate:nn { "0056 } { \@@_char_catcode:N #4 } }
\use:c { @@_change_case_next_ #2 :nn }
{#2} {#3}
}
@@ -1483,9 +1525,12 @@
{
\@@_change_case_store:e
{
- \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #4 }
- \codepoint_generate:nn { "0307 } { \@@_char_catcode:N #4 }
- \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 }
+ \codepoint_generate:nn { "0069 }
+ { \@@_change_case_catcode:nn {#4} { "0069 } }
+ \codepoint_generate:nn { "0307 }
+ { \@@_change_case_catcode:nn {#4} { "0307 } }
+ \codepoint_generate:nn {#1}
+ { \@@_change_case_catcode:nn {#4} {#1} }
}
\@@_change_case_loop:nnw {#2} {#3}
}
@@ -1501,7 +1546,10 @@
{ \@@_change_case_codepoint:nnnn {#2} {#2} {#3} {#4} }
{
\@@_change_case_store:e
- { \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 } }
+ {
+ \codepoint_generate:nn {#1}
+ { \@@_change_case_catcode:nn {#4} {#1} }
+ }
\@@_change_case_lower_lt:nnw {#2} {#3}
}
}
@@ -1535,7 +1583,10 @@
}
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0307 } { \@@_char_catcode:N #3 } }
+ {
+ \codepoint_generate:nn { "0307 }
+ { \@@_change_case_catcode:nn {#3} { "0307 } }
+ }
}
\@@_change_case_loop:nnw {#1} {#2} #3
}
@@ -1574,7 +1625,10 @@
{ \@@_change_case_codepoint:nnnn { upper } {#2} {#3} {#4} }
{
\@@_change_case_store:e
- { \codepoint_generate:nn {#1} { \@@_char_catcode:N #4 } }
+ {
+ \codepoint_generate:nn {#1}
+ { \@@_change_case_catcode:nn {#4} {#1} }
+ }
\@@_change_case_upper_lt:nnw {#2} {#3}
}
}
@@ -1680,7 +1734,10 @@
\@@_codepoint_compare:nNnTF {#4} = { "0130 }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #4 } }
+ {
+ \codepoint_generate:nn { "0069 }
+ { \@@_change_case_catcode:nn {#4} { "0069 } }
+ }
\@@_change_case_loop:nnw {#1} {#3}
}
{ \@@_change_case_codepoint:nnnn {#1} {#2} {#3} {#4} }
@@ -1698,7 +1755,10 @@
{ \@@_change_case_lower_tr:NnnN #3 {#1} {#2} }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0131 } { \@@_char_catcode:N #3 } }
+ {
+ \codepoint_generate:nn { "0131 }
+ { \@@_change_case_catcode:nn {#3} { "0131 } }
+ }
\@@_change_case_loop:nnw {#1} {#2}
}
#4 \q_@@_recursion_stop
@@ -1719,12 +1779,18 @@
{ ! \@@_codepoint_compare_p:nNn {#4} = { "0307 } }
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0131 } { \@@_char_catcode:N #1 } }
+ {
+ \codepoint_generate:nn { "0131 }
+ { \@@_change_case_catcode:nn {#1} { "0131 } }
+ }
\@@_change_case_loop:nnw {#2} {#3} #4
}
{
\@@_change_case_store:e
- { \codepoint_generate:nn { "0069 } { \@@_char_catcode:N #1 } }
+ {
+ \codepoint_generate:nn { "0069 }
+ { \@@_change_case_catcode:nn {#1} { "0069 } }
+ }
\@@_change_case_loop:nnw {#2} {#3}
}
}
@@ -1743,7 +1809,7 @@
\@@_change_case_store:e
{
\codepoint_generate:nn { "0130 }
- { \@@_char_catcode:N #4 }
+ { \@@_change_case_catcode:nn {#4} { "0130 } }
}
\use:c { @@_change_case_next_ #2 :nn } {#2} {#3}
}
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index e7466d9a6..90f6ccaa1 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -55,7 +55,7 @@
% Unicode \meta{codepoints} and are designed to give useable results with
% both Unicode-aware and $8$-bit engines.
%
-% \begin{function}[EXP, added = 2022-10-09]
+% \begin{function}[EXP, added = 2022-10-09, updated = 2022-11-09]
% {\codepoint_generate:nn}
% \begin{syntax}
% \cs{codepoint_generate:nn} \Arg{codepoint} \Arg{catcode}
@@ -80,8 +80,8 @@
% produced: these will be the bytes of the UTF-8 representation of the
% \meta{codepoint}. For all codepoints outside of the classical ASCII
% range, the generated character tokens will be active (category code
-% $13$); for codepoints in the ASCII range, the prevailing category code of
-% the character is used. To allow the result of this function to be used
+% $13$); for codepoints in the ASCII range, the given \meta{catcode}
+% will be used. To allow the result of this function to be used
% inside an expansion context, the result is protected by \cs{exp_not:n}.
% \end{function}
%
@@ -191,7 +191,11 @@
\int_compare:nNnTF {#1} = { `\ }
{ ~ }
{
- \int_compare:nNnTF {#1} > { "80 }
+ \int_compare:nNnTF {#1} < { "80 }
+ {
+ \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
+ { \char_generate:nn {#1} {#2} }
+ }
{
\use:e
{
@@ -199,10 +203,6 @@
\__kernel_codepoint_to_bytes:n {#1}
}
}
- {
- \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
- { \char_generate:nn {#1} { \char_value_catcode:n {#1} } }
- }
}
}
\cs_new:Npn \@@_generate:nnnn #1#2#3#4
More information about the latex3-commits
mailing list.