[latex3-commits] [git/LaTeX3-latex3-latex3] main: Preserve catcode when case changing 8-bit chars < "80 (a6aeb66e7)
Joseph Wright
joseph.wright at morningstar2.co.uk
Wed Nov 9 09:51:17 CET 2022
Repository : https://github.com/latex3/latex3
On branch : main
Link : https://github.com/latex3/latex3/commit/a6aeb66e75ae448d1d2927e9c20a7f5812d21932
>---------------------------------------------------------------
commit a6aeb66e75ae448d1d2927e9c20a7f5812d21932
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Wed Nov 9 08:48:18 2022 +0000
Preserve catcode when case changing 8-bit chars < "80
As discussed with the team.
Note that active chars already have independent handling.
>---------------------------------------------------------------
a6aeb66e75ae448d1d2927e9c20a7f5812d21932
l3kernel/l3text-case.dtx | 44 +++++++++++++++++++++++++++++++++-----------
l3kernel/l3unicode.dtx | 16 ++++++++--------
2 files changed, 41 insertions(+), 19 deletions(-)
diff --git a/l3kernel/l3text-case.dtx b/l3kernel/l3text-case.dtx
index 12c18e2f9..ff8c2640d 100644
--- a/l3kernel/l3text-case.dtx
+++ b/l3kernel/l3text-case.dtx
@@ -704,21 +704,43 @@
{
\use:e { \@@_change_case_codepoint_aux:nnnn #1 {#2} }
}
-\cs_new:Npn \@@_change_case_codepoint_aux:nnnn #1#2#3#4
+% \end{macrocode}
+% We need to ensure that only valid catcode-extraction is attempted. That's
+% fine with Unicode engines but needs a bit of work with 8-bit ones. The
+% logic is that if the original codepoint was in the ASCII range, we keep
+% the catcode. Otherwise, if the target is in the ASCII range, we use
+% the standard catcode. If neither is true, we set it to 13 on the grounds that
+% this will be what is used anyway!
+% \begin{macrocode}
+\cs_new:Npx \@@_change_case_codepoint_aux:nnnn #1#2#3#4
{
- \@@_codepoint_compare:nNnTF {#4} = {#1}
- { \exp_not:n {#4} }
+ \exp_not:N \@@_codepoint_compare:nNnTF {#4} = {#1}
+ { \exp_not:N \exp_not:n {#4} }
{
- \codepoint_generate:nn {#1}
- { \@@_char_catcode:N #4 }
- \tl_if_blank:nF {#2}
+ \exp_not:N \codepoint_generate:nn {#1}
+ {
+ \bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ { \exp_not:N \@@_char_catcode:N #4 }
+ {
+ \exp_not:N \@@_codepoint_compare:nNnTF {#4} < { "80 }
+ { \exp_not:N \@@_char_catcode:N #4 }
+ {
+ \exp_not:N \int_compare:nNnTF {#1} < { "80 }
+ { \exp_not:N \char_value_catcode:n {#1} }
+ { 13 }
+ }
+ }
+ }
+ \exp_not:N \tl_if_blank:nF {#2}
{
- \codepoint_generate:nn {#2}
- { \char_value_catcode:n {#2} }
- \tl_if_blank:nF {#3}
+ \exp_not:N \codepoint_generate:nn {#2}
+ { \exp_not:N \char_value_catcode:n {#2} }
+ \exp_not:N \tl_if_blank:nF {#3}
{
- \codepoint_generate:nn {#3}
- { \char_value_catcode:n {#3} }
+ \exp_not:N \codepoint_generate:nn {#3}
+ { \exp_not:N \char_value_catcode:n {#3} }
}
}
}
diff --git a/l3kernel/l3unicode.dtx b/l3kernel/l3unicode.dtx
index e7466d9a6..90f6ccaa1 100644
--- a/l3kernel/l3unicode.dtx
+++ b/l3kernel/l3unicode.dtx
@@ -55,7 +55,7 @@
% Unicode \meta{codepoints} and are designed to give useable results with
% both Unicode-aware and $8$-bit engines.
%
-% \begin{function}[EXP, added = 2022-10-09]
+% \begin{function}[EXP, added = 2022-10-09, updated = 2022-11-09]
% {\codepoint_generate:nn}
% \begin{syntax}
% \cs{codepoint_generate:nn} \Arg{codepoint} \Arg{catcode}
@@ -80,8 +80,8 @@
% produced: these will be the bytes of the UTF-8 representation of the
% \meta{codepoint}. For all codepoints outside of the classical ASCII
% range, the generated character tokens will be active (category code
-% $13$); for codepoints in the ASCII range, the prevailing category code of
-% the character is used. To allow the result of this function to be used
+% $13$); for codepoints in the ASCII range, the given \meta{catcode}
+% will be used. To allow the result of this function to be used
% inside an expansion context, the result is protected by \cs{exp_not:n}.
% \end{function}
%
@@ -191,7 +191,11 @@
\int_compare:nNnTF {#1} = { `\ }
{ ~ }
{
- \int_compare:nNnTF {#1} > { "80 }
+ \int_compare:nNnTF {#1} < { "80 }
+ {
+ \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
+ { \char_generate:nn {#1} {#2} }
+ }
{
\use:e
{
@@ -199,10 +203,6 @@
\__kernel_codepoint_to_bytes:n {#1}
}
}
- {
- \__kernel_exp_not:w \exp_after:wN \exp_after:wN \exp_after:wN
- { \char_generate:nn {#1} { \char_value_catcode:n {#1} } }
- }
}
}
\cs_new:Npn \@@_generate:nnnn #1#2#3#4
More information about the latex3-commits
mailing list.