[latex3-commits] [l3svn] 01/03: Making \char_generate:nn fully expandable
noreply at latex-project.org
noreply at latex-project.org
Thu Sep 10 17:54:13 CEST 2015
This is an automated email from the git hooks/post-receive script.
joseph pushed a commit to branch master
in repository l3svn.
commit cb5d10b9f42c3d4efac7df47477ed3d5603694f9
Author: Qing Lee <sobenlee at gmail.com>
Date: Thu Sep 10 19:29:20 2015 +0800
Making \char_generate:nn fully expandable
It is exactly two expansion steps to generate the result.
---
l3kernel/l3candidates.dtx | 141 +++++++++++++++++---------------
l3kernel/testfiles/m3char001.xetex.tlg | 13 +--
2 files changed, 83 insertions(+), 71 deletions(-)
diff --git a/l3kernel/l3candidates.dtx b/l3kernel/l3candidates.dtx
index f312878..a933028 100644
--- a/l3kernel/l3candidates.dtx
+++ b/l3kernel/l3candidates.dtx
@@ -822,7 +822,7 @@
% letter-like characters which can also be case-changed). Begin-group and
% end-group characters in the \meta{tokens} are normalized and become |{|
% and |}|, respectively.
-%
+%
% Importantly, notice that these functions are intended for working with
% user text for typesetting. For case changing programmatic data see the
% \pkg{l3str} module and discussion there of \cs{str_lower_case:n},
@@ -833,7 +833,7 @@
% input in the form of token lists or expandable functions will be expanded
% \emph{unless} it falls within one of the special handling classes described
% below. This expansion approach means that in general the result of case
-% changing will match the \enquote{natural} outcome expected from a
+% changing will match the \enquote{natural} outcome expected from a
% \enquote{functional} approach to case modification. For example
% \begin{verbatim}
% \tl_set:Nn \l_tmpa_tl { hello }
@@ -859,7 +859,7 @@
% \end{verbatim}
% Material inside math mode is left entirely unchanged: in particular, no
% expansion is undertaken.
-%
+%
% Detection of math mode is controlled by the list of tokens in
% \cs{l_tl_case_change_math_tl}, which should be in open--close pairs. In
% package mode the standard settings is
@@ -875,7 +875,7 @@
% the case changing functions are intended to apply to this should not be
% an issue.
% \end{variable}
-%
+%
% \begin{variable}{\l_tl_case_change_exclude_tl}
% Case changing can be prevented by using any command on the list
% \cs{l_tl_case_change_exclude_tl}. Each entry should be a function
@@ -902,7 +902,7 @@
% and |\ref| are automatically included in the list for exclusion from
% case changing.
% \end{variable}
-%
+%
% In package mode, the case change system will also
% convert text stored using the \LaTeXe{} \enquote{LICR} approach. This
% will upper/lower case tokens as implemented for the font encodings
@@ -926,12 +926,12 @@
% \end{verbatim}
% When finding the first \enquote{letter} for this process, any content in
% math mode or covered by \cs{l_tl_case_change_exclude_tl} is ignored.
-%
+%
% (Note that the Unicode Consortium describe this as \enquote{title case}, but
% that in English title case applies on a word-by-word basis. The
% \enquote{mixed} case implemented here is a lower level concept needed for
% both \enquote{title} and \enquote{sentence} casing of text.)
-%
+%
% \begin{variable}{\l_tl_mixed_case_ignore_tl}
% The list of characters to ignore when searching for the first
% \enquote{letter} in mixed-casing is determined by
@@ -1040,7 +1040,7 @@
% \cs{char_set_active_eq:NN} \meta{char} \meta{function}
% \end{syntax}
% Sets the behaviour of the \meta{char} in situations where it is
-% active (category code $13$) to be equivalent to that of the
+% active (category code $13$) to be equivalent to that of the
% \meta{function}. The category code of the \meta{char} is
% \emph{unchanged} by this process. The \meta{function} may itself
% be an active character.
@@ -1061,7 +1061,7 @@
%
% \begin{function}[EXP, added = 2015-09-09]{\char_generate:nn}
% \begin{syntax}
-% \cs{char_generate:nn} \meta{charcode} \meta{catcode}
+% \cs{char_generate:nn} \Arg{charcode} \Arg{catcode}
% \end{syntax}
% Generates a character token of the given \meta{charcode} and \meta{catcode}
% (both of which may be integer expressions). The \meta{catcode} may be
@@ -2994,7 +2994,7 @@
\cs_generate_variant:Nn \tl_if_head_eq_catcode:nNTF { o }
% \end{macrocode}
% \end{macro}
-%
+%
% \begin{macro}[EXP]{\tl_lower_case:n, \tl_upper_case:n, \tl_mixed_case:n}
% \begin{macro}[EXP]{\tl_lower_case:nn, \tl_upper_case:nn, \tl_mixed_case:nn}
% The user level functions here are all wrappers around the internal
@@ -3294,7 +3294,7 @@
% a string then examining the first four characters. For Cyrillic, the
% fourth character can be used for a second split based on the character
% code.
-%
+%
% Note that as this is dependent on \LaTeXe{}, in format mode the code
% goes straight to the final phase of handling control sequences.
% \begin{macrocode}
@@ -3333,8 +3333,8 @@
\cs_new:Npn \@@_change_case_cs_cyr:NnNNNNw #1#2#3#4#5#6#7 \q_stop
{
\@@_change_case_cs_type:Nnnnn #1
- { cyrillic }
- {
+ { cyrillic }
+ {
#2 _
\int_to_roman:n
{
@@ -3462,7 +3462,7 @@
% \end{macro}
% \end{macro}
% \end{macro}
-%
+%
% \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:Nnw}
% \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:w}
% \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:Nw}
@@ -3767,7 +3767,7 @@
% to generic routines, but at the cost of making the process rather opaque.
% Instead, the approach taken here is to use a dedicated set of functions
% which keep the different loop requirements clearly separate.
-%
+%
% The main loop looks for the first \enquote{real} char in the input
% (skipping any pre-letter chars). Once one is found, it is case changed to
% upper case but first checking that there is not an entry in the exceptions
@@ -3862,7 +3862,7 @@
\@@_change_case_cs:Nnnn #1 { upper }
{
\@@_change_case_loop:wnn
- #2 \q_recursion_stop { lower } {#3}
+ #2 \q_recursion_stop { lower } {#3}
}
{
\@@_change_case_cs:N #1
@@ -4016,7 +4016,7 @@
%</package>
% \end{macrocode}
% \end{variable}
-%
+%
% \begin{variable}{\l_tl_mixed_case_ignore_tl}
% Characters to skip over when finding the first letter in a word to be
% mixed cased.
@@ -4157,7 +4157,7 @@
\cyrishrt \CYRISHRT
\cyrishrtdsc \CYRISHRTDSC
\cyrizh \CYRIZH
- \cyrje \CYRJE
+ \cyrje \CYRJE
\cyrk \CYRK
\cyrkbeak \CYRKBEAK
\cyrkdsc \CYRKDSC
@@ -4225,7 +4225,7 @@
\cyrz \CYRZ
\cyrzdsc \CYRZDSC
\cyrzh \CYRZH
- \cyrzhdsc \CYRZHDSC
+ \cyrzhdsc \CYRZHDSC
}
{ \q_recursion_tail }
\@@_change_case_setup:nnnn
@@ -4280,7 +4280,7 @@
\accdialytikatonos { \exp_stop_f: \LGR at accDialytika }
\accdialytikavaria { \exp_stop_f: \LGR at accDialytika }
\accdialytikaperispomeni { \exp_stop_f: \LGR at accDialytika }
- \accperispomeni { \exp_stop_f: \LGR at accdropped }
+ \accperispomeni { \exp_stop_f: \LGR at accdropped }
}
\tl_const:Nn \c_@@_change_case_acc_lower_tl { }
\tl_const:Nn \c_@@_change_case_misc_upper_tl
@@ -4378,8 +4378,9 @@
% \begin{macrocode}
\cs_new:Npn \char_generate:nn #1#2
{
- \exp_args:Nff \@@_generate_auxi:nn
- { \int_eval:n {#1} } { \int_eval:n {#2} }
+ \exp:w \exp_after:wN \@@_generate_auxi:ww
+ \int_use:N \__int_eval:w #1 \exp_after:wN ;
+ \int_use:N \__int_eval:w #2 ;
}
% \end{macrocode}
% Before doing any actual conversion, first some special case filtering.
@@ -4391,7 +4392,7 @@
% done with macro emulation either, so is flagged up separately. That
% done, hand off to the engine-dependent part.
% \begin{macrocode}
-\cs_new:Npn \@@_generate_auxi:nn #1#2
+\cs_new:Npn \@@_generate_auxi:ww #1 ; #2 ;
{
\if_int_compare:w #2 = \c_thirteen
\__msg_kernel_expandable_error:nn { kernel } { char-active }
@@ -4403,9 +4404,19 @@
\__msg_kernel_expandable_error:nn { kernel } { char-space }
\fi:
\else:
- \@@_generate_auxii:nn {#1} {#2}
+ \if_int_odd:w 0
+ \if_int_compare:w #2 < \c_one 1 \fi:
+ \if_int_compare:w #2 = \c_five 1 \fi:
+ \if_int_compare:w #2 = \c_nine 1 \fi:
+ \if_int_compare:w #2 > \c_thirteen 1 \fi: \exp_stop_f:
+ \__msg_kernel_expandable_error:nn { kernel }
+ { char-invalid-catcode }
+ \else:
+ \@@_generate_auxii:nn {#1} {#2}
+ \fi:
\fi:
- \fi:
+ \fi:
+ \exp_end:
}
\tl_new:N \l_@@_tmp_tl
% \end{macrocode}
@@ -4421,39 +4432,36 @@
\cs_set_nopar:Npn ^^L { }
%</package>
\char_set_catcode_other:n { 0 }
- \if_int_compare:w 0
- \cs_if_exist:NT \luatex_directlua:D { 1 }
- \cs_if_exist:NT \utex_charcat:D { 1 }
- > \c_zero
+ \if_int_odd:w 0
+ \cs_if_exist:NT \luatex_directlua:D { 1 }
+ \cs_if_exist:NT \utex_charcat:D { 1 } \exp_stop_f:
\cs_new:Npn \@@_generate_auxii:nn #1#2
{
- \if_int_compare:w 0
- \if_int_compare:w #2 < \c_one 1 \fi:
- \if_int_compare:w #2 = \c_five 1 \fi:
- \if_int_compare:w #2 = \c_nine 1 \fi:
- \if_int_compare:w #2 > \c_thirteen 1 \fi:
- > \c_zero
- \@@_generate_invalid_catcode:
- \else:
- \if_int_compare:w 0
+ \if_int_odd:w 0
\if_int_compare:w #1 < \c_zero 1 \fi:
- \if_int_compare:w #1 > 1114111 ~ 1 \fi:
- > \c_one
- \__msg_kernel_expandable_error:nn { kernel }
- { char-out-of-range }
- \else:
- \@@_generate_auxiii:nn {#1} {#2}
- \fi:
+ \if_int_compare:w #1 > 1114111 ~ 1 \fi: \exp_stop_f:
+ \__msg_kernel_expandable_error:nn { kernel }
+ { char-out-of-range }
+ \else:
+ \@@_generate_auxiii:nnw {#1} {#2}
\fi:
}
\cs_if_exist:NTF \luatex_directlua:D
{
- \cs_new:Npn \@@_generate_auxiii:nn #1#2
- { \lua_now_x:n { l3kernel.charcat(#1, #2) } }
+ \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+ {
+ #3
+ \exp_after:wN \exp_end:
+ \luatex_directlua:D { l3kernel.charcat(#1, #2) }
+ }
}
{
- \cs_new:Npn \@@_generate_auxiii:nn #1#2
- { \utex_charcat:D #1 ~ #2 ~ }
+ \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+ {
+ #3
+ \exp_after:wN \exp_end:
+ \utex_charcat:D #1 ~ #2 ~
+ }
}
\else:
% \end{macrocode}
@@ -4464,15 +4472,14 @@
% is all done such that it can be quickly accessed using a |\if_case:w|
% low-level conditional. There are a few things to notice here.
% As |^^L| is |\outer| we need to locally set it to avoid a problem.
-% To get open/close braces into the list, they are set up using |if_false:|
+% To get open/close braces into the list, they are set up using |\if_false:|
% pairing here and will later be |x|-type expanded into the desired form.
% For making spaces, there needs to be an |o|-type expansion of a |\use:n|
% (or some other tokenization) to avoid dropping the space. We also
% set up active tokens although they are (currently) filtered out by the
-% interface layer (\tn{Ucharcat}| cannot make active tokens).
+% interface layer (\tn{Ucharcat} cannot make active tokens).
% \begin{macrocode}
- \tl_set:Nn \l_@@_tmp_tl
- { \exp_not:n { \@@_generate_invalid_catcode: \or: } }
+ \tl_set:Nn \l_@@_tmp_tl { \exp_not:N \or: }
\char_set_catcode_group_begin:n { 0 } % {
\tl_put_right:Nn \l_@@_tmp_tl { ^^@ \if_false: } }
\char_set_catcode_group_end:n { 0 }
@@ -4482,14 +4489,14 @@
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
\char_set_catcode_alignment:n { 0 }
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
- \tl_put_right:Nn \l_@@_tmp_tl { \or: \@@_generate_invalid_catcode: }
+ \tl_put_right:Nn \l_@@_tmp_tl { \or: }
\char_set_catcode_parameter:n { 0 }
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
\char_set_catcode_math_superscript:n { 0 }
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
\char_set_catcode_math_subscript:n { 0 }
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
- \tl_put_right:Nn \l_@@_tmp_tl { \or: \@@_generate_invalid_catcode: }
+ \tl_put_right:Nn \l_@@_tmp_tl { \or: }
\char_set_catcode_space:n { 0 }
\tl_put_right:No \l_@@_tmp_tl { \use:n { \or: } ^^@ }
\char_set_catcode_letter:n { 0 }
@@ -4498,7 +4505,6 @@
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
\char_set_catcode_active:n { 0 }
\tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
- \tl_put_right:Nn \l_@@_tmp_tl { \else: \@@_generate_invalid_catcode: }
% \end{macrocode}
% Convert the above temporary list into a series of constant token
% lists, one for each character code, using \tn{tex_lowercase:D} to
@@ -4525,25 +4531,28 @@
\cs_new:Npn \@@_generate_auxii:nn #1#2
{
\tl_if_exist:cTF { c_@@_ \__int_to_roman:w #1 _tl }
- {
- \exp_after:wN \exp_after:wN
- \if_case:w #2
- \exp_last_unbraced:Nv \exp_stop_f:
- { c_@@_ \__int_to_roman:w #1 _tl }
- \fi:
- }
+ { \@@_generate_auxiii:nnw {#1} {#2} }
{
\__msg_kernel_expandable_error:nn { kernel }
{ char-out-of-range }
}
}
+ \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+ {
+ #3
+ \exp_after:wN \exp_after:wN
+ \exp_after:wN \exp_end:
+ \exp_after:wN \exp_after:wN
+ \if_case:w #2
+ \exp_last_unbraced:Nv \exp_stop_f:
+ { c_@@_ \__int_to_roman:w #1 _tl }
+ \fi:
+ }
\fi:
\group_end:
% \end{macrocode}
-% Job done, set up a few messages.
+% Job done, set up a few messages.
% \begin{macrocode}
-\cs_new:Npn \@@_generate_invalid_catcode:
- { \__msg_kernel_expandable_error:nn { kernel } { char-invalid-catcode } }
\__msg_kernel_new:nnn { kernel } { char-active }
{ Cannot~generate~active~chars. }
\__msg_kernel_new:nnn { kernel } { char-invalid-catcode }
diff --git a/l3kernel/testfiles/m3char001.xetex.tlg b/l3kernel/testfiles/m3char001.xetex.tlg
index 0c3a6c6..7182c1c 100644
--- a/l3kernel/testfiles/m3char001.xetex.tlg
+++ b/l3kernel/testfiles/m3char001.xetex.tlg
@@ -326,12 +326,15 @@ l. ... }
============================================================
TEST 4: Errors
============================================================
-! Bad character code (-1).
-\__char_generate_auxiii:nn ...>\utex_charcat:D #1
-#2
+! Undefined control sequence.
+<argument> \LaTeX3 error:
+Charcode requested out of engine range.
l. ... }
-A Unicode scalar value must be between 0 and "10FFFF.
-I changed this one to zero.
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
! Undefined control sequence.
<argument> \LaTeX3 error:
Invalid catcode for char generation.
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the latex3-commits mailing list