[latex3-commits] [l3svn] 01/03: Making \char_generate:nn fully expandable

Thu Sep 10 17:54:13 CEST 2015

This is an automated email from the git hooks/post-receive script.

joseph pushed a commit to branch master
in repository l3svn.

commit cb5d10b9f42c3d4efac7df47477ed3d5603694f9
Author: Qing Lee <sobenlee at gmail.com>
Date:   Thu Sep 10 19:29:20 2015 +0800

    Making \char_generate:nn fully expandable
    
    The result is generated in exactly two expansion steps.
---
 l3kernel/l3candidates.dtx              |  141 +++++++++++++++++---------------
 l3kernel/testfiles/m3char001.xetex.tlg |   13 +--
 2 files changed, 83 insertions(+), 71 deletions(-)

diff --git a/l3kernel/l3candidates.dtx b/l3kernel/l3candidates.dtx
index f312878..a933028 100644
--- a/l3kernel/l3candidates.dtx
+++ b/l3kernel/l3candidates.dtx
@@ -822,7 +822,7 @@
 %   letter-like characters which can also be case-changed).  Begin-group and
 %   end-group characters in the \meta{tokens} are normalized and become |{|
 %   and |}|, respectively.
-%   
+%
 %   Importantly, notice that these functions are intended for working with
 %   user text for typesetting. For case changing programmatic data see the
 %   \pkg{l3str} module and discussion there of \cs{str_lower_case:n},
@@ -833,7 +833,7 @@
 % input in the form of token lists or expandable functions will be expanded
 % \emph{unless} it falls within one of the special handling classes described
 % below. This expansion approach means that in general the result of case
-% changing will match the \enquote{natural} outcome expected from a 
+% changing will match the \enquote{natural} outcome expected from a
 % \enquote{functional} approach to case modification. For example
 % \begin{verbatim}
 %   \tl_set:Nn \l_tmpa_tl { hello }
@@ -859,7 +859,7 @@
 %   \end{verbatim}
 %   Material inside math mode is left entirely unchanged: in particular, no
 %   expansion is undertaken.
-%   
+%
 %   Detection of math mode is controlled by the list of tokens in
 %   \cs{l_tl_case_change_math_tl}, which should be in open--close pairs. In
 %   package mode the standard settings is
@@ -875,7 +875,7 @@
 %   the case changing functions are intended to apply to this should not be
 %   an issue.
 % \end{variable}
-%   
+%
 % \begin{variable}{\l_tl_case_change_exclude_tl}
 %   Case changing can be prevented by using any command on the list
 %   \cs{l_tl_case_change_exclude_tl}. Each entry should be a function
@@ -902,7 +902,7 @@
 %   and |\ref| are automatically included in the list for exclusion from
 %   case changing.
 % \end{variable}
-% 
+%
 % In package mode, the case change system will also
 % convert text stored using the \LaTeXe{} \enquote{LICR} approach. This
 % will upper/lower case tokens as implemented for the font encodings
@@ -926,12 +926,12 @@
 % \end{verbatim}
 % When finding the first \enquote{letter} for this process, any content in
 % math mode or covered by \cs{l_tl_case_change_exclude_tl} is ignored.
-% 
+%
 % (Note that the Unicode Consortium describe this as \enquote{title case}, but
 % that in English title case applies on a word-by-word basis. The
 % \enquote{mixed} case implemented here is a lower level concept needed for
 % both \enquote{title} and \enquote{sentence} casing of text.)
-% 
+%
 % \begin{variable}{\l_tl_mixed_case_ignore_tl}
 %   The list of characters to ignore when searching for the first
 %   \enquote{letter} in mixed-casing is determined by
@@ -1040,7 +1040,7 @@
 %      \cs{char_set_active_eq:NN} \meta{char} \meta{function}
 %   \end{syntax}
 %   Sets the behaviour of the \meta{char} in situations where it is
-%   active (category code $13$) to be equivalent to that of the 
+%   active (category code $13$) to be equivalent to that of the
 %   \meta{function}. The category code of the \meta{char} is
 %   \emph{unchanged} by this process. The \meta{function} may itself
 %   be an active character.
@@ -1061,7 +1061,7 @@
 %
 % \begin{function}[EXP, added = 2015-09-09]{\char_generate:nn}
 %   \begin{syntax}
-%      \cs{char_generate:nn} \meta{charcode} \meta{catcode}
+%      \cs{char_generate:nn} \Arg{charcode} \Arg{catcode}
 %   \end{syntax}
 %   Generates a character token of the given \meta{charcode} and \meta{catcode}
 %   (both of which may be integer expressions). The \meta{catcode} may be
@@ -2994,7 +2994,7 @@
 \cs_generate_variant:Nn \tl_if_head_eq_catcode:nNTF { o }
 %    \end{macrocode}
 % \end{macro}
-% 
+%
 % \begin{macro}[EXP]{\tl_lower_case:n, \tl_upper_case:n, \tl_mixed_case:n}
 % \begin{macro}[EXP]{\tl_lower_case:nn, \tl_upper_case:nn, \tl_mixed_case:nn}
 %   The user level functions here are all wrappers around the internal
@@ -3294,7 +3294,7 @@
 %   a string then examining the first four characters. For Cyrillic, the
 %   fourth character can be used for a second split based on the character
 %   code.
-%   
+%
 %   Note that as this is dependent on \LaTeXe{}, in format mode the code
 %   goes straight to the final phase of handling control sequences.
 %    \begin{macrocode}
@@ -3333,8 +3333,8 @@
 \cs_new:Npn \@@_change_case_cs_cyr:NnNNNNw #1#2#3#4#5#6#7 \q_stop
   {
     \@@_change_case_cs_type:Nnnnn #1
-      { cyrillic } 
-      { 
+      { cyrillic }
+      {
         #2 _
         \int_to_roman:n
           {
@@ -3462,7 +3462,7 @@
 % \end{macro}
 % \end{macro}
 % \end{macro}
-%    
+%
 % \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:Nnw}
 % \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:w}
 % \begin{macro}[aux, EXP]{\@@_change_case_lower_sigma:Nw}
@@ -3767,7 +3767,7 @@
 %   to generic routines, but at the cost of making the process rather opaque.
 %   Instead, the approach taken here is to use a dedicated set of functions
 %   which keep the different loop requirements clearly separate.
-%   
+%
 %   The main loop looks for the first \enquote{real} char in the input
 %   (skipping any pre-letter chars). Once one is found, it is case changed to
 %   upper case but first checking that there is not an entry in the exceptions
@@ -3862,7 +3862,7 @@
         \@@_change_case_cs:Nnnn #1 { upper }
           {
             \@@_change_case_loop:wnn
-              #2 \q_recursion_stop { lower } {#3} 
+              #2 \q_recursion_stop { lower } {#3}
           }
           {
             \@@_change_case_cs:N #1
@@ -4016,7 +4016,7 @@
 %</package>
 %    \end{macrocode}
 % \end{variable}
-% 
+%
 % \begin{variable}{\l_tl_mixed_case_ignore_tl}
 %   Characters to skip over when finding the first letter in a word to be
 %   mixed cased.
@@ -4157,7 +4157,7 @@
       \cyrishrt     \CYRISHRT
       \cyrishrtdsc  \CYRISHRTDSC
       \cyrizh       \CYRIZH
-      \cyrje        \CYRJE 
+      \cyrje        \CYRJE
       \cyrk         \CYRK
       \cyrkbeak     \CYRKBEAK
       \cyrkdsc      \CYRKDSC
@@ -4225,7 +4225,7 @@
       \cyrz         \CYRZ
       \cyrzdsc      \CYRZDSC
       \cyrzh        \CYRZH
-      \cyrzhdsc     \CYRZHDSC 
+      \cyrzhdsc     \CYRZHDSC
     }
     { \q_recursion_tail }
   \@@_change_case_setup:nnnn
@@ -4280,7 +4280,7 @@
       \accdialytikatonos       { \exp_stop_f: \LGR@accDialytika }
       \accdialytikavaria       { \exp_stop_f: \LGR@accDialytika }
       \accdialytikaperispomeni { \exp_stop_f: \LGR@accDialytika }
-      \accperispomeni          { \exp_stop_f: \LGR@accdropped }   
+      \accperispomeni          { \exp_stop_f: \LGR@accdropped }
     }
   \tl_const:Nn \c_@@_change_case_acc_lower_tl { }
   \tl_const:Nn \c_@@_change_case_misc_upper_tl
@@ -4378,8 +4378,9 @@
 %    \begin{macrocode}
 \cs_new:Npn \char_generate:nn #1#2
   {
-    \exp_args:Nff \@@_generate_auxi:nn
-      { \int_eval:n {#1} } { \int_eval:n {#2} }
+    \exp:w \exp_after:wN \@@_generate_auxi:ww
+      \int_use:N \__int_eval:w #1 \exp_after:wN ;
+      \int_use:N \__int_eval:w #2 ;
   }
 %    \end{macrocode}
 %   Before doing any actual conversion, first some special case filtering.
@@ -4391,7 +4392,7 @@
 %   done with macro emulation either, so is flagged up separately. That
 %   done, hand off to the engine-dependent part.
 %    \begin{macrocode}
-\cs_new:Npn \@@_generate_auxi:nn #1#2
+\cs_new:Npn \@@_generate_auxi:ww #1 ; #2 ;
   {
     \if_int_compare:w #2 = \c_thirteen
       \__msg_kernel_expandable_error:nn { kernel } { char-active }
@@ -4403,9 +4404,19 @@
           \__msg_kernel_expandable_error:nn { kernel } { char-space }
         \fi:
       \else:
-        \@@_generate_auxii:nn {#1} {#2}
+        \if_int_odd:w 0
+            \if_int_compare:w #2 < \c_one      1 \fi:
+            \if_int_compare:w #2 = \c_five     1 \fi:
+            \if_int_compare:w #2 = \c_nine     1 \fi:
+            \if_int_compare:w #2 > \c_thirteen 1 \fi: \exp_stop_f:
+          \__msg_kernel_expandable_error:nn { kernel }
+            { char-invalid-catcode }
+        \else:
+          \@@_generate_auxii:nn {#1} {#2}
+        \fi:
       \fi:
-    \fi: 
+    \fi:
+    \exp_end:
   }
 \tl_new:N \l_@@_tmp_tl
 %    \end{macrocode}
@@ -4421,39 +4432,36 @@
   \cs_set_nopar:Npn ^^L { }
 %</package>
   \char_set_catcode_other:n { 0 }
-  \if_int_compare:w 0
-    \cs_if_exist:NT \luatex_directlua:D { 1 }
-    \cs_if_exist:NT \utex_charcat:D     { 1 }
-    > \c_zero
+  \if_int_odd:w 0
+      \cs_if_exist:NT \luatex_directlua:D { 1 }
+      \cs_if_exist:NT \utex_charcat:D     { 1 } \exp_stop_f:
     \cs_new:Npn \@@_generate_auxii:nn #1#2
       {
-        \if_int_compare:w 0
-          \if_int_compare:w #2 < \c_one      1 \fi:
-          \if_int_compare:w #2 = \c_five     1 \fi:
-          \if_int_compare:w #2 = \c_nine     1 \fi:
-          \if_int_compare:w #2 > \c_thirteen 1 \fi:
-          > \c_zero
-          \@@_generate_invalid_catcode:
-        \else:
-          \if_int_compare:w 0
+        \if_int_odd:w 0
             \if_int_compare:w #1 < \c_zero   1 \fi:
-            \if_int_compare:w #1 > 1114111 ~ 1 \fi:
-            > \c_one
-            \__msg_kernel_expandable_error:nn { kernel }
-              { char-out-of-range }
-          \else:
-            \@@_generate_auxiii:nn {#1} {#2}
-          \fi:
+            \if_int_compare:w #1 > 1114111 ~ 1 \fi: \exp_stop_f:
+          \__msg_kernel_expandable_error:nn { kernel }
+            { char-out-of-range }
+        \else:
+          \@@_generate_auxiii:nnw {#1} {#2}
         \fi:
       }
     \cs_if_exist:NTF \luatex_directlua:D
       {
-        \cs_new:Npn \@@_generate_auxiii:nn #1#2
-          { \lua_now_x:n { l3kernel.charcat(#1, #2) } }
+        \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+          {
+            #3
+            \exp_after:wN \exp_end:
+            \luatex_directlua:D { l3kernel.charcat(#1, #2) }
+          }
       }
       {
-        \cs_new:Npn \@@_generate_auxiii:nn #1#2
-          { \utex_charcat:D  #1 ~ #2 ~ }
+        \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+          {
+            #3
+            \exp_after:wN \exp_end:
+            \utex_charcat:D  #1 ~ #2 ~
+          }
       }
   \else:
 %    \end{macrocode}
@@ -4464,15 +4472,14 @@
 %   is all done such that it can be quickly accessed using a |\if_case:w|
 %   low-level conditional. There are a few things to notice here.
 %   As |^^L| is |\outer| we need to locally set it to avoid a problem.
-%   To get open/close braces into the list, they are set up using |if_false:|
+%   To get open/close braces into the list, they are set up using |\if_false:|
 %   pairing here and will later be |x|-type expanded into the desired form.
 %   For making spaces, there needs to be an |o|-type expansion of a |\use:n|
 %   (or some other tokenization) to avoid dropping the space. We also
 %   set up active tokens although they are (currently) filtered out by the
-%   interface layer (\tn{Ucharcat}| cannot make active tokens).
+%   interface layer (\tn{Ucharcat} cannot make active tokens).
 %    \begin{macrocode}
-      \tl_set:Nn \l_@@_tmp_tl
-        { \exp_not:n { \@@_generate_invalid_catcode: \or: } }
+      \tl_set:Nn \l_@@_tmp_tl { \exp_not:N \or: }
       \char_set_catcode_group_begin:n { 0 } % {
       \tl_put_right:Nn \l_@@_tmp_tl { ^^@ \if_false: } }
       \char_set_catcode_group_end:n { 0 }
@@ -4482,14 +4489,14 @@
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
       \char_set_catcode_alignment:n { 0 }
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
-      \tl_put_right:Nn \l_@@_tmp_tl { \or: \@@_generate_invalid_catcode: }
+      \tl_put_right:Nn \l_@@_tmp_tl { \or: }
       \char_set_catcode_parameter:n { 0 }
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
       \char_set_catcode_math_superscript:n { 0 }
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
       \char_set_catcode_math_subscript:n { 0 }
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
-      \tl_put_right:Nn \l_@@_tmp_tl { \or: \@@_generate_invalid_catcode: }
+      \tl_put_right:Nn \l_@@_tmp_tl { \or: }
       \char_set_catcode_space:n { 0 }
       \tl_put_right:No \l_@@_tmp_tl { \use:n { \or: } ^^@ }
       \char_set_catcode_letter:n { 0 }
@@ -4498,7 +4505,6 @@
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
       \char_set_catcode_active:n { 0 }
       \tl_put_right:Nn \l_@@_tmp_tl { \or: ^^@ }
-      \tl_put_right:Nn \l_@@_tmp_tl { \else: \@@_generate_invalid_catcode: }
 %    \end{macrocode}
 %   Convert the above temporary list into a series of constant token
 %   lists, one for each character code, using \tn{tex_lowercase:D} to
@@ -4525,25 +4531,28 @@
       \cs_new:Npn \@@_generate_auxii:nn #1#2
         {
           \tl_if_exist:cTF { c_@@_ \__int_to_roman:w #1 _tl }
-            {
-              \exp_after:wN \exp_after:wN
-              \if_case:w #2
-                \exp_last_unbraced:Nv \exp_stop_f:
-                  { c_@@_ \__int_to_roman:w #1 _tl }
-              \fi:
-            }
+            { \@@_generate_auxiii:nnw {#1} {#2} }
             {
               \__msg_kernel_expandable_error:nn { kernel }
                 { char-out-of-range }
             }
         }
+      \cs_new:Npn \@@_generate_auxiii:nnw #1#2#3 \exp_end:
+        {
+          #3
+          \exp_after:wN \exp_after:wN
+          \exp_after:wN \exp_end:
+          \exp_after:wN \exp_after:wN
+          \if_case:w #2
+            \exp_last_unbraced:Nv \exp_stop_f:
+              { c_@@_ \__int_to_roman:w #1 _tl }
+          \fi:
+        }
   \fi:
 \group_end:
 %    \end{macrocode}
-%   Job done, set up a few messages. 
+%   Job done, set up a few messages.
 %    \begin{macrocode}
-\cs_new:Npn \@@_generate_invalid_catcode:
-  { \__msg_kernel_expandable_error:nn { kernel } { char-invalid-catcode } }
 \__msg_kernel_new:nnn { kernel } { char-active }
   { Cannot~generate~active~chars. }
 \__msg_kernel_new:nnn { kernel } { char-invalid-catcode }
diff --git a/l3kernel/testfiles/m3char001.xetex.tlg b/l3kernel/testfiles/m3char001.xetex.tlg
index 0c3a6c6..7182c1c 100644
--- a/l3kernel/testfiles/m3char001.xetex.tlg
+++ b/l3kernel/testfiles/m3char001.xetex.tlg
@@ -326,12 +326,15 @@ l. ...  }
 ============================================================
 TEST 4: Errors
 ============================================================
-! Bad character code (-1).
-\__char_generate_auxiii:nn ...>\utex_charcat:D #1 
-#2 
+! Undefined control sequence.
+<argument> \LaTeX3 error: 
+Charcode requested out of engine range.
 l. ...  }
-A Unicode scalar value must be between 0 and "10FFFF.
-I changed this one to zero.
+The control sequence at the end of the top line
+of your error message was never \def'ed. If you have
+misspelled it (e.g., `\hobx'), type `I' and the correct
+spelling (e.g., `I\hbox'). Otherwise just continue,
+and I'll forget about whatever was undefined.
 ! Undefined control sequence.
 <argument> \LaTeX3 error: 
 Invalid catcode for char generation.

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.