[latex3-commits] [git/LaTeX3-latex3-latex3] l3text: Add \text_purify:n (253291ebd)
Joseph Wright
joseph.wright at morningstar2.co.uk
Fri Dec 6 10:22:48 CET 2019
Repository : https://github.com/latex3/latex3
On branch : l3text
Link : https://github.com/latex3/latex3/commit/253291ebd9663e08bec2c086b0be59ada3e08eaa
>---------------------------------------------------------------
commit 253291ebd9663e08bec2c086b0be59ada3e08eaa
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Fri Dec 6 07:29:38 2019 +0000
Add \text_purify:n
This removes *text* formatting: math mode still
to be considered.
>---------------------------------------------------------------
253291ebd9663e08bec2c086b0be59ada3e08eaa
l3kernel/CHANGELOG.md | 1 +
l3kernel/l3text.dtx | 734 ++++++++++++++++++++-
.../{m3text001.tlg => m3text003.luatex.tlg} | 22 +-
l3kernel/testfiles/m3text003.lvt | 62 ++
.../{m3str002.uptex.tlg => m3text003.ptex.tlg} | 27 +-
.../testfiles/{m3text001.tlg => m3text003.tlg} | 22 +-
.../{m3text001.tlg => m3text003.uptex.tlg} | 22 +-
.../{m3text001.tlg => m3text003.xetex.tlg} | 22 +-
8 files changed, 830 insertions(+), 82 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index a711ed31f..1bea4ea51 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -13,6 +13,7 @@ this project uses date-based 'snapshot' version identifiers.
- `\str_foldcase:n`
- `\str_lowercase:n`
- `\str_uppercase:n`
+- `\text_purify:n`
- `\text_lowercase:n`, `\text_lowercase:nn`
- `\text_uppercase:n`, `\text_uppercase:nn`
- `\text_titlecase:n`, `\text_titlecase:nn`
diff --git a/l3kernel/l3text.dtx b/l3kernel/l3text.dtx
index 700c9a3d9..ab7513970 100644
--- a/l3kernel/l3text.dtx
+++ b/l3kernel/l3text.dtx
@@ -159,6 +159,33 @@
% \end{itemize}
% \end{function}
%
+% \begin{function}[rEXP, added = 2019-12-05]{\text_purify:n}
+% \begin{syntax}
+% \cs{text_purify:n} \Arg{text}
+% \end{syntax}
+% Takes user input \meta{text} and expands as described for
+% \cs{text_expand:n}, then removes all functions from the resulting
+% text. No processing takes place of math mode material (as delimited by
+% pairs given in \cs{l_text_math_delims_tl} or as the argument to commands
+% listed in \cs{l_text_math_arg_tl}); these tokens and functions are left
+% in-place. Non-expandable functions present in the \meta{text} must either
+% have a defined equivalent (see \cs{text_declare_purify_equivalent:Nn})
+% or will be removed from the result.
+% \end{function}
+%
+% \begin{function}[added = 2019-12-05]
+% {
+% \text_declare_purify_equivalent:Nn ,
+% \text_declare_purify_equivalent:Nx
+% }
+% \begin{syntax}
+% \cs{text_declare_purify_equivalent:Nn} \meta{cmd} \Arg{replacement}
+% \end{syntax}
+% Declares that the \meta{replacement} tokens should be used whenever the
+% \meta{cmd} (a single token) is encountered. The \meta{replacement} tokens
+% should be expandable.
+% \end{function}
+%
% \begin{variable}{\l_text_accents_tl}
% Lists commands which represent accents, and which are left unchanged
% by expansion.
@@ -372,6 +399,29 @@
% \end{macrocode}
% \end{macro}
%
+% \begin{macro}[EXP, TF]{\@@_if_expandable:N}
+% Decide whether a token is worth expanding in text processing. This is
+% stricter than the engine view: a token accepted by
+% \cs{token_if_expandable:NTF} is still rejected when it is a protected
+% macro (long or not) or has the same meaning as \cs{q_recursion_tail}.
+% \begin{macrocode}
+\prg_new_conditional:Npnn \@@_if_expandable:N #1 { T , F , TF }
+  {
+    \token_if_expandable:NTF #1
+      {
+        \token_if_protected_macro:NTF #1
+          { \prg_return_false: }
+          {
+            \token_if_protected_long_macro:NTF #1
+              { \prg_return_false: }
+              {
+                \token_if_eq_meaning:NNTF \q_recursion_tail #1
+                  { \prg_return_false: }
+                  { \prg_return_true: }
+              }
+          }
+      }
+      { \prg_return_false: }
+  }
+% \end{macrocode}
+% \end{macro}
+%
+%
% \subsection{Configuration variables}
%
% \begin{variable}{\l_text_accents_tl, \l_text_letterlike_tl}
@@ -482,7 +532,6 @@
% \begin{macro}[rEXP]{\@@_expand_protect:nN}
% \begin{macro}[rEXP]{\@@_expand_protect:Nw}
% \begin{macro}[rEXP]{\@@_expand_cs_expand:N}
-% \begin{macro}[rEXP]{\@@_expand_if_expand:NTF}
% After precautions against |&| tokens, start a simple loop: that of
% course means that \enquote{text} cannot contain the two recursion
% quarks.
@@ -772,28 +821,13 @@
% \begin{macrocode}
\cs_new:Npn \@@_expand_cs_expand:N #1
{
- \@@_expand_if_expand:NTF #1
+ \@@_if_expandable:NTF #1
{ \exp_after:wN \@@_expand_loop:w #1 }
{
\exp_not:n {#1}
\@@_expand_loop:w
}
}
-\cs_new:Npn \@@_expand_if_expand:NTF #1
- {
- \token_if_expandable:NTF #1
- {
- \bool_lazy_any:nTF
- {
- { \token_if_protected_macro_p:N #1 }
- { \token_if_protected_long_macro_p:N #1 }
- { \token_if_eq_meaning_p:NN \q_recursion_tail #1 }
- }
- { \use_ii:nn }
- { \use_i:nn }
- }
- { \use_ii:nn }
- }
% \end{macrocode}
% \end{macro}
% \end{macro}
@@ -815,7 +849,6 @@
% \end{macro}
% \end{macro}
% \end{macro}
-% \end{macro}
%
% \subsection{Case changing}
%
@@ -1806,7 +1839,7 @@
% \end{macrocode}
% \end{macro}
%
-% \subsection{Case changing data}
+% \subsection{Case changing data for $8$-bit engines}
%
% \begin{variable}
% {
@@ -2064,6 +2097,669 @@
\group_end:
% \end{macrocode}
%
+% \subsection{Purifying text}
+%
+% \begin{macro}[rEXP]{\text_purify:n, \@@_purify:n}
+% \begin{macro}[rEXP]{\@@_purify_loop:w}
+% \begin{macro}[rEXP]{\@@_purify_N_type:N}
+% \begin{macro}[rEXP]{\@@_purify_group:n}
+% \begin{macro}[rEXP]{\@@_purify_space:w}
+% As in the other parts of the module, the work is done by a standard \enquote{action} loop. Expansion is applied
+% up-front by passing the input through \cs{text_expand:n}; the loop input is ended by the two recursion quarks.
+% \begin{macrocode}
+\cs_new:Npn \text_purify:n #1
+ {
+ \group_align_safe_begin:
+ \exp_args:Ne \@@_purify:n
+ { \text_expand:n {#1} }
+ \group_align_safe_end:
+ }
+\cs_new:Npn \@@_purify:n #1
+ { \@@_purify_loop:w #1 \q_recursion_tail \q_recursion_stop }
+% \end{macrocode}
+% The main loop dispatches on the head of the remaining input: an \texttt{N}-type token, a brace group or a space.
+% Unlike the expansion or case changing code, a group is simply unbraced and processed inline, so its braces do not
+% reach the result; a space is passed through unchanged. The real work happens for \texttt{N}-type tokens.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_loop:w #1 \q_recursion_stop
+ {
+ \tl_if_head_is_N_type:nTF {#1}
+ { \@@_purify_N_type:N }
+ {
+ \tl_if_head_is_group:nTF {#1}
+ { \@@_purify_group:n }
+ { \@@_purify_space:w }
+ }
+ #1 \q_recursion_stop
+ }
+\cs_new:Npn \@@_purify_group:n #1 { \@@_purify_loop:w #1 }
+\exp_last_unbraced:NNo \cs_new:Npn \@@_purify_space:w \c_space_tl
+ {
+ \c_space_tl
+ \@@_purify_loop:w
+ }
+% \end{macrocode}
+% Math mode is detected as in the other functions: the token is compared with each opening delimiter taken from
+% \cs{l_text_math_delims_tl} (read as open/close pairs). On a match, a second loop copies everything unchanged
+% up to and including the closing delimiter; if nothing matches, the math-argument commands are checked next.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_N_type:N #1
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \@@_purify_N_type_aux:N #1
+ }
+\cs_new:Npn \@@_purify_N_type_aux:N #1
+ {
+ \exp_after:wN \@@_purify_math_search:NNN
+ \exp_after:wN #1 \l_text_math_delims_tl
+ \q_recursion_tail ?
+ \q_recursion_stop
+ }
+\cs_new:Npn \@@_purify_math_search:NNN #1#2#3
+ {
+ \quark_if_recursion_tail_stop_do:Nn #2
+ { \@@_purify_math_cmd:N #1 }
+ \token_if_eq_meaning:NNTF #1 #2
+ {
+ \use_i_delimit_by_q_recursion_stop:nw
+ {
+ \exp_not:n {#1}
+ \@@_purify_math_loop:Nw #3
+ }
+ }
+ { \@@_purify_math_search:NNN #1 }
+ }
+\cs_new:Npn \@@_purify_math_loop:Nw #1#2 \q_recursion_stop
+ {
+ \tl_if_head_is_N_type:nTF {#2}
+ { \@@_purify_math_N_type:NN }
+ {
+ \tl_if_head_is_group:nTF {#2}
+ { \@@_purify_math_group:Nn }
+ { \@@_purify_math_space:Nw }
+ }
+ #1#2 \q_recursion_stop
+ }
+\cs_new:Npn \@@_purify_math_N_type:NN #1#2
+ {
+ \quark_if_recursion_tail_stop:N #2
+ \exp_not:n {#2}
+ \token_if_eq_meaning:NNTF #2 #1
+ { \@@_purify_loop:w }
+ { \@@_purify_math_loop:Nw #1 }
+ }
+\cs_new:Npn \@@_purify_math_group:Nn #1#2
+ {
+ { \exp_not:n {#2} }
+ \@@_purify_math_loop:Nw #1
+ }
+\exp_after:wN \cs_new:Npn \exp_after:wN \@@_purify_math_space:Nw
+ \exp_after:wN # \exp_after:wN 1 \c_space_tl
+ {
+ \c_space_tl
+ \@@_purify_math_loop:Nw #1
+ }
+% \end{macrocode}
+% Then handle math mode given as a command argument: if the token is listed in \cs{l_text_math_arg_tl}, it is
+% left in the output together with its braced argument, untouched; if not, go on to look for a replacement.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_math_cmd:N #1
+ {
+ \exp_after:wN \@@_purify_math_cmd:NN \exp_after:wN #1
+ \l_text_math_arg_tl \q_recursion_tail \q_recursion_stop
+ }
+\cs_new:Npn \@@_purify_math_cmd:NN #1#2
+ {
+ \quark_if_recursion_tail_stop_do:Nn #2
+ { \@@_purify_replace:N #1 }
+ \cs_if_eq:NNTF #2 #1
+ {
+ \use_i_delimit_by_q_recursion_stop:nw
+ { \@@_purify_math_cmd:Nn #1 }
+ }
+ { \@@_purify_math_cmd:NN #1 }
+ }
+\cs_new:Npn \@@_purify_math_cmd:Nn #1#2
+ {
+ \exp_not:n { #1 {#2} }
+ \@@_purify_loop:w
+ }
+% \end{macrocode}
+% For \texttt{N}-type tokens, we first look for a declared replacement (a token list named after the token): this can
+% therefore cover anything, and the replacement is itself fed back into the loop. Failing that, tokens which are not
+% control sequences are kept, expandable control sequences are expanded by one step and all others are dropped.
+% We also allow for \LaTeXe{} \tn{protect}: it is removed along with the single token which follows it. There is an
+% assumption that we don't have |\protect { \oops }| or similar, but that's
+% also in the expansion code and seems like a reasonable balance.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_replace:N #1
+ {
+ \cs_if_exist:cTF { l_@@_purify_ \token_to_str:N #1 _tl }
+ {
+ \exp_args:Nv \@@_purify_replace:n
+ { l_@@_purify_ \token_to_str:N #1 _tl }
+ }
+ {
+ \token_if_cs:NTF #1
+ { \@@_purify_expand:N #1 }
+ {
+ \exp_not:n {#1}
+ \@@_purify_loop:w
+ }
+ }
+ }
+\cs_new:Npn \@@_purify_replace:n #1 { \@@_purify_loop:w #1 }
+\cs_new:Npn \@@_purify_expand:N #1
+ {
+ \str_if_eq:nnTF {#1} { \protect }
+ { \@@_purify_protect:N }
+ {
+ \@@_if_expandable:NTF #1
+ { \exp_after:wN \@@_purify_loop:w #1 }
+ { \@@_purify_loop:w }
+ }
+ }
+\cs_new:Npn \@@_purify_protect:N #1
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \@@_purify_loop:w
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
+% \begin{macro}
+% {
+% \text_declare_purify_equivalent:Nn,
+% \text_declare_purify_equivalent:Nx
+% }
+% The replacement for a command is held in a token list whose name is
+% built from the string form of the command. The variable is created if
+% it does not yet exist, and is then set to the replacement text.
+% \begin{macrocode}
+\cs_new_protected:Npn \text_declare_purify_equivalent:Nn #1#2
+  {
+    \tl_if_exist:cF { l_@@_purify_ \token_to_str:N #1 _tl }
+      { \tl_new:c { l_@@_purify_ \token_to_str:N #1 _tl } }
+    \tl_set:cn { l_@@_purify_ \token_to_str:N #1 _tl } {#2}
+  }
+\cs_generate_variant:Nn \text_declare_purify_equivalent:Nn { Nx }
+% \end{macrocode}
+% \end{macro}
+%
+% Now pre-define a range of standard commands that need dedicated definitions in purified text. First the
+% font-related ones, all of which are disabled: selection commands are dropped along with their arguments, text font
+% commands are reduced to their argument using \cs{use:n}, and font declarations and size switches map to nothing.
+% \begin{macrocode}
+\tl_map_inline:nn
+ {
+ \fontencoding
+ \fontfamily
+ \fontseries
+ \fontshape
+ }
+ { \text_declare_purify_equivalent:Nn #1 { \use_none:n } }
+\text_declare_purify_equivalent:Nn \fontsize { \use_none:nn }
+\text_declare_purify_equivalent:Nn \selectfont { }
+\text_declare_purify_equivalent:Nn \usefont { \use_none:nnnn }
+\tl_map_inline:nn
+ {
+ \emph
+ \text
+ \textnormal
+ \textrm
+ \textsf
+ \texttt
+ \textbf
+ \textmd
+ \textit
+ \textsl
+ \textup
+ \textsc
+ \textulc
+ }
+ { \text_declare_purify_equivalent:Nn #1 { \use:n } }
+\tl_map_inline:nn
+ {
+ \normalfont
+ \rmfamily
+ \sffamily
+ \ttfamily
+ \bfseries
+ \mdseries
+ \itshape
+ \scshape
+ \slshape
+ \upshape
+ \em
+ \Huge
+ \LARGE
+ \Large
+ \footnotesize
+ \huge
+ \large
+ \normalsize
+ \scriptsize
+ \small
+ \tiny
+ }
+ { \text_declare_purify_equivalent:Nn #1 { } }
+% \end{macrocode}
+% Environments have to be handled by pure expansion. For \tn{begin} the
+% environment name is turned into the start function using \cs{use:c}.
+% For \tn{end} the matching function has the name of the environment
+% prefixed by \texttt{end}, so a small auxiliary builds that name:
+% using \cs{use:c} directly would run the start function a second time.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_end_env:n #1 { \use:c { end #1 } }
+\text_declare_purify_equivalent:Nn \begin { \use:c }
+\text_declare_purify_equivalent:Nn \end { \@@_purify_end_env:n }
+% \end{macrocode}
+% Some common symbols: |\\| is simply removed, whilst each escaped special character is replaced by the result of \cs{cs_to_str:N}.
+% \begin{macrocode}
+\text_declare_purify_equivalent:Nn \\ { }
+\tl_map_inline:nn
+ { \{ \} \# \$ \% \_ }
+ { \text_declare_purify_equivalent:Nx #1 { \cs_to_str:N #1 } }
+% \end{macrocode}
+% Cross-referencing: \tn{label} is removed together with its argument.
+% \begin{macrocode}
+\text_declare_purify_equivalent:Nn \label { \use_none:n }
+% \end{macrocode}
+%
+% \subsection{Accent and letter-like data for purifying text}
+%
+% In contrast to case changing, both $8$-bit and Unicode engines need
+% information for text purification to handle accents and letter-like
+% functions: these all need to be removed. However, the results are
+% of course engine-dependent.
+%
+% For the letter-like commands, life is relatively easy: they are all
+% simply added as standard exceptions. Each command is listed with the
+% hexadecimal codepoint of the character it stands for. With Unicode
+% engines that character is generated directly; with $8$-bit engines the
+% first two bytes of its \textsc{utf}-8 form are generated as active
+% characters (every codepoint in the list needs exactly two bytes).
+% Note that \tn{IJ} and \tn{ij} are distinct codepoints (U+0132 and
+% U+0133, respectively). The only oddity is \tn{SS}, which
+% gets converted to two letters. (At some stage an alternative version
+% can presumably be added to \pkg{babel} or similar.)
+% \begin{macrocode}
+\bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_set_protected:Npn \@@_loop:Nn #1#2
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \text_declare_purify_equivalent:Nx #1
+ {
+ \char_generate:nn { "#2 }
+ { \char_value_catcode:n { "#2 } }
+ }
+ \@@_loop:Nn
+ }
+ }
+ {
+ \cs_set_protected:Npn \@@_loop:Nn #1#2
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \text_declare_purify_equivalent:Nx #1
+ {
+ \exp_args:Ne \@@_tmp:n
+ { \char_codepoint_to_bytes:n { "#2 } }
+ }
+ \@@_loop:Nn
+ }
+ \cs_set:Npn \@@_tmp:n #1 { \@@_tmp:nnnn #1 }
+ \cs_set:Npn \@@_tmp:nnnn #1#2#3#4
+ {
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#1} { 13 }
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#2} { 13 }
+ }
+ }
+\@@_loop:Nn
+ \AA { 00C5 }
+ \AE { 00C6 }
+ \DH { 00D0 }
+ \DJ { 0110 }
+ \IJ { 0132 }
+ \L { 0141 }
+ \NG { 014A }
+ \O { 00D8 }
+ \OE { 0152 }
+ \TH { 00DE }
+ \aa { 00E5 }
+ \ae { 00E6 }
+ \dh { 00F0 }
+ \dj { 0111 }
+ \i { 0131 }
+ \j { 0237 }
+ \ij { 0133 }
+ \l { 0142 }
+ \ng { 014B }
+ \o { 00F8 }
+ \oe { 0153 }
+ \ss { 00DF }
+ \th { 00FE }
+ \q_recursion_tail ?
+ \q_recursion_stop
+\text_declare_purify_equivalent:Nn \SS { SS }
+% \end{macrocode}
+%
+% \begin{macro}[rEXP]{\@@_purify_accent:NN}
+% Accent \textsc{licr} handling is a little more complex. Accents may exist
+% as pre-composed codepoints or as independent glyphs. If a pre-composed entry exists for the accent plus base
+% token, that single token list is used; otherwise the base is output first, followed by the combining accent,
+% \emph{i.e.}~re-ordered compared to the input. Every command in \cs{l_text_accents_tl} is set up to use this function.
+% \begin{macrocode}
+\cs_new:Npn \@@_purify_accent:NN #1#2
+ {
+ \cs_if_exist:cTF
+ { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
+ {
+ \exp_not:v
+ { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
+ }
+ {
+ \exp_not:n {#2}
+ \exp_not:v { c_@@_purify_ \token_to_str:N #1 _tl }
+ }
+ }
+\tl_map_inline:Nn \l_text_accents_tl
+ { \text_declare_purify_equivalent:Nn #1 { \@@_purify_accent:NN #1 } }
+% \end{macrocode}
+% First set up the combining accents: each is saved as a constant token list named after the accent command.
+% \begin{macrocode}
+\group_begin:
+ \cs_set_protected:Npn \@@_loop:Nn #1#2
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \tl_const:cx { c_@@_purify_ \token_to_str:N #1 _tl }
+ { \@@_tmp:n {#2} }
+ \@@_loop:Nn
+ }
+ \bool_lazy_or:nnTF
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_set:Npn \@@_tmp:n #1
+ {
+ \char_generate:nn { "#1 }
+ { \char_value_catcode:n { "#1 } }
+ }
+ }
+ {
+ \cs_set:Npn \@@_tmp:n #1
+ {
+ \exp_args:Ne \@@_tmp_aux:n
+ { \char_codepoint_to_bytes:n { "#1 } }
+ }
+ \cs_set:Npn \@@_tmp_aux:n #1 { \@@_tmp:nnnn #1 }
+ \cs_set:Npn \@@_tmp:nnnn #1#2#3#4
+ {
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#1} { 13 }
+ \exp_after:wN \exp_after:wN \exp_after:wN
+ \exp_not:N \char_generate:nn {#2} { 13 }
+ }
+ }
+ \@@_loop:Nn
+ \` { 0300 }
+ \' { 0301 }
+ \^ { 0302 }
+ \~ { 0303 }
+ \= { 0304 }
+ \u { 0306 }
+ \U { 0306 }
+ \. { 0307 }
+ \" { 0308 }
+ \r { 030A }
+ \H { 030B }
+ \v { 030C }
+ \G { 030F }
+ \C { 030F }
+ \d { 0323 }
+ \c { 0327 }
+ \k { 0328 }
+ \b { 0331 }
+ \t { 0361 }
+ \q_recursion_tail ?
+ \q_recursion_stop
+% \end{macrocode}
+% Now we handle the pre-composed accents: the list here is taken from
+% \texttt{puenc.def}. All of the pre-composed cases take a single token
+% (a letter or a letter-like command) as their second argument. We do not
+% try to cover the case where an accent
+% is added to a \enquote{real} dotless-i or -j, or a \ae/\AE. Rather, we
+% assume that if the \textsc{utf}-8 character is used, it will have the
+% real accent character too.
+% \begin{macrocode}
+ \cs_set_protected:Npn \@@_loop:NNn #1#2#3
+ {
+ \quark_if_recursion_tail_stop:N #1
+ \tl_const:cx
+ { c_@@_purify_ \token_to_str:N #1 _ \token_to_str:N #2 _tl }
+ { \@@_tmp:n {#3} }
+ \@@_loop:NNn
+ }
+ \@@_loop:NNn
+ \` A { 00C0 }
+ \' A { 00C1 }
+ \^ A { 00C2 }
+ \~ A { 00C3 }
+ \" A { 00C4 }
+ \r A { 00C5 }
+ \c C { 00C7 }
+ \` E { 00C8 }
+ \' E { 00C9 }
+ \^ E { 00CA }
+ \" E { 00CB }
+ \` I { 00CC }
+ \' I { 00CD }
+ \^ I { 00CE }
+ \" I { 00CF }
+ \~ N { 00D1 }
+ \` O { 00D2 }
+ \' O { 00D3 }
+ \^ O { 00D4 }
+ \~ O { 00D5 }
+ \" O { 00D6 }
+ \` U { 00D9 }
+ \' U { 00DA }
+ \^ U { 00DB }
+ \" U { 00DC }
+ \' Y { 00DD }
+ \` a { 00E0 }
+ \' a { 00E1 }
+ \^ a { 00E2 }
+ \~ a { 00E3 }
+ \" a { 00E4 }
+ \r a { 00E5 }
+ \c c { 00E7 }
+ \` e { 00E8 }
+ \' e { 00E9 }
+ \^ e { 00EA }
+ \" e { 00EB }
+ \` i { 00EC }
+ \` \i { 00EC }
+ \' i { 00ED }
+ \' \i { 00ED }
+ \^ i { 00EE }
+ \^ \i { 00EE }
+ \" i { 00EF }
+ \" \i { 00EF }
+ \~ n { 00F1 }
+ \` o { 00F2 }
+ \' o { 00F3 }
+ \^ o { 00F4 }
+ \~ o { 00F5 }
+ \" o { 00F6 }
+ \` u { 00F9 }
+ \' u { 00FA }
+ \^ u { 00FB }
+ \" u { 00FC }
+ \' y { 00FD }
+ \" y { 00FF }
+ \= A { 0100 }
+ \= a { 0101 }
+ \u A { 0102 }
+ \u a { 0103 }
+ \k A { 0104 }
+ \k a { 0105 }
+ \' C { 0106 }
+ \' c { 0107 }
+ \^ C { 0108 }
+ \^ c { 0109 }
+ \. C { 010A }
+ \. c { 010B }
+ \v C { 010C }
+ \v c { 010D }
+ \v D { 010E }
+ \v d { 010F }
+ \= E { 0112 }
+ \= e { 0113 }
+ \u E { 0114 }
+ \u e { 0115 }
+ \. E { 0116 }
+ \. e { 0117 }
+ \k E { 0118 }
+ \k e { 0119 }
+ \v E { 011A }
+ \v e { 011B }
+ \^ G { 011C }
+ \^ g { 011D }
+ \u G { 011E }
+ \u g { 011F }
+ \. G { 0120 }
+ \. g { 0121 }
+ \c G { 0122 }
+ \c g { 0123 }
+ \^ H { 0124 }
+ \^ h { 0125 }
+ \~ I { 0128 }
+ \~ i { 0129 }
+ \~ \i { 0129 }
+ \= I { 012A }
+ \= i { 012B }
+ \= \i { 012B }
+ \u I { 012C }
+ \u i { 012D }
+ \u \i { 012D }
+ \k I { 012E }
+ \k i { 012F }
+ \k \i { 012F }
+ \. I { 0130 }
+ \^ J { 0134 }
+ \^ j { 0135 }
+ \^ \j { 0135 }
+ \c K { 0136 }
+ \c k { 0137 }
+ \' L { 0139 }
+ \' l { 013A }
+ \c L { 013B }
+ \c l { 013C }
+ \v L { 013D }
+ \v l { 013E }
+ \. L { 013F }
+ \. l { 0140 }
+ \' N { 0143 }
+ \' n { 0144 }
+ \c N { 0145 }
+ \c n { 0146 }
+ \v N { 0147 }
+ \v n { 0148 }
+ \= O { 014C }
+ \= o { 014D }
+ \u O { 014E }
+ \u o { 014F }
+ \H O { 0150 }
+ \H o { 0151 }
+ \' R { 0154 }
+ \' r { 0155 }
+ \c R { 0156 }
+ \c r { 0157 }
+ \v R { 0158 }
+ \v r { 0159 }
+ \' S { 015A }
+ \' s { 015B }
+ \^ S { 015C }
+ \^ s { 015D }
+ \c S { 015E }
+ \c s { 015F }
+ \v S { 0160 }
+ \v s { 0161 }
+ \c T { 0162 }
+ \c t { 0163 }
+ \v T { 0164 }
+ \v t { 0165 }
+ \~ U { 0168 }
+ \~ u { 0169 }
+ \= U { 016A }
+ \= u { 016B }
+ \u U { 016C }
+ \u u { 016D }
+ \r U { 016E }
+ \r u { 016F }
+ \H U { 0170 }
+ \H u { 0171 }
+ \k U { 0172 }
+ \k u { 0173 }
+ \^ W { 0174 }
+ \^ w { 0175 }
+ \^ Y { 0176 }
+ \^ y { 0177 }
+ \" Y { 0178 }
+ \' Z { 0179 }
+ \' z { 017A }
+ \. Z { 017B }
+ \. z { 017C }
+ \v Z { 017D }
+ \v z { 017E }
+ \v A { 01CD }
+ \v a { 01CE }
+ \v I { 01CF }
+ \v \i { 01D0 }
+ \v i { 01D0 }
+ \v O { 01D1 }
+ \v o { 01D2 }
+ \v U { 01D3 }
+ \v u { 01D4 }
+ \v G { 01E6 }
+ \v g { 01E7 }
+ \v K { 01E8 }
+ \v k { 01E9 }
+ \k O { 01EA }
+ \k o { 01EB }
+ \v \j { 01F0 }
+ \v j { 01F0 }
+ \' G { 01F4 }
+ \' g { 01F5 }
+ \` N { 01F8 }
+ \` n { 01F9 }
+ \' \AE { 01FC }
+ \' \ae { 01FD }
+ \' \O { 01FE }
+ \' \o { 01FF }
+ \G A { 0200 }
+ \G a { 0201 }
+ \G E { 0204 }
+ \G e { 0205 }
+ \G I { 0208 }
+ \G \i { 0209 }
+ \G i { 0209 }
+ \G O { 020C }
+ \G o { 020D }
+ \G R { 0210 }
+ \G r { 0211 }
+ \G U { 0214 }
+ \G u { 0215 }
+ \v H { 021E }
+ \v h { 021F }
+ \. A { 0226 }
+ \. a { 0227 }
+ \c E { 0228 }
+ \c e { 0229 }
+ \. O { 022E }
+ \. o { 022F }
+ \= Y { 0232 }
+ \= y { 0233 }
+ \q_recursion_tail ? ?
+ \q_recursion_stop
+\group_end:
+% \end{macrocode}
+% \end{macro}
+%
% \begin{macrocode}
%</initex|package>
% \end{macrocode}
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text003.luatex.tlg
similarity index 82%
copy from l3kernel/testfiles/m3text001.tlg
copy to l3kernel/testfiles/m3text003.luatex.tlg
index 66b9aba64..165d69b57 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text003.luatex.tlg
@@ -2,39 +2,37 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Expanding content
+TEST 1: Purify content
============================================================
Some text Hello
Hello sometext
-Some text Hello
-Hello sometext
-Some text \cs_tmp:w
-\cs_tmp:w sometext
============================================================
============================================================
-TEST 2: Expansion in braces
+TEST 2: Purify in braces
============================================================
-{Hello} world \par with \ERROR &##
+Hello world with &##
============================================================
============================================================
-TEST 3: Expansion exclusions
+TEST 3: Purification exclusions
============================================================
-FOO \cite {\l_tmpa_tl } {BAZ}
-\emph {BAR} {BAZ}
+FOO Hello BAZ
+ BAZ
============================================================
============================================================
TEST 4: Math-mode escape
============================================================
Some text $y = \sin \theta $
Opps not close token in $y = \sin \theta
+\ensuremath {y=mx+c} is an equation
============================================================
============================================================
TEST 5: Letter-like commands
============================================================
-\AA \aa \J \ae \dh \ss \l \O
+^^c5^^e5^^e6^^f0^^dfł^^d8
============================================================
============================================================
TEST 6: Accents
============================================================
-\"{a}\u {e}\H {i}\v {o}\.{u}
+^^e4ĕi̋ǒu̇
+^^f1r̨
============================================================
diff --git a/l3kernel/testfiles/m3text003.lvt b/l3kernel/testfiles/m3text003.lvt
new file mode 100644
index 000000000..c1031f295
--- /dev/null
+++ b/l3kernel/testfiles/m3text003.lvt
@@ -0,0 +1,62 @@
+%
+% Copyright (C) 2019 LaTeX3 Project
+%
+\input{regression-test}
+
+\RequirePackage[enable-debug]{expl3}
+\ExplSyntaxOn
+\debug_on:n { check-declarations , deprecation , log-functions }
+\ExplSyntaxOff
+
+\START
+\AUTHOR{Joseph Wright}
+\ExplSyntaxOn
+
+\OMIT
+ \tl_set:Nn \l_tmpa_tl { Hello }
+ \tl_set:Nn \l_tmpb_tl { \l_tmpa_tl }
+ \cs_set_protected:Npn \cs_tmp:w { \l_tmpa_tl }
+\TIMO
+
+\TESTEXP { Purify~content }
+ {
+ \text_purify:n { Some~\emph{text}~\l_tmpa_tl }
+ \NEWLINE
+ \text_purify:n { \l_tmpa_tl \c_space_tl some { \bfseries text } }
+ }
+
+\TESTEXP { Purify~in~braces }
+ {
+ \text_purify:n { { \emph { \l_tmpa_tl } }~world~\par with~\ERROR & # }
+ }
+
+\TESTEXP { Purification~exclusions }
+ {
+ \text_purify:n { FOO~\cite { \l_tmpa_tl } ~ { BAZ } }
+ \NEWLINE
+ \text_purify:n { \label { BAR } ~ { BAZ } }
+ }
+
+\TESTEXP { Math-mode~escape }
+ {
+ \text_purify:n { Some~text~$y~=~\sin \theta$ }
+ \NEWLINE
+ \text_purify:n { Opps~not~close~token~in~$y~=~\sin \theta }
+ \NEWLINE
+ \text_purify:n { \ensuremath { y = mx + c }~is~an~equation }
+ }
+
+\sys_if_engine_ptex:T { \END }
+
+\TESTEXP { Letter-like~commands }
+ {
+ \text_purify:n { \AA \aa \J \ae \dh \ss \l \O }
+ }
+
+\TESTEXP { Accents }
+ {
+ \text_purify:n { \"{a} \u{e} \H{i} \v{o} \.{u} } \NEWLINE
+ \text_purify:n { \~{n} \k{r} }
+ }
+
+\END
diff --git a/l3kernel/testfiles/m3str002.uptex.tlg b/l3kernel/testfiles/m3text003.ptex.tlg
similarity index 74%
copy from l3kernel/testfiles/m3str002.uptex.tlg
copy to l3kernel/testfiles/m3text003.ptex.tlg
index 221de224e..f3efcb6c4 100644
--- a/l3kernel/testfiles/m3str002.uptex.tlg
+++ b/l3kernel/testfiles/m3text003.ptex.tlg
@@ -2,29 +2,26 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Simple Latin case folding
+TEST 1: Purify content
============================================================
-"abc 123 abc !@"
-" abc 123 abc !@ "
-"some $&## odd text { } "
+Some text Hello
+Hello sometext
============================================================
============================================================
-TEST 2: Checking category codes
+TEST 2: Purify in braces
============================================================
-FALSE
-TRUE
+Hello world with &##
============================================================
============================================================
-TEST 3: Accented characters, etc.
+TEST 3: Purification exclusions
============================================================
-"ĆėƊṐṑ"
-"ᾪωΝ"
-"ΰῢst"
-"Ꚉꚇ"
-"ZꝎⓍ"
+FOO Hello BAZ
+ BAZ
============================================================
============================================================
-TEST 4: Characters with context-sensitive Unicode behaviour
+TEST 4: Math-mode escape
============================================================
-FALSE
+Some text $y = \sin \theta $
+Opps not close token in $y = \sin \theta
+\ensuremath {y=mx+c} is an equation
============================================================
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text003.tlg
similarity index 82%
copy from l3kernel/testfiles/m3text001.tlg
copy to l3kernel/testfiles/m3text003.tlg
index 66b9aba64..509f3fd2d 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text003.tlg
@@ -2,39 +2,37 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Expanding content
+TEST 1: Purify content
============================================================
Some text Hello
Hello sometext
-Some text Hello
-Hello sometext
-Some text \cs_tmp:w
-\cs_tmp:w sometext
============================================================
============================================================
-TEST 2: Expansion in braces
+TEST 2: Purify in braces
============================================================
-{Hello} world \par with \ERROR &##
+Hello world with &##
============================================================
============================================================
-TEST 3: Expansion exclusions
+TEST 3: Purification exclusions
============================================================
-FOO \cite {\l_tmpa_tl } {BAZ}
-\emph {BAR} {BAZ}
+FOO Hello BAZ
+ BAZ
============================================================
============================================================
TEST 4: Math-mode escape
============================================================
Some text $y = \sin \theta $
Opps not close token in $y = \sin \theta
+\ensuremath {y=mx+c} is an equation
============================================================
============================================================
TEST 5: Letter-like commands
============================================================
-\AA \aa \J \ae \dh \ss \l \O
+^^c3^^85^^c3^^a5^^c3^^a6^^c3^^b0^^c3^^9f^^c5^^82^^c3^^98
============================================================
============================================================
TEST 6: Accents
============================================================
-\"{a}\u {e}\H {i}\v {o}\.{u}
+^^c3^^a4^^c4^^95i^^cc^^8b^^c7^^92u^^cc^^87
+^^c3^^b1r^^cc^^a8
============================================================
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text003.uptex.tlg
similarity index 82%
copy from l3kernel/testfiles/m3text001.tlg
copy to l3kernel/testfiles/m3text003.uptex.tlg
index 66b9aba64..165d69b57 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text003.uptex.tlg
@@ -2,39 +2,37 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Expanding content
+TEST 1: Purify content
============================================================
Some text Hello
Hello sometext
-Some text Hello
-Hello sometext
-Some text \cs_tmp:w
-\cs_tmp:w sometext
============================================================
============================================================
-TEST 2: Expansion in braces
+TEST 2: Purify in braces
============================================================
-{Hello} world \par with \ERROR &##
+Hello world with &##
============================================================
============================================================
-TEST 3: Expansion exclusions
+TEST 3: Purification exclusions
============================================================
-FOO \cite {\l_tmpa_tl } {BAZ}
-\emph {BAR} {BAZ}
+FOO Hello BAZ
+ BAZ
============================================================
============================================================
TEST 4: Math-mode escape
============================================================
Some text $y = \sin \theta $
Opps not close token in $y = \sin \theta
+\ensuremath {y=mx+c} is an equation
============================================================
============================================================
TEST 5: Letter-like commands
============================================================
-\AA \aa \J \ae \dh \ss \l \O
+^^c5^^e5^^e6^^f0^^dfł^^d8
============================================================
============================================================
TEST 6: Accents
============================================================
-\"{a}\u {e}\H {i}\v {o}\.{u}
+^^e4ĕi̋ǒu̇
+^^f1r̨
============================================================
diff --git a/l3kernel/testfiles/m3text001.tlg b/l3kernel/testfiles/m3text003.xetex.tlg
similarity index 82%
copy from l3kernel/testfiles/m3text001.tlg
copy to l3kernel/testfiles/m3text003.xetex.tlg
index 66b9aba64..165d69b57 100644
--- a/l3kernel/testfiles/m3text001.tlg
+++ b/l3kernel/testfiles/m3text003.xetex.tlg
@@ -2,39 +2,37 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Expanding content
+TEST 1: Purify content
============================================================
Some text Hello
Hello sometext
-Some text Hello
-Hello sometext
-Some text \cs_tmp:w
-\cs_tmp:w sometext
============================================================
============================================================
-TEST 2: Expansion in braces
+TEST 2: Purify in braces
============================================================
-{Hello} world \par with \ERROR &##
+Hello world with &##
============================================================
============================================================
-TEST 3: Expansion exclusions
+TEST 3: Purification exclusions
============================================================
-FOO \cite {\l_tmpa_tl } {BAZ}
-\emph {BAR} {BAZ}
+FOO Hello BAZ
+ BAZ
============================================================
============================================================
TEST 4: Math-mode escape
============================================================
Some text $y = \sin \theta $
Opps not close token in $y = \sin \theta
+\ensuremath {y=mx+c} is an equation
============================================================
============================================================
TEST 5: Letter-like commands
============================================================
-\AA \aa \J \ae \dh \ss \l \O
+^^c5^^e5^^e6^^f0^^dfł^^d8
============================================================
============================================================
TEST 6: Accents
============================================================
-\"{a}\u {e}\H {i}\v {o}\.{u}
+^^e4ĕi̋ǒu̇
+^^f1r̨
============================================================
More information about the latex3-commits
mailing list