[latex3-commits] [git/LaTeX3-latex3-latex3] filename-sanitize: \csname-based filename sanitisation (ca4cabd4e)
PhelypeOleinik
phelype.oleinik at latex-project.org
Sat Apr 17 23:18:13 CEST 2021
Repository : https://github.com/latex3/latex3
On branch : filename-sanitize
Link : https://github.com/latex3/latex3/commit/ca4cabd4eb1ec3232731d7de68e3e715c00a82a5
>---------------------------------------------------------------
commit ca4cabd4eb1ec3232731d7de68e3e715c00a82a5
Author: PhelypeOleinik <phelype.oleinik at latex-project.org>
Date: Sat Apr 17 18:18:13 2021 -0300
\csname-based filename sanitisation
>---------------------------------------------------------------
ca4cabd4eb1ec3232731d7de68e3e715c00a82a5
l3kernel/l3file.dtx | 122 +++++++++++++++++++++++++++++++---------------------
1 file changed, 74 insertions(+), 48 deletions(-)
diff --git a/l3kernel/l3file.dtx b/l3kernel/l3file.dtx
index bb3276c32..479f72ea2 100644
--- a/l3kernel/l3file.dtx
+++ b/l3kernel/l3file.dtx
@@ -2437,68 +2437,81 @@
% \begin{macro}[EXP]{\__kernel_file_name_trim_spaces:nw}
% \begin{macro}[EXP]{\__kernel_file_name_trim_spaces_aux:n}
% \begin{macro}[EXP]{\__kernel_file_name_trim_spaces_aux:w}
-% Expanding the file name without expanding active characters is done
-% using the same token-by-token approach as for example case changing.
-% The finale outcome only need be \texttt{e}-type expandable, so there
-% is no need for the shuffling that is seen in other locations.
+% Expanding the file name uses a \tn{csname}-based approach, and
+% relies on active characters (for example from UTF-8 characters)
+% being properly set up to expand to an expansion-safe version using
+% \tn{ifcsname}. This is less conservative than the token-by-token
+% approach used before, but it is much faster.
% \begin{macrocode}
\cs_new:Npn \__kernel_file_name_sanitize:n #1
{
\exp_args:Ne \__kernel_file_name_trim_spaces:n
{
\exp_args:Ne \__kernel_file_name_strip_quotes:n
- {
- \__kernel_file_name_expand_loop:w #1
- \q_@@_recursion_tail \q_@@_recursion_stop
- }
+ { \@@_name_expand:n {#1} }
}
}
-\cs_new:Npn \__kernel_file_name_expand_loop:w #1 \q_@@_recursion_stop
+% \end{macrocode}
+%
+% We'll use \cs{cs:w} to start expanding the file name, and to avoid
+% creating csnames equal to \tn{relax} with \enquote{common} names,
+% there's a prefix |__file_name=| to the csname. There's also a guard
+% token at the end so we can check if there was an error during the
+% process and (try to) clean up gracefully.
+% \begin{macrocode}
+\cs_new:Npn \@@_name_expand:n #1
{
- \tl_if_head_is_N_type:nTF {#1}
- { \__kernel_file_name_expand_N_type:Nw }
- {
- \tl_if_head_is_group:nTF {#1}
- { \__kernel_file_name_expand_group:nw }
- { \__kernel_file_name_expand_space:w }
- }
- #1 \q_@@_recursion_stop
+ \exp_after:wN \@@_name_expand_cleanup:Nw
+ \cs:w @@_name = #1 \cs_end:
+ \@@_name_expand_end:
}
-\cs_new:Npn \__kernel_file_name_expand_N_type:Nw #1
+% \end{macrocode}
+% With the csname built, we grab it, and grab the remaining tokens
+% delimited by \cs{@@_name_expand_end:}. If there are any remaining
+% tokens, something bad happened, so we'll call the error procedure
+% \cs{@@_name_expand_error:Nw}.
+% If everything went according to plan, then use \cs{token_to_str:N}
+% on the csname built, and call \cs{@@_name_expand_cleanup:w} to
+% remove the prefix we added a while back.
+% \cs{@@_name_expand_cleanup:w} takes a leading argument so we don't
+% have to bother about the value of \cs{tex_escapechar:D}.
+% \begin{macrocode}
+\cs_new:Npn \@@_name_expand_cleanup:Nw #1 #2 \@@_name_expand_end:
{
- \@@_if_recursion_tail_stop:N #1
- \bool_lazy_and:nnTF
- { \token_if_expandable_p:N #1 }
- {
- \bool_not_p:n
- {
- \bool_lazy_any_p:n
- {
- { \token_if_protected_macro_p:N #1 }
- { \token_if_protected_long_macro_p:N #1 }
- { \token_if_active_p:N #1 }
- }
- }
- }
- { \exp_after:wN \__kernel_file_name_expand_loop:w #1 }
- {
- \token_to_str:N #1
- \__kernel_file_name_expand_loop:w
- }
+ \tl_if_empty:nF {#2}
+ { \@@_name_expand_error:Nw #2 \@@_name_expand_end: }
+ \exp_after:wN \@@_name_expand_cleanup:w \token_to_str:N #1
}
-\cs_new:Npx \__kernel_file_name_expand_group:nw #1
+\exp_last_unbraced:NNNNo
+\cs_new:Npn \@@_name_expand_cleanup:w #1 \tl_to_str:n { @@_name = } { }
+% \end{macrocode}
+% In non-error cases \cs{@@_name_expand_end:} should not expand. It
+% only does so if there is one \tn{csname} too many in the file
+% name: it then throws an error (while expanding), and inserts the
+% missing \cs{cs_end:} and yet another \cs{@@_name_expand_end:} that
+% will be used as a delimiter by \cs{@@_name_expand_cleanup:Nw} (or
+% that will expand again if yet another \tn{endcsname} is missing).
+% \begin{macrocode}
+\cs_new:Npn \@@_name_expand_end:
{
- \c_left_brace_str
- \exp_not:N \__kernel_file_name_expand_loop:w
- #1
- \c_right_brace_str
+ \__kernel_msg_expandable_error:nn
+ { kernel } { filename-missing-endcsname }
+ \cs_end: \@@_name_expand_end:
+ }
+% \end{macrocode}
+% Now to the error case. \cs{@@_name_expand_error:Nw} adds an extra
+% \cs{cs_end:}, in case there was an extra \tn{csname} in the file
+% name, and then \cs{@@_name_expand_error_aux:Nw} throws the error.
+% \begin{macrocode}
+\cs_new:Npn \@@_name_expand_error:Nw #1 #2 \@@_name_expand_end:
+ { \@@_name_expand_error_aux:Nw #1 #2 \cs_end: \@@_name_expand_end: }
+\cs_new:Npn \@@_name_expand_error_aux:Nw #1 #2 \cs_end: #3
+ \@@_name_expand_end:
+ {
+ \__kernel_msg_expandable_error:nnff
+ { kernel } { filename-chars-lost }
+ { \token_to_str:N #1 } { \exp_stop_f: #2 }
}
-\exp_last_unbraced:NNo
- \cs_new:Npx \__kernel_file_name_expand_space:w \c_space_tl
- {
- \c_space_tl
- \exp_not:N \__kernel_file_name_expand_loop:w
- }
% \end{macrocode}
% Quoting file name uses basically the same approach as for
% \texttt{luaquotejobname}: count the |"| tokens and remove them.
@@ -3614,6 +3627,19 @@
#1 \\
.............
}
+\__kernel_msg_new:nnnn { kernel } { filename-chars-lost }
+ { #1~invalid~in~file~name.~Lost:~#2. }
+ {
+ There~was~an~invalid~token~in~the~file~name~that~caused~
+ the~characters~following~it~to~be~lost.
+ }
+\__kernel_msg_new:nnnn { kernel } { filename-missing-endcsname }
+ { Missing~\iow_char:N\\endcsname~inserted~in~filename. }
+ {
+ The~file~name~had~more~\iow_char:N\\csname~commands~than~
+ \iow_char:N\\endcsname~ones.~LaTeX~will~add~the~missing~
+ \iow_char:N\\endcsname~and~try~to~continue~as~best~as~it~can.
+ }
\__kernel_msg_new:nnnn { kernel } { unbalanced-quote-in-filename }
{ Unbalanced~quotes~in~file~name~'#1'. }
{
More information about the latex3-commits
mailing list.