[latex3-commits] [git/LaTeX3-latex3-latex3] pdfescape: Add \str_convert_pdfname:n (0643ef8f8)
Joseph Wright
joseph.wright at morningstar2.co.uk
Mon Apr 6 15:24:29 CEST 2020
Repository : https://github.com/latex3/latex3
On branch : pdfescape
Link : https://github.com/latex3/latex3/commit/0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6
>---------------------------------------------------------------
commit 0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date: Mon Apr 6 14:24:29 2020 +0100
Add \str_convert_pdfname:n
>---------------------------------------------------------------
0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6
l3kernel/CHANGELOG.md | 3 +
l3kernel/l3str-convert.dtx | 109 +++++++++++++++++++++
.../{m3regex010.lvt => m3str-convert005.lvt} | 16 ++-
.../{m3token001.tlg => m3str-convert005.ptex.tlg} | 6 +-
.../{m3token001.tlg => m3str-convert005.tlg} | 6 +-
5 files changed, 131 insertions(+), 9 deletions(-)
diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 935c8f321..0bec3a384 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,9 @@ this project uses date-based 'snapshot' version identifiers.
## [Unreleased]
+### Added
+- `\str_convert_pdfname:n`
+
## [2020-04-06]
### Added
diff --git a/l3kernel/l3str-convert.dtx b/l3kernel/l3str-convert.dtx
index ea0839abf..372aed8af 100644
--- a/l3kernel/l3str-convert.dtx
+++ b/l3kernel/l3str-convert.dtx
@@ -186,6 +186,19 @@
% encoding. Instead, the \meta{false code} is performed.
% \end{function}
%
+% \section{Conversion by expansion (for PDF contexts)}
+%
+% A small number of expandable functions are provided for use in PDF string/name
+% contexts. These \emph{assume UTF-8} and \emph{no escaping} in the input.
+%
+% \begin{function}[EXP]{\str_convert_pdfname:n}
+% \begin{syntax}
+% \cs{str_convert_pdfname:n} \meta{string}
+% \end{syntax}
+% As for \cs{str_set_convert:Nnnn}, converts the \meta{string} byte by byte:
+% spaces, brackets and similar, plus all non-ASCII bytes, are hash-escaped.
+% \end{function}
+%
% \section[Creating 8-bit mappings]{Creating $8$-bit mappings}
%
% \begin{function}{\str_declare_eight_bit_encoding:nnn}
@@ -2567,6 +2580,102 @@
\group_end:
% \end{macrocode}
%
+% \subsection{PDF names and strings by expansion}
+%
+% \begin{macro}[EXP]{\str_convert_pdfname:n, \@@_convert_pdfname:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_first:w, \@@_convert_pdfname_loop:w}
+% \begin{macro}[EXP]{\@@_convert_pdfname_outer:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_inner:w}
+% \begin{macro}[EXP]
+% {\@@_convert_pdfname_bytes:n, \@@_convert_pdfname_bytes_aux:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_bytes_aux:nnnn}
+% To convert to PDF names by expansion, we work purely on UTF-8 input. The
+% first step is to make a string, after which we use a space-based mapping to
+% divide up the input, then a simple token-by-token approach. In Unicode
+% engines, codepoints above $127$ are first split into their UTF-8 bytes, but
+% for $8$-bit engines the input is already bytes. Actual escaping is covered
+% by the same code as used in the non-expandable route.
+% \begin{macrocode}
+\cs_new:Npn \str_convert_pdfname:n #1 % expandable entry point; #1 is the input
+ {
+ \exp_args:Ne \tl_to_str:n % e-expand the braced code, then make it a string
+ {
+ \exp_after:wN \@@_convert_pdfname:n \exp_after:wN
+ { \tl_to_str:n {#1} } % input is turned into a string before conversion
+ }
+ }
+\cs_new:Npx \@@_convert_pdfname:n #1 % x-type so that \c_space_tl expands now
+ {
+ \exp_not:N \@@_convert_pdfname_first:w #1 ~ % space ends the last real chunk
+ \exp_not:N \q_recursion_tail \c_space_tl % end marker, itself space-delimited
+ \exp_not:N \q_recursion_stop
+ }
+\cs_new:Npn \@@_convert_pdfname_first:w #1 ~ % #1: input up to the first space
+ {
+ \quark_if_recursion_tail_stop:n {#1} % end marker reached: nothing more to do
+ \@@_convert_pdfname_outer:n {#1} % first chunk: no escaped space in front
+ }
+\cs_new:Npn \@@_convert_pdfname_loop:w #1 ~ % #1: next space-delimited chunk
+ {
+ \quark_if_recursion_tail_stop:n {#1} % end marker reached: stop the mapping
+ \c_hash_str 20 % hash-escaped form of the space that preceded this chunk
+ \@@_convert_pdfname_outer:n {#1}
+ }
+\cs_new:Npx \@@_convert_pdfname_outer:n #1 % #1: one chunk containing no spaces
+ {
+ \exp_not:N \@@_convert_pdfname_inner:w #1 % token-by-token pass over the chunk
+ \exp_not:N \q_recursion_tail \exp_not:N \q_recursion_stop
+ \exp_not:N \@@_convert_pdfname_loop:w % afterwards continue with next chunk
+ }
+\bool_lazy_or:nnTF % choose the per-token code: LuaTeX/XeTeX versus other engines
+ { \sys_if_engine_luatex_p: }
+ { \sys_if_engine_xetex_p: }
+ {
+ \cs_new:Npn \@@_convert_pdfname_inner:w #1 % #1: one token of the chunk
+ {
+ \quark_if_recursion_tail_stop:n {#1} % end of chunk: leave the inner loop
+ \int_compare:nNnTF { `#1 } > { "7F } % is the character code above 127?
+ { \@@_convert_pdfname_bytes:n {#1} } % yes: escape byte by byte
+ { \@@_escape_name_char:N #1 } % no: escaping code defined elsewhere in file
+ \@@_convert_pdfname_inner:w % recurse onto the next token
+ }
+ \cs_new:Npn \@@_convert_pdfname_bytes:n #1 % #1: one character above 127
+ {
+ \exp_args:Ne \@@_convert_pdfname_bytes_aux:n % expand the byte list first
+ { \char_to_utfviii_bytes:n {`#1} } % presumably four groups; TODO confirm
+ }
+ \cs_new:Npn \@@_convert_pdfname_bytes_aux:n #1 % unbrace to expose the groups
+ { \@@_convert_pdfname_bytes_aux:nnnn #1 }
+ \cs_new:Npx \@@_convert_pdfname_bytes_aux:nnnn #1#2#3#4 % #1-#4: byte values
+ {
+ \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#1} % first two always out
+ \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#2}
+ \exp_not:N \tl_if_blank:nF {#3} % third byte only if present
+ {
+ \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#3}
+ \exp_not:N \tl_if_blank:nF {#4} % fourth byte only if present
+ {
+ \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#4}
+ }
+ }
+ }
+ }
+ {
+ \cs_new:Npn \@@_convert_pdfname_inner:w #1 % other engines: no splitting step
+ {
+ \quark_if_recursion_tail_stop:n {#1} % end of chunk: leave the inner loop
+ \@@_escape_name_char:N #1 % every token goes straight to the escaping code
+ \@@_convert_pdfname_inner:w % recurse onto the next token
+ }
+ }
+% \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
% \begin{macrocode}
%</initex|package>
% \end{macrocode}
diff --git a/l3kernel/testfiles/m3regex010.lvt b/l3kernel/testfiles/m3str-convert005.lvt
similarity index 50%
copy from l3kernel/testfiles/m3regex010.lvt
copy to l3kernel/testfiles/m3str-convert005.lvt
index 360e342f0..bd88598fe 100644
--- a/l3kernel/testfiles/m3regex010.lvt
+++ b/l3kernel/testfiles/m3str-convert005.lvt
@@ -1,5 +1,7 @@
%
-% Copyright (C) 2014,2015,2017,2018 The LaTeX3 Project
+% Copyright (C) 2020 The LaTeX3 Project
+%
+
\documentclass{minimal}
\input{regression-test}
@@ -11,15 +13,19 @@
\begin{document}
\START
-\AUTHOR{Bruno Le Floch}
+\AUTHOR{Joseph Wright}
\ExplSyntaxOn
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\TEST { LuaTeX~bug~which~leads~to~an~l3regex~bug }
+\char_set_catcode_other:N \%
+\char_set_catcode_other:N \#
+
+\TESTEXP { PDF~names }
{
- \regex_show:n { \^^@ \^^A }
+ \str_convert_pdfname:n { abczz } \NEWLINE
+ \str_convert_pdfname:n { brackets()[]{}<>xxx } \NEWLINE
+ \str_convert_pdfname:n { grüße€🦆 } \NEWLINE
}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\END
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.ptex.tlg
similarity index 78%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.ptex.tlg
index cd844648d..85d6d4ee0 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.ptex.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
+gr#C3#BC#C3###AC##A6#86
============================================================
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.tlg
similarity index 76%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.tlg
index cd844648d..a1585222d 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
Don't change this file in any respect.
Author: Joseph Wright
============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
+gr#C3#BC#C3#9Fe#E2#82#AC#F0#9F#A6#86
============================================================
More information about the latex3-commits
mailing list.