[latex3-commits] [git/LaTeX3-latex3-latex3] pdfescape: Add \str_convert_pdfname:n (0643ef8f8)

Mon Apr 6 15:24:29 CEST 2020

Repository : https://github.com/latex3/latex3
On branch  : pdfescape
Link       : https://github.com/latex3/latex3/commit/0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6

>---------------------------------------------------------------

commit 0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Mon Apr 6 14:24:29 2020 +0100

    Add \str_convert_pdfname:n


>---------------------------------------------------------------

0643ef8f87c4ee6b104cbff6ba8e5518c5be62c6
 l3kernel/CHANGELOG.md                              |   3 +
 l3kernel/l3str-convert.dtx                         | 109 +++++++++++++++++++++
 .../{m3regex010.lvt => m3str-convert005.lvt}       |  16 ++-
 .../{m3token001.tlg => m3str-convert005.ptex.tlg}  |   6 +-
 .../{m3token001.tlg => m3str-convert005.tlg}       |   6 +-
 5 files changed, 131 insertions(+), 9 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 935c8f321..0bec3a384 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,9 @@ this project uses date-based 'snapshot' version identifiers.
 
 ## [Unreleased]
 
+### Added
+- `\str_convert_pdfname:n`
+
 ## [2020-04-06]
 
 ### Added
diff --git a/l3kernel/l3str-convert.dtx b/l3kernel/l3str-convert.dtx
index ea0839abf..372aed8af 100644
--- a/l3kernel/l3str-convert.dtx
+++ b/l3kernel/l3str-convert.dtx
@@ -186,6 +186,19 @@
 %   encoding. Instead, the \meta{false code} is performed.
 % \end{function}
 %
+% \section{Conversion by expansion (for PDF contexts)}
+%
+% A small number of expandable functions are provided for use in PDF string/name
+% contexts. These \emph{assume UTF-8} and \emph{no escaping} in the input.
+%
+% \begin{function}[EXP]{\str_convert_pdfname:n}
+%   \begin{syntax}
+%     \cs{str_convert_pdfname:n} \Arg{string}
+%   \end{syntax}
+%   As with \cs{str_set_convert:Nnnn}, converts the \meta{string} on a
+%   byte-by-byte basis, with non-ASCII codepoints escaped using hashes.
+% \end{function}
+%
 % \section[Creating 8-bit mappings]{Creating $8$-bit mappings}
 %
 % \begin{function}{\str_declare_eight_bit_encoding:nnn}
@@ -2567,6 +2580,102 @@
 \group_end:
 %    \end{macrocode}
 %
+% \subsection{PDF names and strings by expansion}
+%
+% \begin{macro}[EXP]{\str_convert_pdfname:n, \@@_convert_pdfname:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_first:w, \@@_convert_pdfname_loop:w}
+% \begin{macro}[EXP]{\@@_convert_pdfname_outer:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_inner:w}
+% \begin{macro}[EXP]
+%   {\@@_convert_pdfname_bytes:n, \@@_convert_pdfname_bytes_aux:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_bytes_aux:nnnn}
+%   To convert to PDF names by expansion, we work purely on UTF-8 input. The
+%   input is made into a string and split at spaces (each space is output as
+%   \texttt{\#20}), then mapped token-by-token. In Unicode engines, codepoints
+%   above $127$ are split into UTF-8 bytes, each output as a hash plus
+%   hexadecimal; for $8$-bit engines there is no need to split. Other escaping
+%   is covered by the same code as used in the non-expandable route.
+%    \begin{macrocode}
+\cs_new:Npn \str_convert_pdfname:n #1
+  {
+    \exp_args:Ne \tl_to_str:n
+      {
+         \exp_after:wN \@@_convert_pdfname:n \exp_after:wN
+           { \tl_to_str:n {#1} }
+      }
+  }
+\cs_new:Npx \@@_convert_pdfname:n #1
+  {
+    \exp_not:N \@@_convert_pdfname_first:w #1 ~
+      \exp_not:N \q_recursion_tail \c_space_tl
+      \exp_not:N \q_recursion_stop
+  }
+\cs_new:Npn \@@_convert_pdfname_first:w #1 ~
+  {
+    \quark_if_recursion_tail_stop:n {#1}
+    \@@_convert_pdfname_outer:n {#1}
+  }
+\cs_new:Npn \@@_convert_pdfname_loop:w #1 ~
+  {
+    \quark_if_recursion_tail_stop:n {#1}
+    \c_hash_str 20
+    \@@_convert_pdfname_outer:n {#1}
+  }
+\cs_new:Npx \@@_convert_pdfname_outer:n #1
+  {
+    \exp_not:N \@@_convert_pdfname_inner:w #1
+      \exp_not:N \q_recursion_tail \exp_not:N \q_recursion_stop
+    \exp_not:N \@@_convert_pdfname_loop:w
+  }
+\bool_lazy_or:nnTF
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  {
+    \cs_new:Npn \@@_convert_pdfname_inner:w #1
+      {
+        \quark_if_recursion_tail_stop:n {#1}
+        \int_compare:nNnTF { `#1 } > { "7F }
+          { \@@_convert_pdfname_bytes:n {#1} }
+          { \@@_escape_name_char:N #1 }
+        \@@_convert_pdfname_inner:w
+      }
+    \cs_new:Npn \@@_convert_pdfname_bytes:n #1
+      {
+        \exp_args:Ne \@@_convert_pdfname_bytes_aux:n
+          { \char_to_utfviii_bytes:n {`#1} }
+      }
+    \cs_new:Npn \@@_convert_pdfname_bytes_aux:n #1
+      { \@@_convert_pdfname_bytes_aux:nnnn #1 }
+    \cs_new:Npx \@@_convert_pdfname_bytes_aux:nnnn #1#2#3#4
+      {
+        \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#1}
+        \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#2}
+        \exp_not:N \tl_if_blank:nF {#3}
+          {
+            \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#3}
+            \exp_not:N \tl_if_blank:nF {#4}
+              {
+                \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#4}
+              }
+          }
+      }
+  }
+  {
+    \cs_new:Npn \@@_convert_pdfname_inner:w #1
+      {
+        \quark_if_recursion_tail_stop:n {#1}
+        \@@_escape_name_char:N #1
+        \@@_convert_pdfname_inner:w
+      }
+  }
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
 %    \begin{macrocode}
 %</initex|package>
 %    \end{macrocode}
diff --git a/l3kernel/testfiles/m3regex010.lvt b/l3kernel/testfiles/m3str-convert005.lvt
similarity index 50%
copy from l3kernel/testfiles/m3regex010.lvt
copy to l3kernel/testfiles/m3str-convert005.lvt
index 360e342f0..bd88598fe 100644
--- a/l3kernel/testfiles/m3regex010.lvt
+++ b/l3kernel/testfiles/m3str-convert005.lvt
@@ -1,5 +1,7 @@
 %
-% Copyright (C) 2014,2015,2017,2018 The LaTeX3 Project
+% Copyright (C) 2020 The LaTeX3 Project
+%
+
 \documentclass{minimal}
 \input{regression-test}
 
@@ -11,15 +13,19 @@
 \begin{document}
 
 \START
-\AUTHOR{Bruno Le Floch}
+\AUTHOR{Joseph Wright}
 \ExplSyntaxOn
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
-\TEST { LuaTeX~bug~which~leads~to~an~l3regex~bug }
+\char_set_catcode_other:N \%
+\char_set_catcode_other:N \#
+
+\TESTEXP { PDF~names }
   {
-    \regex_show:n { \^^@ \^^A }
+    \str_convert_pdfname:n { abczz } \NEWLINE
+    \str_convert_pdfname:n { brackets()[]{}<>xxx } \NEWLINE
+    \str_convert_pdfname:n { grüße€🦆 } \NEWLINE
   }
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \END
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.ptex.tlg
similarity index 78%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.ptex.tlg
index cd844648d..85d6d4ee0 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.ptex.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 Author: Joseph Wright
 ============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
 ============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
+gr#C3#BC#C3###AC##A6#86
 ============================================================
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.tlg
similarity index 76%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.tlg
index cd844648d..a1585222d 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 Author: Joseph Wright
 ============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
 ============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
+gr#C3#BC#C3#9Fe#E2#82#AC#F0#9F#A6#86
 ============================================================