[latex3-commits] [git/LaTeX3-latex3-latex3] master: Add expandable pdfname generation (#721) (0ca513ac6)

Thu May 28 13:20:09 CEST 2020

Repository : https://github.com/latex3/latex3
On branch  : master
Link       : https://github.com/latex3/latex3/commit/0ca513ac6f567203ae6d7310dd642be092a814cd

>---------------------------------------------------------------

commit 0ca513ac6f567203ae6d7310dd642be092a814cd
Author: Joseph Wright <joseph.wright at morningstar2.co.uk>
Date:   Thu May 28 12:20:09 2020 +0100

    Add expandable pdfname generation (#721)


>---------------------------------------------------------------

0ca513ac6f567203ae6d7310dd642be092a814cd
 l3kernel/CHANGELOG.md                              |  3 +
 l3kernel/l3str-convert.dtx                         | 70 ++++++++++++++++++++++
 l3kernel/testfiles/m3str-convert005.lvt            | 32 ++++++++++
 .../{m3token001.tlg => m3str-convert005.ptex.tlg}  |  5 +-
 .../{m3token001.tlg => m3str-convert005.tlg}       |  6 +-
 5 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/l3kernel/CHANGELOG.md b/l3kernel/CHANGELOG.md
index 608fd1b57..3b095e30e 100644
--- a/l3kernel/CHANGELOG.md
+++ b/l3kernel/CHANGELOG.md
@@ -7,6 +7,9 @@ this project uses date-based 'snapshot' version identifiers.
 
 ## [Unreleased]
 
+### Added
+- `\str_convert_pdfname:n`
+
 ## [2020-05-15]
 
 ### Changed
diff --git a/l3kernel/l3str-convert.dtx b/l3kernel/l3str-convert.dtx
index 5836bea27..a6275b825 100644
--- a/l3kernel/l3str-convert.dtx
+++ b/l3kernel/l3str-convert.dtx
@@ -186,6 +186,19 @@
 %   encoding. Instead, the \meta{false code} is performed.
 % \end{function}
 %
+% \section{Conversion by expansion (for PDF contexts)}
+%
+% A small number of expandable functions are provided for use in PDF string/name
+% contexts. These \emph{assume UTF-8} and \emph{no escaping} in the input.
+%
+% \begin{function}[EXP]{\str_convert_pdfname:n}
+%   \begin{syntax}
+%     \cs{str_convert_pdfname:n} \meta{string}
+%   \end{syntax}
+%   As \cs{str_set_convert:Nnnn}, converts the \meta{string} on a byte-by-byte
+%   basis, with non-ASCII codepoints escaped using hashes.
+% \end{function}
+%
 % \section[Creating 8-bit mappings]{Creating $8$-bit mappings}
 %
 % \begin{function}{\str_declare_eight_bit_encoding:nnn}
@@ -2581,6 +2594,63 @@
 \group_end:
 %    \end{macrocode}
 %
+% \subsection{PDF names and strings by expansion}
+%
+% \begin{macro}[EXP]{\str_convert_pdfname:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname:N}
+% \begin{macro}[EXP]
+%   {\@@_convert_pdfname_bytes:n, \@@_convert_pdfname_bytes_aux:n}
+% \begin{macro}[EXP]{\@@_convert_pdfname_bytes_aux:nnnn}
+%   To convert to PDF names by expansion, we work purely on UTF-8 input. The
+%   first step is to make a string with \enquote{other} spaces,
+%   after which we use a simple token-by-token approach. In Unicode
+%   engines, any codepoint above 127 is first broken down into its UTF-8
+%   bytes, but for $8$-bit engines there is no need to worry. Actual escaping
+%   is covered by the same code as used in the non-expandable route.
+%    \begin{macrocode}
+\cs_new:Npn \str_convert_pdfname:n #1 % user-level entry point, expandable
+  {
+    \exp_args:Ne \tl_to_str:n % e-expand the mapping, then turn the result into a string
+      { \str_map_function:nN {#1} \@@_convert_pdfname:N } % one call per character of #1
+  }
+\bool_lazy_or:nnTF % top-level test: the per-character converter is chosen by engine
+  { \sys_if_engine_luatex_p: }
+  { \sys_if_engine_xetex_p: }
+  { % true branch: LuaTeX or XeTeX
+    \cs_new:Npn \@@_convert_pdfname:N #1
+      {
+        \int_compare:nNnTF { `#1 } > { "7F } % is the character code above "7F?
+          { \@@_convert_pdfname_bytes:n {#1} } % yes: split into bytes, hash-escape each
+          { \@@_escape_name_char:N #1 } % no: hand over to the existing name escaping
+      }
+    \cs_new:Npn \@@_convert_pdfname_bytes:n #1
+      {
+        \exp_args:Ne \@@_convert_pdfname_bytes_aux:n % expand the byte list before passing it on
+          { \char_to_utfviii_bytes:n {`#1} } % presumably four brace groups, unused ones empty
+      }
+    \cs_new:Npn \@@_convert_pdfname_bytes_aux:n #1
+      { \@@_convert_pdfname_bytes_aux:nnnn #1 } % unbraced #1 supplies the four arguments
+    \cs_new:Npx \@@_convert_pdfname_bytes_aux:nnnn #1#2#3#4 % x-type: \c_hash_str expands now
+      {
+        \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#1} % #1 and #2 are output
+        \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#2} % unconditionally
+        \exp_not:N \tl_if_blank:nF {#3} % third byte only when #3 is not blank
+          {
+            \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#3}
+            \exp_not:N \tl_if_blank:nF {#4} % fourth byte only when #4 is not blank
+              {
+                \c_hash_str \exp_not:N \@@_output_hexadecimal:n {#4}
+              }
+          }
+      }
+  }
+  { \cs_new_eq:NN \@@_convert_pdfname:N \@@_escape_name_char:N } % other engines: escape directly
+%    \end{macrocode}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+% \end{macro}
+%
 %    \begin{macrocode}
 %</initex|package>
 %    \end{macrocode}
diff --git a/l3kernel/testfiles/m3str-convert005.lvt b/l3kernel/testfiles/m3str-convert005.lvt
new file mode 100644
index 000000000..ff895a264
--- /dev/null
+++ b/l3kernel/testfiles/m3str-convert005.lvt
@@ -0,0 +1,32 @@
+%
+% Copyright (C) 2020 The LaTeX3 Project
+%
+
+\documentclass{minimal}
+\input{regression-test}
+
+\RequirePackage[enable-debug]{expl3}
+\ExplSyntaxOn
+\debug_on:n { check-declarations , deprecation , log-functions }
+\ExplSyntaxOff
+
+\begin{document}
+
+\START
+\AUTHOR{Joseph Wright}
+\ExplSyntaxOn
+
+%%%% Expandable PDF name tests. NOTE: % and # are made catcode "other" just below, so % cannot start a comment after that point.
+
+\char_set_catcode_other:N \%
+\char_set_catcode_other:N \#
+
+\TESTEXP { PDF~names }
+  {
+    \str_convert_pdfname:n { abczz } \NEWLINE
+    \str_convert_pdfname:n { brackets()[]{}<>xxx } \NEWLINE
+    \sys_if_engine_ptex:F
+      { \str_convert_pdfname:n { grüße€🦆 } \NEWLINE }
+  }
+
+\END
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.ptex.tlg
similarity index 84%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.ptex.tlg
index cd844648d..b0f9e09db 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.ptex.tlg
@@ -2,7 +2,8 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 Author: Joseph Wright
 ============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
 ============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
 ============================================================
diff --git a/l3kernel/testfiles/m3token001.tlg b/l3kernel/testfiles/m3str-convert005.tlg
similarity index 76%
copy from l3kernel/testfiles/m3token001.tlg
copy to l3kernel/testfiles/m3str-convert005.tlg
index cd844648d..a1585222d 100644
--- a/l3kernel/testfiles/m3token001.tlg
+++ b/l3kernel/testfiles/m3str-convert005.tlg
@@ -2,7 +2,9 @@ This is a generated file for the LaTeX (2e + expl3) validation system.
 Don't change this file in any respect.
 Author: Joseph Wright
 ============================================================
-TEST 1: Unicode NFD
+TEST 1: PDF names
 ============================================================
-A
+abczz
+brackets#28#29#5B#5D#7B#7D#3C#3Exxx
+gr#C3#BC#C3#9Fe#E2#82#AC#F0#9F#A6#86
 ============================================================