texlive[74600] Build/source/texk/dvipdfm-x: (x)dvipdfmx deterministic font names
commits+mseven at tug.org
commits+mseven at tug.org
Thu Mar 13 07:41:43 CET 2025
Revision: 74600
https://tug.org/svn/texlive?view=revision&revision=74600
Author: mseven
Date: 2025-03-13 07:41:42 +0100 (Thu, 13 Mar 2025)
Log Message:
-----------
(x)dvipdfmx deterministic font names
Modified Paths:
--------------
trunk/Build/source/texk/dvipdfm-x/ChangeLog
trunk/Build/source/texk/dvipdfm-x/configure.ac
trunk/Build/source/texk/dvipdfm-x/dvipdfmx.c
trunk/Build/source/texk/dvipdfm-x/dvipdfmx.h
trunk/Build/source/texk/dvipdfm-x/pdffont.c
Modified: trunk/Build/source/texk/dvipdfm-x/ChangeLog
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/ChangeLog 2025-03-13 01:14:14 UTC (rev 74599)
+++ trunk/Build/source/texk/dvipdfm-x/ChangeLog 2025-03-13 06:41:42 UTC (rev 74600)
@@ -1,3 +1,9 @@
+2025-03-13 Max Chernoff <tex at maxchernoff.ca>
+
+ * dvipdfmx.c, dvipdfmx.h, pdffont.c: Use deterministic font names.
+ Report from Paulo Ney de Souza,
+ https://tug.org/pipermail/dvipdfmx/2025-March/000353.html
+
2025-03-07 Karl Berry <karl at tug.org>
* TL'25 release.
Modified: trunk/Build/source/texk/dvipdfm-x/configure.ac
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/configure.ac 2025-03-13 01:14:14 UTC (rev 74599)
+++ trunk/Build/source/texk/dvipdfm-x/configure.ac 2025-03-13 06:41:42 UTC (rev 74600)
@@ -8,7 +8,7 @@
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl
-AC_INIT([dvipdfm-x (TeX Live)], [20250205], [dvipdfmx at tug.org])
+AC_INIT([dvipdfm-x (TeX Live)], [20250313], [dvipdfmx at tug.org])
AC_PREREQ([2.65])
AC_CONFIG_SRCDIR([agl.c])
AC_CONFIG_AUX_DIR([../../build-aux])
Modified: trunk/Build/source/texk/dvipdfm-x/dvipdfmx.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/dvipdfmx.c 2025-03-13 01:14:14 UTC (rev 74599)
+++ trunk/Build/source/texk/dvipdfm-x/dvipdfmx.c 2025-03-13 06:41:42 UTC (rev 74600)
@@ -127,7 +127,7 @@
static int has_paper_option = 0;
/* Input and output filenames */
-static char *dvi_filename = NULL, *pdf_filename = NULL;
+char *dvi_filename = NULL, *pdf_filename = NULL;
static void read_config_file (const char *config);
Modified: trunk/Build/source/texk/dvipdfm-x/dvipdfmx.h
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/dvipdfmx.h 2025-03-13 01:14:14 UTC (rev 74599)
+++ trunk/Build/source/texk/dvipdfm-x/dvipdfmx.h 2025-03-13 06:41:42 UTC (rev 74600)
@@ -26,6 +26,7 @@
#define _DVIPDFMX_H_
extern const char *my_name;
+extern char *dvi_filename, *pdf_filename;
extern int extractbb(int argc, char *argv[]);
extern void read_config_special(const char **start, const char *end);
Modified: trunk/Build/source/texk/dvipdfm-x/pdffont.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/pdffont.c 2025-03-13 01:14:14 UTC (rev 74599)
+++ trunk/Build/source/texk/dvipdfm-x/pdffont.c 2025-03-13 06:41:42 UTC (rev 74600)
@@ -31,6 +31,7 @@
#include "mem.h"
#include "dpxconf.h"
+#include "dpxcrypt.h"
#include "dpxfile.h"
#include "dpxutil.h"
@@ -51,6 +52,8 @@
#include "type0.h"
#include "tt_cmap.h"
+#include "dvipdfmx.h"
+
#include "pdffont.h"
#define MREC_HAS_TOUNICODE(m) ((m) && (m)->opt.tounicode)
@@ -61,17 +64,65 @@
PKFont_set_dpi(font_dpi);
}
+static union {
+ char p[sizeof(int)];
+ int* i;
+} unique_tag_count;
+
+/* This function used to be implemented as
+ *
+ * for (i = 0; i < 6; i++) {
+ * ch = rand() % 26;
+ * tag[i] = ch + 'A';
+ * }
+ * tag[6] = '\0';
+ *
+ * but this meant that the tag would change on every run, producing a
+ * non-deterministic PDF file. You could work around this by setting
+ * `SOURCE_DATE_EPOCH` in the environment (since the current time is used to
+ * seed `rand`), but that requires extra effort. Instead, we use an MD5 hash of
+ * the input (dvi) filename, the output (pdf) filename, and a counter that
+ * increments on each call to this function. This produces a deterministic tag
+ * for each document, provided that the input filename, the output filename, and
+ * the order/number of fonts remains the same.
+ *
+ * Why do we need this function in the first place? Well, since we are
+ * subsetting the fonts, this means that the "LM Roman 10" font in one document
+ * will not be the same as the "LM Roman 10" font in another document. This can
+ * cause problems when older/buggy PDF processors merge or embed multiple
+ * documents, since it's invalid to have two fonts with the same name and
+ * neither font is a strict subset/superset of the other.
+ *
+ * pdfTeX and LuaTeX solve this by hashing over the subsetting hash table, but
+ * this only works there since they only generate the PDF font name _after_
+ * creating the subset. (x)dvipdfmx generates the PDF font name as (almost) the
+ * very first step when including a font, so we couldn't use this method without
+ * extensive refactoring.
+ *
+ * The pdfTeX and LuaTeX methods guarantee that multiple incompatible subsets
+ * will never have the same name (barring hash collisions), and the prior `rand`
+ * method had the same guarantee (barring an _extremely_ unlikely RNG
+ * collision). This new method isn't quite as good since if the input and output
+ * are both pipes, then both filenames will be `NULL` and the tag will only
+ * depend on the counter. But I think that most PDF processors these days will
+ * properly check for font name collisions, so this is probably good enough.
+ */
void
pdf_font_make_uniqueTag (char *tag)
{
- int i;
- char ch;
+ MD5_CONTEXT state;
+ unsigned char digest[16];
+ unique_tag_count.i++;
- for (i = 0; i < 6; i++) {
- ch = rand() % 26;
- tag[i] = ch + 'A';
- }
- tag[6] = '\0';
+ MD5_init(&state);
+ if (dvi_filename)
+ MD5_write(&state, dvi_filename, strlen(dvi_filename));
+ if (pdf_filename)
+ MD5_write(&state, pdf_filename, strlen(pdf_filename));
+ MD5_write(&state, unique_tag_count.p, sizeof(unique_tag_count));
+ MD5_final(digest, &state);
+
+ snprintf(tag, 7, "%02X%02X%02X", digest[0], digest[1], digest[2]);
}
static void
More information about the tex-live-commits mailing list.