[pdftex] Please make the CreationDate, ModDate and ID field deterministic

Maria Valentina Marin marivalenm at gmail.com
Sat Jul 11 12:33:04 CEST 2015


Hello,

I wanted to expand on the thread started by Nicolas Boulenguez:

http://tug.org/mailman/htdig/pdftex/2015-May/008940.html

In that thread they explain ways to make pdftex produce reproducible output.

I propose the attached patch, which does not change the default behaviour
of pdftex. However, if the environment variable SOURCE_DATE_EPOCH is set,
it causes pdftex to produce reproducible PDF files by modifying the
behaviour of the functions initstarttime() and printID().

The environment variable SOURCE_DATE_EPOCH contains a timestamp expressed
as an integer number of seconds since the Unix epoch [1]. The function
printID() was modified to obtain the time as in the patch from Nicolas
Boulenguez, though in contrast to their patch the ID still uses the output
build directory as part of its hash. This was done because the Debian
reproducible builds team decided not to change the path between builds,
which makes the build path deterministic by default. As far as Debian
goes, stripping off the path is not required, though we will not complain
if you do =)

This environment variable was introduced by the Debian reproducible
builds team, but it is meant to be used by any distribution. The package
help2man now supports it, and we are also in the process of persuading
the maintainers of txt2man, epydoc, GCC, Doxygen and libxslt to support
it as well.

In our framework we are successfully using a modified version of pdftex
that includes this patch to build packages and test for reproducibility.

Thanks!
akira

P.S. I am starting a new thread because I could not find a way to reply
to the one Nicolas started.

[1] https://wiki.debian.org/ReproducibleBuilds/TimestampsProposal
-------------- next part --------------
--- a/texk/web2c/lib/texmfmp.c
+++ b/texk/web2c/lib/texmfmp.c
@@ -2869,6 +2869,7 @@ void pdftex_fail(const char *fmt, ...)
 }
 #endif /* not pdfTeX */
 
+static boolean start_time_set = false;
 static time_t start_time = 0;
 #define TIME_STR_SIZE 30
 char start_time_str[TIME_STR_SIZE];
@@ -2925,8 +2926,42 @@ static void makepdftime(time_t t, char *
 
 void initstarttime(void)
 {
-    if (start_time == 0) {
-        start_time = time((time_t *) NULL);
+	char *source_date_epoch;
+	unsigned long long epoch;
+	char *endptr;
+    if (!start_time_set) {
+        start_time_set = true;
+	source_date_epoch = getenv("SOURCE_DATE_EPOCH");
+	if (source_date_epoch)
+	{
+		errno = 0;
+		epoch = strtoull(source_date_epoch, &endptr, 10);
+		if ((errno == ERANGE && (epoch == ULLONG_MAX || epoch == 0))
+				|| (errno != 0 && epoch == 0))
+		{
+			fprintf(stderr, "Environment variable $SOURCE_DATE_EPOCH: strtoull: %s\n", strerror(errno));
+			uexit(EXIT_FAILURE);
+		}
+		if (endptr == source_date_epoch)
+		{
+			fprintf(stderr, "Environment variable $SOURCE_DATE_EPOCH: No digits were found: %s\n", endptr);
+			uexit(EXIT_FAILURE);
+		}
+		if (*endptr != '\0')
+		{
+			fprintf(stderr, "Environment variable $SOURCE_DATE_EPOCH: Trailing garbage: %s\n", endptr);
+			uexit(EXIT_FAILURE);
+		}
+		if (epoch > ULONG_MAX)
+		{
+			fprintf(stderr, "Environment variable $SOURCE_DATE_EPOCH: value must be smaller than or equal to: %lu but was found to be: %llu \n", ULONG_MAX  ,epoch);
+			uexit(EXIT_FAILURE);
+		}
+		start_time = epoch;
+	}
+	else {
+		start_time = time((time_t *) NULL);
+	}
         makepdftime(start_time, start_time_str);
     }
 }
--- a/texk/web2c/pdftexdir/utils.c
+++ b/texk/web2c/pdftexdir/utils.c
@@ -723,9 +723,6 @@ static void convertStringToHexString(con
  */
 void printID(strnumber filename)
 {
-    time_t t;
-    size_t size;
-    char time_str[32];
     md5_state_t state;
     md5_byte_t digest[16];
     char id[64];
@@ -734,9 +731,8 @@ void printID(strnumber filename)
     /* start md5 */
     md5_init(&state);
     /* get the time */
-    t = time(NULL);
-    size = strftime(time_str, sizeof(time_str), "%Y%m%dT%H%M%SZ", gmtime(&t));
-    md5_append(&state, (const md5_byte_t *) time_str, size);
+    initstarttime();
+    md5_append(&state, (const md5_byte_t *) start_time_str, strlen(start_time_str));
     /* get the file name */
     if (getcwd(pwd, sizeof(pwd)) == NULL)
         pdftex_fail("getcwd() failed (%s), path too long?", strerror(errno));


More information about the pdftex mailing list