texlive[65837] Build/source/texk/web2c/pdftexdir: fix for "Invalid unicode ranges in CMap beginbfrange operator", pdftex r898

commits+karl at tug.org commits+karl at tug.org
Wed Feb 15 00:09:00 CET 2023


Revision: 65837
          http://tug.org/svn/texlive?view=revision&revision=65837
Author:   karl
Date:     2023-02-15 00:08:59 +0100 (Wed, 15 Feb 2023)
Log Message:
-----------
fix for "Invalid unicode ranges in CMap beginbfrange operator", pdftex r898

Revision Links:
--------------
    http://tug.org/svn/texlive?view=revision&revision=898

Modified Paths:
--------------
    trunk/Build/source/texk/web2c/pdftexdir/ChangeLog
    trunk/Build/source/texk/web2c/pdftexdir/NEWS
    trunk/Build/source/texk/web2c/pdftexdir/tounicode.c

Modified: trunk/Build/source/texk/web2c/pdftexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/pdftexdir/ChangeLog	2023-02-14 21:58:14 UTC (rev 65836)
+++ trunk/Build/source/texk/web2c/pdftexdir/ChangeLog	2023-02-14 23:08:59 UTC (rev 65837)
@@ -1,3 +1,14 @@
+2023-02-14  Thanh Han The  <hanthethanh at gmail.com>
+
+	* tounicode.c (set_glyph_unicode): take new glyph_unicode_entry arg.
+	(is_last_byte_valid): new fn.
+	(write_tounicode): stop writing range when last byte of a
+	beginbfrange is no longer valid, that is, >255.
+	Report from Ben JW:
+	  https://tug.org/pipermail/tex-live/2023-February/048845.html
+	and corresponding veraPDF issue:
+	  https://github.com/veraPDF/veraPDF-library/issues/1253#issuecomment-1420125850
+
 2023-02-14  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
 
 	* wcfname.test:

Modified: trunk/Build/source/texk/web2c/pdftexdir/NEWS
===================================================================
--- trunk/Build/source/texk/web2c/pdftexdir/NEWS	2023-02-14 21:58:14 UTC (rev 65836)
+++ trunk/Build/source/texk/web2c/pdftexdir/NEWS	2023-02-14 23:08:59 UTC (rev 65837)
@@ -17,6 +17,8 @@
 
 - bugfixes:
   - finish omission of /Info dict when \pdfomitinfodict is not 0.
+  - generated beginbfrange should no longer be invalid with certain
+    characters (that is, no longer have a last byte >255).
   
 pdfTeX 3.141592653-2.6-1.40.24 (TeX Live 2022)
 - changes:

Modified: trunk/Build/source/texk/web2c/pdftexdir/tounicode.c
===================================================================
--- trunk/Build/source/texk/web2c/pdftexdir/tounicode.c	2023-02-14 21:58:14 UTC (rev 65836)
+++ trunk/Build/source/texk/web2c/pdftexdir/tounicode.c	2023-02-14 23:08:59 UTC (rev 65837)
@@ -189,7 +189,7 @@
  * taking into account tfmname; in case it returns
  * gp->code == UNI_EXTRA_STRING then the caller is responsible for freeing
  * gp->unicode_seq too */
-static void set_glyph_unicode(const char *s, const char* tfmname, 
+static void set_glyph_unicode(const char *s, const char* tfmname,
                               glyph_unicode_entry *gp)
 {
     char buf[SMALL_BUF_SIZE], buf2[SMALL_BUF_SIZE], *p;
@@ -314,7 +314,20 @@
     }
 }
 
+static boolean is_last_byte_valid(int srcCode1, int srcCode2, long code)
+{
+    /*
+       When defining ranges of this type, the value of the last byte in the
+       string shall be less than or equal to 255 − (srcCode2 − srcCode1). This
+       ensures that the last byte of the string shall not be incremented past
+       255; otherwise, the result of mapping is undefined.
+    */
+    char *s = strend(utf16be_str(code)) - 2;
+    long l = strtol(s, NULL, 16);
+    return l < 255 - (srcCode2 - srcCode1);
+}
 
+
 /* tfmname is without .tfm extension, but encname ends in .enc; */
 integer write_tounicode(char **glyph_names, const char *tfmname,
                         const char* encname)
@@ -346,7 +359,7 @@
             pdftex_warn("Dubious encoding file name: `%s'", encname);
     } else { /* this is a builtin encoding, so name is e.g. "cmr10-builtin" */
         assert(strlen(tfmname) + strlen(builtin_suffix) + 1 < SMALL_BUF_SIZE);
-        strcat(buf, builtin_suffix);    
+        strcat(buf, builtin_suffix);
     }
 
     objnum = pdfnewobjnum();
@@ -389,8 +402,10 @@
             i++;
         } else {                /* gtab[i].code >= 0 */
             j = i;
-            while (i < 256 && gtab[i + 1].code >= 0 &&
-                   gtab[i].code + 1 == gtab[i + 1].code)
+            while (i < 256 && gtab[i + 1].code >= 0
+                    && gtab[i].code + 1 == gtab[i + 1].code
+                    && is_last_byte_valid(j, i, gtab[i].code)
+                  )
                 i++;
             /* at this point i is the last entry of the subrange */
             i++;                /* move i to the next entry */



More information about the tex-live-commits mailing list.