texlive[69764] Build/source/texk/dvipdfm-x: dvipdfmx: Look up the TrueType cmap format 14 subtable

commits+ymorimi at tug.org commits+ymorimi at tug.org
Sat Feb 10 04:25:28 CET 2024


Revision: 69764
          https://tug.org/svn/texlive?view=revision&revision=69764
Author:   ymorimi
Date:     2024-02-10 04:25:28 +0100 (Sat, 10 Feb 2024)
Log Message:
-----------
dvipdfmx: Look up the TrueType cmap format 14 subtable

Modified Paths:
--------------
    trunk/Build/source/texk/dvipdfm-x/ChangeLog
    trunk/Build/source/texk/dvipdfm-x/cidtype2.c
    trunk/Build/source/texk/dvipdfm-x/sfnt.h
    trunk/Build/source/texk/dvipdfm-x/tt_cmap.c
    trunk/Build/source/texk/dvipdfm-x/tt_cmap.h

Modified: trunk/Build/source/texk/dvipdfm-x/ChangeLog
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/ChangeLog	2024-02-10 02:19:51 UTC (rev 69763)
+++ trunk/Build/source/texk/dvipdfm-x/ChangeLog	2024-02-10 03:25:28 UTC (rev 69764)
@@ -1,3 +1,10 @@
+2024-02-10  Yukimasa Morimi  <h20y6m at yahoo.co.jp>
+
+	* cidtype2.c, sfnt.h, tt_cmap.{c,h}:
+	Look up the TrueType cmap format 14 subtable when mapping CID to
+	Unicode Variation Sequence in new Adobe-Japan1-UCS2 CMap.
+	https://github.com/texjporg/tex-jp-build/issues/155
+
 2024-02-10  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* cidtype2.c: Fix an issue on glyph conversion of

Modified: trunk/Build/source/texk/dvipdfm-x/cidtype2.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/cidtype2.c	2024-02-10 02:19:51 UTC (rev 69763)
+++ trunk/Build/source/texk/dvipdfm-x/cidtype2.c	2024-02-10 03:25:28 UTC (rev 69764)
@@ -439,7 +439,7 @@
 }
 
 static int32_t
-cid_to_code (CMap *cmap, CID cid, int unicode_cmap)
+cid_to_code (CMap *cmap, CID cid, int unicode_cmap, int32_t *puvs)
 {
   unsigned char        inbuf[2], outbuf[32];
   int                  inbytesleft = 2, outbytesleft = 32;
@@ -446,6 +446,8 @@
   const unsigned char *p;
   unsigned char       *q;
 
+  *puvs = -1;
+
   if (!cmap)
     return cid;
 
@@ -475,13 +477,9 @@
       /* Check following Variation Selectors */
       uvs = UC_UTF16BE_decode_char(&p, endptr);
       if (p == endptr && uvs >= 0xfe00 && uvs <= 0xfe0f) {
-          /* Combine CJK compatibility ideograph */
-          int32_t cci = UC_Combine_CJK_compatibility_ideograph(uc, uvs);
-          if (cci > 0)
-            return cci;
-          /* Ignore Standardized Variation Sequence */
-          WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, uc, uvs);
-          return uc;
+        /* Standardized Variation Sequence */
+        *puvs = uvs;
+        return uc;
       }
       WARN("CID=%u mapped to non-single Unicode characters...", cid);
       return -1;
@@ -500,16 +498,12 @@
       uvs = UC_UTF16BE_decode_char(&p, endptr);
       if (p == endptr) {
         if (uvs >= 0xfe00 && uvs <= 0xfe0f) {
-          /* Combine CJK compatibility ideograph */
-          int32_t cci = UC_Combine_CJK_compatibility_ideograph(uc, uvs);
-          if (cci > 0)
-            return cci;
-          /* Ignore Standardized Variation Sequence */
-          WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, uc, uvs);
+          /* Standardized Variation Sequence */
+          *puvs = uvs;
           return uc;
         } else if (uvs >= 0xe0100 && uvs <= 0xe01ef) {
-          /* Ignore Ideographic Variation Sequence */
-          WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, uc, uvs);
+          /* Ideographic Variation Sequence */
+          *puvs = uvs;
           return uc;
         }
       }
@@ -528,8 +522,8 @@
       uvs = UC_UTF16BE_decode_char(&p, endptr);
       if (p == endptr) {
         if (uvs >= 0xe0100 && uvs <= 0xe01ef) {
-          /* Ignore Ideographic Variation Sequence */
-          WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, uc, uvs);
+          /* Ideographic Variation Sequence */
+          *puvs = uvs;
           return uc;
         }
       }
@@ -575,6 +569,7 @@
   struct tt_glyphs *glyphs;
   CMap             *cmap = NULL;
   tt_cmap          *ttcmap = NULL;
+  tt_cmap          *ttcmap_uvs = NULL;
   ULONG             offset = 0;
   CID               cid, last_cid;
   unsigned char    *cidtogidmap;
@@ -728,6 +723,8 @@
         return -1;
       } else if (i <= WIN_UCS_INDEX_MAX) {
         unicode_cmap = 1;
+        /* Unicode Variation Sequences */
+        ttcmap_uvs = tt_cmap_read(sfont, 0, 5);
       } else {
         unicode_cmap = 0;
       }
@@ -806,7 +803,7 @@
   if (h_used_chars) {
     used_chars = h_used_chars;
     for (cid = 1; cid <= last_cid; cid++) {
-      int32_t  code;
+      int32_t  code, uvs = 0;
       uint16_t gid = 0;
 
       if (!is_used_char2(h_used_chars, cid))
@@ -822,11 +819,26 @@
         code = cid;
         break;
       case via_cid_to_code:
-        code = cid_to_code(cmap, cid, unicode_cmap);
+        code = cid_to_code(cmap, cid, unicode_cmap, &uvs);
         if (code < 0) {
           WARN("Unable to map CID to code: CID=%u", cid);
         } else {
-          gid  = tt_cmap_lookup(ttcmap, code);
+          if (ttcmap_uvs && uvs > 0) {
+            gid = tt_cmap_uvs_lookup(ttcmap_uvs, ttcmap, code, uvs);
+          }
+          if (gid == 0 && uvs >= 0xfe00 && uvs <= 0xfe0f) {
+            /* Standardized Variation Sequence */
+            /* Combine CJK compatibility ideograph */
+            int32_t code2 = UC_Combine_CJK_compatibility_ideograph(code, uvs);
+            if (code2 > 0)
+              gid = tt_cmap_lookup(ttcmap, code2);
+          }
+          if (gid == 0) {
+            gid = tt_cmap_lookup(ttcmap, code);
+            if (gid > 0 && uvs > 0) {
+              WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, code, uvs);
+            }
+          }
 #ifdef FIX_CJK_UNIOCDE_SYMBOLS
           if (gid == 0 && unicode_cmap && code <= 0xFFFF) {
             int alt_code;
@@ -888,7 +900,7 @@
     }
 
     for (cid = 1; cid <= last_cid; cid++) {
-      int32_t  code;
+      int32_t  code, uvs = 0;
       uint16_t gid = 0;
 
       if (!is_used_char2(v_used_chars, cid))
@@ -912,11 +924,26 @@
         code = cid;
         break;
       case via_cid_to_code:
-        code = cid_to_code(cmap, cid, unicode_cmap);
+        code = cid_to_code(cmap, cid, unicode_cmap, &uvs);
         if (code < 0) {
           WARN("Unable to map CID to code: CID=%u", cid);
         } else {
-          gid  = tt_cmap_lookup(ttcmap, code);
+          if (ttcmap_uvs && uvs > 0) {
+            gid = tt_cmap_uvs_lookup(ttcmap_uvs, ttcmap, code, uvs);
+          }
+          if (gid == 0 && uvs >= 0xfe00 && uvs <= 0xfe0f) {
+            /* Standardized Variation Sequence */
+            /* Combine CJK compatibility ideograph */
+            int32_t code2 = UC_Combine_CJK_compatibility_ideograph(code, uvs);
+            if (code2 > 0)
+              gid = tt_cmap_lookup(ttcmap, code2);
+          }
+          if (gid == 0) {
+            gid = tt_cmap_lookup(ttcmap, code);
+            if (gid > 0 && uvs > 0) {
+              WARN("Ignored Variation Selector: CID=%u mapped to U+%04X U+%04X", cid, code, uvs);
+            }
+          }
 #ifdef FIX_CJK_UNIOCDE_SYMBOLS
           if (gid == 0 && unicode_cmap && code <= 0xFFFF) {
             int alt_code;
@@ -964,6 +991,7 @@
   ASSERT(used_chars);
 
   tt_cmap_release(ttcmap);
+  tt_cmap_release(ttcmap_uvs);
 
   if (font->cid.options.embed) {
     if (tt_build_tables(sfont, glyphs) < 0) {

Modified: trunk/Build/source/texk/dvipdfm-x/sfnt.h
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/sfnt.h	2024-02-10 02:19:51 UTC (rev 69763)
+++ trunk/Build/source/texk/dvipdfm-x/sfnt.h	2024-02-10 03:25:28 UTC (rev 69764)
@@ -92,6 +92,8 @@
 #define sfnt_get_ulong(s)  ((ULONG)  get_unsigned_quad((s)->stream))
 #define sfnt_get_long(s)   ((LONG)   get_signed_quad  ((s)->stream))
 
+#define sfnt_get_uint24(s) ((ULONG)  get_unsigned_triple((s)->stream))
+
 #define sfnt_seek_set(s,o)   seek_absolute((s)->stream, (o))
 #define sfnt_read(b,l,s)     fread((b), 1, (l), (s)->stream)
 

Modified: trunk/Build/source/texk/dvipdfm-x/tt_cmap.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/tt_cmap.c	2024-02-10 02:19:51 UTC (rev 69763)
+++ trunk/Build/source/texk/dvipdfm-x/tt_cmap.c	2024-02-10 03:25:28 UTC (rev 69764)
@@ -447,6 +447,114 @@
   return gid;
 }
 
+/* format 14: unicode variation sequences */
+
+struct variationSelector
+{
+  /* Variation selector value; lookup_cmap14 compares it with the requested selector */
+  ULONG   varSelector;
+
+  /* Default UVS table: parallel arrays of numUnicodeValueRanges entries (NULL when the table is absent) */
+  ULONG   numUnicodeValueRanges;
+  ULONG  *rangesStartUnicodeValue; /* first Unicode value of each range */
+  BYTE   *rangesAdditionalCount;   /* range j covers start[j] .. start[j] + count[j], inclusive */
+
+  /* Non-Default UVS table: parallel arrays of numUVSMappings entries (NULL when the table is absent) */
+  ULONG   numUVSMappings;
+  ULONG  *uvsMappingsUnicodeValue; /* base Unicode value of each mapping */
+  USHORT *uvsMappingsGlyphID;      /* glyph ID returned for that base value */
+};
+
+struct cmap14
+{
+  ULONG  numVarSelectorRecords; /* number of entries in varSelector[] */
+  struct variationSelector *varSelector; /* array allocated by read_cmap14, one entry per record */
+};
+
+/* Read a cmap format 14 subtable; UVS table offsets are added to 'offset'. Result is freed by release_cmap14. */
+static struct cmap14 *
+read_cmap14 (sfnt *sfont, ULONG offset, ULONG len)
+{
+  struct cmap14 *map;
+  ULONG *defaultUVSOffset, *nonDefaultUVSOffset; /* per-record table offsets, needed only while reading */
+  ULONG  i, j;
+  
+  if (len < 4) { /* NOTE(review): only use of len; the counts read below are not checked against it */
+    WARN("invalid format 14 TT cmap subtable");
+    return NULL;
+  }
+
+  map =  NEW(1, struct cmap14);
+  map->numVarSelectorRecords = sfnt_get_ulong(sfont);
+  map->varSelector = NEW(map->numVarSelectorRecords, struct variationSelector);
+  defaultUVSOffset    = NEW(map->numVarSelectorRecords, ULONG);
+  nonDefaultUVSOffset = NEW(map->numVarSelectorRecords, ULONG);
+  /* Pass 1: read every VariationSelector record from the current stream position */
+  for (i = 0; i < map->numVarSelectorRecords; i++) {
+    map->varSelector[i].varSelector = sfnt_get_uint24(sfont);
+    defaultUVSOffset[i]    = sfnt_get_ulong(sfont);
+    nonDefaultUVSOffset[i] = sfnt_get_ulong(sfont);
+  }
+  for (i = 0; i < map->numVarSelectorRecords; i++) { /* Pass 2: seek to and load each record's tables */
+    /* Read DefaultUVS Table: (start Unicode value, additional count) ranges */
+    if (defaultUVSOffset[i] > 0) { /* an offset of 0 is treated as "no table" */
+      sfnt_seek_set(sfont, offset + defaultUVSOffset[i]);
+      map->varSelector[i].numUnicodeValueRanges   = sfnt_get_ulong(sfont);
+      map->varSelector[i].rangesStartUnicodeValue = NEW(map->varSelector[i].numUnicodeValueRanges, ULONG);
+      map->varSelector[i].rangesAdditionalCount   = NEW(map->varSelector[i].numUnicodeValueRanges, BYTE);
+      for (j = 0; j < map->varSelector[i].numUnicodeValueRanges; j++) {
+        map->varSelector[i].rangesStartUnicodeValue[j] = sfnt_get_uint24(sfont);
+        map->varSelector[i].rangesAdditionalCount[j]   = sfnt_get_byte(sfont);
+      }
+    } else { /* absent table: zero count and NULL arrays so lookup and release skip it */
+      map->varSelector[i].numUnicodeValueRanges   = 0;
+      map->varSelector[i].rangesStartUnicodeValue = NULL;
+      map->varSelector[i].rangesAdditionalCount   = NULL;
+    }
+    /* Read NonDefaultUVS Table: (Unicode value, glyph ID) mappings */
+    if (nonDefaultUVSOffset[i] > 0) { /* an offset of 0 is treated as "no table" */
+      sfnt_seek_set(sfont, offset + nonDefaultUVSOffset[i]);
+      map->varSelector[i].numUVSMappings          = sfnt_get_ulong(sfont);
+      map->varSelector[i].uvsMappingsUnicodeValue = NEW(map->varSelector[i].numUVSMappings, ULONG);
+      map->varSelector[i].uvsMappingsGlyphID      = NEW(map->varSelector[i].numUVSMappings, USHORT);
+      for (j = 0; j < map->varSelector[i].numUVSMappings; j++) {
+        map->varSelector[i].uvsMappingsUnicodeValue[j] = sfnt_get_uint24(sfont);
+        map->varSelector[i].uvsMappingsGlyphID[j]      = sfnt_get_ushort(sfont);
+      }
+    } else { /* absent table: zero count and NULL arrays so lookup and release skip it */
+      map->varSelector[i].numUVSMappings          = 0;
+      map->varSelector[i].uvsMappingsUnicodeValue = NULL;
+      map->varSelector[i].uvsMappingsGlyphID      = NULL;
+    }
+  }
+
+  RELEASE(defaultUVSOffset);
+  RELEASE(nonDefaultUVSOffset);
+  return map;
+}
+
+static void
+release_cmap14 (struct cmap14 *map) /* Free a cmap14 and every per-selector array; a NULL map is a no-op */
+{
+  ULONG  i;
+  if (map) {
+    if (map->varSelector) {
+      for (i = 0; i < map->numVarSelectorRecords; i++) {
+        if (map->varSelector[i].rangesStartUnicodeValue) /* arrays are NULL when read_cmap14 found no table */
+          RELEASE(map->varSelector[i].rangesStartUnicodeValue);
+        if (map->varSelector[i].rangesAdditionalCount)
+          RELEASE(map->varSelector[i].rangesAdditionalCount);
+        if (map->varSelector[i].uvsMappingsUnicodeValue)
+          RELEASE(map->varSelector[i].uvsMappingsUnicodeValue);
+        if (map->varSelector[i].uvsMappingsGlyphID)
+          RELEASE(map->varSelector[i].uvsMappingsGlyphID);
+      }
+      RELEASE(map->varSelector); /* the record array itself, after its members */
+    }
+    RELEASE(map);
+  }
+}
+
 /* read cmap */
 tt_cmap *
 tt_cmap_read (sfnt *sfont, USHORT platform, USHORT encoding)
@@ -489,7 +597,7 @@
   if (cmap->format <= 6) {
     length         = sfnt_get_ushort(sfont);
     cmap->language = sfnt_get_ushort(sfont); /* language (Mac) */
-  } else {
+  } else if (cmap->format != 14) {
     if (sfnt_get_ushort(sfont) != 0) { /* reverved - 0 */
       WARN("Unrecognized cmap subtable format.");
       tt_cmap_release(cmap);
@@ -498,6 +606,9 @@
       length         = sfnt_get_ulong(sfont);
       cmap->language = sfnt_get_ulong(sfont);
     }
+  } else {
+    length         = sfnt_get_ulong(sfont);
+    cmap->language = 0;
   }
   
   switch(cmap->format) {
@@ -517,6 +628,9 @@
     /* WARN("UCS-4 TrueType cmap table..."); */
     cmap->map = read_cmap12(sfont, length);
     break;
+  case 14:
+    cmap->map = read_cmap14(sfont, offset, length);
+    break;
   default:
     WARN("Unrecognized OpenType/TrueType cmap format.");
     tt_cmap_release(cmap);
@@ -554,6 +668,9 @@
       case 12:
         release_cmap12(cmap->map);
         break;
+      case 14:
+        release_cmap14(cmap->map);
+        break;
       default:
         WARN("Unrecognized OpenType/TrueType cmap format: %d", cmap->format);
         break;
@@ -602,8 +719,44 @@
   return gid;
 }
 
+static USHORT /* Glyph ID for the sequence <unicode, uvs>; 0 when this subtable does not list it */
+lookup_cmap14 (struct cmap14 *map, tt_cmap* cmap_default, ULONG unicode, ULONG uvs)
+{
+  ULONG  i, j;
 
+  for (i = 0; i < map->numVarSelectorRecords; i++) { /* linear search for the selector's record */
+    if (map->varSelector[i].varSelector == uvs) {
+      for (j = 0; j < map->varSelector[i].numUnicodeValueRanges; j++) { /* Default UVS ranges */
+        if (map->varSelector[i].rangesStartUnicodeValue[j] <= unicode &&
+            unicode <= map->varSelector[i].rangesStartUnicodeValue[j] + map->varSelector[i].rangesAdditionalCount[j])
+          return tt_cmap_lookup(cmap_default, unicode); /* in a default range: glyph comes from cmap_default */
+      }
+      for (j = 0; j < map->varSelector[i].numUVSMappings; j++) { /* Non-Default UVS mappings */
+        if (map->varSelector[i].uvsMappingsUnicodeValue[j] == unicode)
+          return map->varSelector[i].uvsMappingsGlyphID[j]; /* glyph ID stored in this subtable */
+      }
+      return 0; /* selector matched but unicode is not listed; later records are not searched */
+    }
+  }
 
+  return 0; /* no record for this selector */
+}
+
+USHORT /* Look up the glyph for <unicode, uvs> in a format 14 cmap; cmap_default serves the default ranges */
+tt_cmap_uvs_lookup(tt_cmap* cmap_uvs, tt_cmap* cmap_default, ULONG unicode, ULONG uvs)
+{
+  ASSERT(cmap_uvs);
+
+  if (cmap_uvs->format != 14) { /* any other format: warn and report "no glyph" */
+    WARN("Unicode Variation Sequences in OpenType/TrueType cmap must be format 14.");
+    return 0;
+  }
+
+  return lookup_cmap14(cmap_uvs->map, cmap_default, unicode, uvs); /* NOTE(review): read_cmap14 can return NULL; confirm map is non-NULL here */
+}
+
+
+
 static unsigned char srange_min[2] = {0x00, 0x00};
 static unsigned char srange_max[2] = {0xff, 0xff};
 static unsigned char lrange_min[4] = {0x00, 0x00, 0x00, 0x00};

Modified: trunk/Build/source/texk/dvipdfm-x/tt_cmap.h
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/tt_cmap.h	2024-02-10 02:19:51 UTC (rev 69763)
+++ trunk/Build/source/texk/dvipdfm-x/tt_cmap.h	2024-02-10 03:25:28 UTC (rev 69764)
@@ -61,6 +61,8 @@
 extern USHORT   tt_cmap_lookup  (tt_cmap *cmap, ULONG cc);
 extern void     tt_cmap_release (tt_cmap *cmap);
 
+extern USHORT tt_cmap_uvs_lookup(tt_cmap* cmap_uvs, tt_cmap* cmap_default, ULONG unicode, ULONG uvs);
+
 #include "pdfobj.h"
 
 /* Indirect reference */



More information about the tex-live-commits mailing list.