texlive[55980] Build/source/texk/dvipdfm-x: Add missing reset_xgstate. Fix CMap wmode setting.

commits+kakuto at tug.org commits+kakuto at tug.org
Wed Jul 29 00:26:35 CEST 2020


Revision: 55980
          http://tug.org/svn/texlive?view=revision&revision=55980
Author:   kakuto
Date:     2020-07-29 00:26:35 +0200 (Wed, 29 Jul 2020)
Log Message:
-----------
Add missing reset_xgstate. Fix CMap wmode setting. (S. Hirata)

Modified Paths:
--------------
    trunk/Build/source/texk/dvipdfm-x/ChangeLog
    trunk/Build/source/texk/dvipdfm-x/spc_pdfm.c
    trunk/Build/source/texk/dvipdfm-x/spc_xtx.c
    trunk/Build/source/texk/dvipdfm-x/tt_cmap.c

Modified: trunk/Build/source/texk/dvipdfm-x/ChangeLog
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/ChangeLog	2020-07-28 22:09:24 UTC (rev 55979)
+++ trunk/Build/source/texk/dvipdfm-x/ChangeLog	2020-07-28 22:26:35 UTC (rev 55980)
@@ -1,3 +1,10 @@
+2020-07-29  Shunsaku Hirata  <shunsaku.hirata74 at gmail.com>
+
+	* spc_pdfm.c: Improve codes in unicode conversion and
+	annotation rect calculation.
+	* spc_xtx.c: Add missing reset_xgstate().
+	* tt_cmap.c: Add wmode to auto-generated CMap names.
+
 2020-07-28  Clerk Ma  <maqiyuan130324 at vip.qq.com>
 
 	* pdfobj.c: Simple modifications for LIBDPX.

Modified: trunk/Build/source/texk/dvipdfm-x/spc_pdfm.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/spc_pdfm.c	2020-07-28 22:09:24 UTC (rev 55979)
+++ trunk/Build/source/texk/dvipdfm-x/spc_pdfm.c	2020-07-28 22:26:35 UTC (rev 55980)
@@ -427,94 +427,133 @@
  */
 #include "cmap.h"
 
-static int
-reencodestring (CMap *cmap, pdf_obj *instring)
+static size_t
+calculate_size_utf16 (const unsigned char *p, const unsigned char *endptr)
 {
-#define WBUF_SIZE 4096
-  unsigned char  wbuf[WBUF_SIZE];
-  unsigned char *obufcur;
-  const unsigned char *inbufcur;
-  int inbufleft, obufleft;
+  size_t len = 0;
 
-  if (!cmap || !instring)
-    return 0;
-
-  inbufleft = pdf_string_length(instring);
-  inbufcur  = pdf_string_value (instring);
-
-  wbuf[0]  = 0xfe;
-  wbuf[1]  = 0xff;
-  obufcur  = wbuf + 2;
-  obufleft = WBUF_SIZE - 2;
-
-  CMap_decode(cmap,
-	      &inbufcur, &inbufleft,
-	      &obufcur, &obufleft);
-
-  if (inbufleft > 0) {
-    return  -1;
+  while (p < endptr) {
+    unsigned char c = *p;
+    if (c < 0x80) {
+      len += 2;
+      p   += 1;
+    } else if (c < 0xE0) {
+      len += 2;
+      p   += 2;
+    } else if (c < 0xF0) {
+      len += 2;
+      p   += 3;
+    } else if (c < 0xF8) {
+      len += 4; /* Surrogate */
+      p   += 4;
+    } else if (c < 0xFC) {
+      len += 4; /* Surrogate */
+      p   += 5;
+    } else if (c < 0xFE) {
+      len += 4; /* Surrogate */
+      p   += 6;
+    }
   }
 
-  pdf_set_string(instring, wbuf, WBUF_SIZE - obufleft);
-
-  return 0;
+  return len;
 }
 
 static int
-maybe_reencode_utf8(pdf_obj *instring)
+reencode_string_from_utf8_to_utf16be (pdf_obj *instring)
 {
-  unsigned char       *inbuf;
-  int                  inlen;
-  int                  non_ascii = 0;
-  const unsigned char *cp;
-  unsigned char       *op;
-  unsigned char        wbuf[WBUF_SIZE];
+  int                  error = 0;
+  unsigned char       *strptr;
+  size_t               length;
+  int                  non_ascii;
+  const unsigned char *p, *endptr;
 
-  if (!instring)
-    return 0;
+  ASSERT(instring);
+  ASSERT(PDF_OBJ_STRINGTYPE(instring));
 
-  inlen = pdf_string_length(instring);
-  inbuf = pdf_string_value(instring);
+  strptr = pdf_string_value(instring);
+  length = pdf_string_length(instring);
 
   /* check if the input string is strictly ASCII */
-  for (cp = inbuf; cp < inbuf + inlen; ++cp) {
-    if (*cp > 127) {
-      non_ascii = 1;
-    }
+  p         = strptr;
+  endptr    = strptr + length;
+  non_ascii = 0;
+  for ( ; p < endptr; p++) {
+    if (*p > 127)
+      non_ascii++;
   }
   if (non_ascii == 0)
     return 0; /* no need to reencode ASCII strings */
 
-  /* Check if the input string is valid UTF8 string
-   * This routine may be called against non-text strings.
-   * We need to re-encode string only when string is a text string
-   * endcoded in UTF8.
-   */
-  if (!UC_UTF8_is_valid_string(inbuf, inbuf + inlen))
-    return 0;
-  else if (inbuf[0] == 0xfe && inbuf[1] == 0xff &&
-      UC_UTF16BE_is_valid_string(inbuf + 2, inbuf + inlen))
-    return 0; /* no need to reencode UTF16BE with BOM */
+  if (!UC_UTF8_is_valid_string(strptr, endptr)) {
+    error = -1;
+  } else {
+    unsigned char *q, *buf, *limptr;
+    size_t         len;
 
-  cp = inbuf;
-  op = wbuf;
-  *op++ = 0xfe;
-  *op++ = 0xff;
-  while (cp < inbuf + inlen) {
-    int32_t usv;
-    int     len;
+    p      = strptr;
+    /* Rough estimate of output length. */
+    len    = calculate_size_utf16(p, endptr) + 2;
+    buf    = NEW(len, unsigned char);
+    q      = buf;
+    limptr = buf + len;
+    q[0] = 0xfe; q[1] = 0xff;
+    q += 2;
+    while (p < endptr && q < limptr && !error) {
+      int32_t ucv;
+      size_t  count;
 
-    usv = UC_UTF8_decode_char((const unsigned char **)&cp, inbuf + inlen);
-    if (!UC_is_valid(usv))
-      return -1; /* out of valid Unicode range, give up (redundant) */
-    len = UC_UTF16BE_encode_char(usv, &op, wbuf + WBUF_SIZE);
-    if (len == 0)
-      return -1;
+      ucv = UC_UTF8_decode_char(&p, endptr);
+      if (!UC_is_valid(ucv)) {
+        error = -1;
+      } else {
+        count = UC_UTF16BE_encode_char(ucv, &q, limptr);
+        if (count == 0) {
+          error = -1;
+        }
+      }
+    }
+    if (!error)
+      pdf_set_string(instring, buf, q - buf);
+    RELEASE(buf);
   }
 
-  pdf_set_string(instring, wbuf, op - wbuf);
+  return error;
+}
 
-  return 0;
+static int
+reencode_string (CMap *cmap, pdf_obj *instring)
+{
+  int error = 0;
+
+  if (!instring || !PDF_OBJ_STRINGTYPE(instring))
+    return -1;
+  
+  if (cmap) {
+    unsigned char       *obuf;
+    unsigned char       *obufcur;
+    const unsigned char *inbufcur;
+    int                  inbufleft, obufleft, obufsize;
+
+    inbufleft = pdf_string_length(instring);
+    inbufcur  = pdf_string_value (instring);
+
+    obufsize  = inbufleft * 4 + 2;
+    obuf      = NEW(obufsize, unsigned char);
+    obuf[0]   = 0xfe;
+    obuf[1]   = 0xff;
+    obufcur   = obuf + 2;
+    obufleft  = obufsize - 2;
+
+    CMap_decode(cmap, &inbufcur, &inbufleft, &obufcur, &obufleft);
+
+    if (inbufleft > 0)
+      error = -1;
+    if (!error)
+      pdf_set_string(instring, obuf, obufsize - obufleft);
+    RELEASE(obuf);
+  }
+
+  return error;
 }
 
 /* The purpose of this routine is to check if given string object is
@@ -523,7 +562,7 @@
  * additional dictionary entries which is considered as a text string.
  */
 static int
-needreencode (pdf_obj *kp, pdf_obj *vp, struct tounicode *cd)
+need_reencode (pdf_obj *kp, pdf_obj *vp, struct tounicode *cd)
 {
   int      r = 0, i;
   pdf_obj *tk;
@@ -562,18 +601,14 @@
   case  PDF_STRING:
     if (cd && cd->cmap_id >= 0 && cd->taintkeys) {
       CMap *cmap = CMap_cache_get(cd->cmap_id);
-      if (needreencode(kp, vp, cd))
-        r = reencodestring(cmap, vp);
+      if (need_reencode(kp, vp, cd))
+        r = reencode_string(cmap, vp);
     } else if ((dpx_conf.compat_mode == dpx_mode_xdv_mode) && cd && cd->taintkeys) {
-      /* Please fix this... PDF string object is not always a text string.
-       * needreencode() is assumed to do a simple check if given string
-       * object is actually a text string.
-       */
-      if (needreencode(kp, vp, cd))
-        r = maybe_reencode_utf8(vp);
+      if (need_reencode(kp, vp, cd))
+        r = reencode_string_from_utf8_to_utf16be(vp);
     }
     if (r < 0) /* error occured... */
-      WARN("Failed to convert input string to UTF16...");
+      WARN("Input string conversion (to UTF16BE) failed for %s...", pdf_name_value(kp));
     break;
   case  PDF_DICT:
     r = pdf_foreach_dict(vp, modstrings, dp);
@@ -623,34 +658,10 @@
 static void
 set_rect (pdf_rect *rect, pdf_coord cp1, pdf_coord cp2, pdf_coord cp3, pdf_coord cp4)
 {
-  rect->llx = cp1.x;
-  if (cp2.x < rect->llx)
-    rect->llx = cp2.x;
-  if (cp3.x < rect->llx)
-    rect->llx = cp3.x;
-  if (cp4.x < rect->llx)
-    rect->llx = cp4.x;
-  rect->urx = cp1.x;
-  if (cp2.x > rect->urx)
-    rect->urx = cp2.x;
-  if (cp3.x > rect->urx)
-    rect->urx = cp3.x;
-  if (cp4.x > rect->urx)
-    rect->urx = cp4.x;
-  rect->lly = cp1.y;
-  if (cp2.y < rect->lly)
-    rect->lly = cp2.y;
-  if (cp3.y < rect->lly)
-    rect->lly = cp3.y;
-  if (cp4.y < rect->lly)
-    rect->lly = cp4.y;
-  rect->ury = cp1.y;
-  if (cp2.y > rect->ury)
-    rect->ury = cp2.y;
-  if (cp3.y > rect->ury)
-    rect->ury = cp3.y;
-  if (cp4.y > rect->ury)
-    rect->ury = cp4.y;
+  rect->llx = min4(cp1.x, cp2.x, cp3.x, cp4.x);
+  rect->lly = min4(cp1.y, cp2.y, cp3.y, cp4.y);
+  rect->urx = max4(cp1.x, cp2.x, cp3.x, cp4.x);
+  rect->ury = max4(cp1.y, cp2.y, cp3.y, cp4.y);
 }
 
 static int

Modified: trunk/Build/source/texk/dvipdfm-x/spc_xtx.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/spc_xtx.c	2020-07-28 22:09:24 UTC (rev 55979)
+++ trunk/Build/source/texk/dvipdfm-x/spc_xtx.c	2020-07-28 22:26:35 UTC (rev 55980)
@@ -1,7 +1,7 @@
 /*  This is xdvipdfmx, an extended version of dvipdfmx,
     an eXtended version of dvipdfm by Mark A. Wicks.
 
-    Copyright (C) 2013-2019 by the dvipdfmx project team.
+    Copyright (C) 2013-2020 by the dvipdfmx project team.
 
     Copyright (c) 2006 SIL International
     Originally written by Jonathan Kew
@@ -138,9 +138,9 @@
   args->curptr = args->endptr;
 
   return  spc_handler_xtx_do_transform (spe->x_user, spe->y_user,
-      cos(value * M_PI / 180), sin(value * M_PI / 180),
-      -sin(value * M_PI / 180), cos(value * M_PI / 180),
-      0, 0);
+                                        cos(value * M_PI / 180), sin(value * M_PI / 180),
+                                        -sin(value * M_PI / 180), cos(value * M_PI / 180),
+                                        0, 0);
 }
 
 int
@@ -165,6 +165,7 @@
    */
   pdf_dev_reset_fonts(0);
   pdf_dev_reset_color(0);
+  pdf_dev_reset_xgstate(0);
 
   return  0;
 }

Modified: trunk/Build/source/texk/dvipdfm-x/tt_cmap.c
===================================================================
--- trunk/Build/source/texk/dvipdfm-x/tt_cmap.c	2020-07-28 22:09:24 UTC (rev 55979)
+++ trunk/Build/source/texk/dvipdfm-x/tt_cmap.c	2020-07-28 22:26:35 UTC (rev 55980)
@@ -1,6 +1,6 @@
 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
 
-    Copyright (C) 2002-2019 by Jin-Hwan Cho and Shunsaku Hirata,
+    Copyright (C) 2002-2020 by Jin-Hwan Cho and Shunsaku Hirata,
     the dvipdfmx project team.
     
     This program is free software; you can redistribute it and/or modify
@@ -1615,7 +1615,7 @@
 
   /* Check if already loaded */
   cmap_name = NEW(strlen(map_name)+strlen("-GID")+5, char);
-  sprintf(cmap_name, "%s:%3d-GID", map_name, ttc_index);
+  sprintf(cmap_name, "%s:%d-%d-GID", map_name, ttc_index, wmode);
   cmap_id = CMap_cache_find(cmap_name);
   if (cmap_id >= 0) {
     RELEASE(cmap_name);



More information about the tex-live-commits mailing list.