texlive[52071] Build/source/texk/ptexenc: ptexenc.c: updates from H. Kitagawa

commits+hironobu at tug.org commits+hironobu at tug.org
Tue Sep 10 14:51:52 CEST 2019


Revision: 52071
          http://tug.org/svn/texlive?view=revision&revision=52071
Author:   hironobu
Date:     2019-09-10 14:51:51 +0200 (Tue, 10 Sep 2019)
Log Message:
-----------
ptexenc.c: updates from H. Kitagawa

Modified Paths:
--------------
    trunk/Build/source/texk/ptexenc/ChangeLog
    trunk/Build/source/texk/ptexenc/ptexenc.c

Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog	2019-09-10 00:57:39 UTC (rev 52070)
+++ trunk/Build/source/texk/ptexenc/ChangeLog	2019-09-10 12:51:51 UTC (rev 52071)
@@ -1,3 +1,13 @@
+2019-09-09  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* ptexenc.c: Fix a bug where buf was too small when converting
+	a file name from UTF-8 to the internal encoding.
+	This bug could cause "Abort trap: 6".
+	https://github.com/texjporg/tex-jp-build/issues/89
+	* ptexenc.c: Avoid collapsing some invalid UTF-8 sequences into
+	a "Japanese" character U+0027 (1-2-15 in JIS X 0213).
+	https://github.com/texjporg/tex-jp-build/pull/90
+
 2019-05-03  Karl Berry  <karl at tug.org>
 
 	* version.ac: 1.3.8/dev since TL'19 is released.

Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c	2019-09-10 00:57:39 UTC (rev 52070)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c	2019-09-10 12:51:51 UTC (rev 52071)
@@ -673,7 +673,7 @@
         break;
     }
 
-    j = toBUFF(fromUCS(u));
+    j = (u != 0) ? toBUFF(fromUCS(u)) : 0;
     if (j == 0) { /* can't represent (typically umlaut o in EUC) */
         write_hex(i);
         if (i2 != EOF) write_hex(i2);
@@ -924,12 +924,12 @@
     return fclose(fp);
 }
 
-
+#define break_if_bad_utf8_second(k) if ((k<0x80)||(k>0xBF)) { i--; k='\0'; break; }
 unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is)
 {
     int i;
     long u = 0, j, len;
-    int i1 = EOF, i2 = EOF, i3 = EOF, i4 = EOF;
+    int i1, i2, i3, i4;
     unsigned char *buf, *buf_bak;
     long first_bak, last_bak;
 
@@ -943,7 +943,7 @@
     first = last = 0;
 
     for (i=0; i<strlen(is); i++) {
-        i1 = is[i];
+        i1 = is[i]; i2 = i3 = i4 = '\0';
         switch (UTF8length(i1)) {
         case 1:
             buffer[last++] = i1; /* ASCII */
@@ -950,12 +950,12 @@
             if (i1 == '\0') goto end;
             continue;
         case 2:
-            i2 = is[++i]; if (i2 == '\0') break;
+            i2 = is[++i]; break_if_bad_utf8_second(i2);
             u = UTF8BtoUCS(i1, i2);
             break;
         case 3:
-            i2 = is[++i]; if (i2 == '\0') break;
-            i3 = is[++i]; if (i3 == '\0') break;
+            i2 = is[++i]; break_if_bad_utf8_second(i2);
+            i3 = is[++i]; break_if_bad_utf8_second(i3);
             u = UTF8CtoUCS(i1, i2, i3);
             if (u == U_BOM) continue; /* just ignore */
             if (u == U_VOICED      && combin_voiced_sound(false)) continue;
@@ -962,9 +962,9 @@
             if (u == U_SEMI_VOICED && combin_voiced_sound(true))  continue;
             break;
         case 4:
-            i2 = is[++i]; if (i2 == '\0') break;
-            i3 = is[++i]; if (i3 == '\0') break;
-            i4 = is[++i]; if (i4 == '\0') break;
+            i2 = is[++i]; break_if_bad_utf8_second(i2);
+            i3 = is[++i]; break_if_bad_utf8_second(i3);
+            i4 = is[++i]; break_if_bad_utf8_second(i4);
             u = UTF8DtoUCS(i1, i2, i3, i4);
             break;
         default:
@@ -972,9 +972,9 @@
             break;
         }
 
-        j = toBUFF(fromUCS(u));
+        j = (u != 0) ? toBUFF(fromUCS(u)) : 0;
         if (j == 0) { /* can't represent in EUC/SJIS */
-            if (last+4>=len) buffer = xrealloc(buffer, len=last+64);
+            if (last+16>=len) buffer = buf = xrealloc(buffer, len=last+64);
             write_hex(i1);
             if (i2 != '\0') write_hex(i2);
             if (i3 != '\0') write_hex(i3);
@@ -982,7 +982,6 @@
         } else {
             write_multibyte(j);
         }
-        i2 = i3 = i4 = '\0';
     }
     buffer[last] = '\0';
  end:



More information about the tex-live-commits mailing list