texlive[52071] Build/source/texk/ptexenc: ptexenc.c: updates from H. Kitagawa
commits+hironobu at tug.org
commits+hironobu at tug.org
Tue Sep 10 14:51:52 CEST 2019
Revision: 52071
http://tug.org/svn/texlive?view=revision&revision=52071
Author: hironobu
Date: 2019-09-10 14:51:51 +0200 (Tue, 10 Sep 2019)
Log Message:
-----------
ptexenc.c: updates from H. Kitagawa
Modified Paths:
--------------
trunk/Build/source/texk/ptexenc/ChangeLog
trunk/Build/source/texk/ptexenc/ptexenc.c
Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog 2019-09-10 00:57:39 UTC (rev 52070)
+++ trunk/Build/source/texk/ptexenc/ChangeLog 2019-09-10 12:51:51 UTC (rev 52071)
@@ -1,3 +1,13 @@
+2019-09-09 Hironori Kitagawa <h_kitagawa2001 at yahoo.co.jp>
+
+ * ptexenc.c: Fix a bug where the size of buf was insufficient when
+ converting a file name from UTF-8 to the internal encoding.
+ This bug could cause "Abort trap: 6".
+ https://github.com/texjporg/tex-jp-build/issues/89
+ * ptexenc.c: Avoid collapsing some invalid UTF-8 sequences into
+ the "Japanese" character U+0027 (1-2-15 in JIS X 0213).
+ https://github.com/texjporg/tex-jp-build/pull/90
+
2019-05-03 Karl Berry <karl at tug.org>
* version.ac: 1.3.8/dev since TL'19 is released.
Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c 2019-09-10 00:57:39 UTC (rev 52070)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c 2019-09-10 12:51:51 UTC (rev 52071)
@@ -673,7 +673,7 @@
break;
}
- j = toBUFF(fromUCS(u));
+ j = (u != 0) ? toBUFF(fromUCS(u)) : 0;
if (j == 0) { /* can't represent (typically umlaut o in EUC) */
write_hex(i);
if (i2 != EOF) write_hex(i2);
@@ -924,12 +924,12 @@
return fclose(fp);
}
-
+#define break_if_bad_utf8_second(k) if ((k<0x80)||(k>0xBF)) { i--; k='\0'; break; }
unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is)
{
int i;
long u = 0, j, len;
- int i1 = EOF, i2 = EOF, i3 = EOF, i4 = EOF;
+ int i1, i2, i3, i4;
unsigned char *buf, *buf_bak;
long first_bak, last_bak;
@@ -943,7 +943,7 @@
first = last = 0;
for (i=0; i<strlen(is); i++) {
- i1 = is[i];
+ i1 = is[i]; i2 = i3 = i4 = '\0';
switch (UTF8length(i1)) {
case 1:
buffer[last++] = i1; /* ASCII */
@@ -950,12 +950,12 @@
if (i1 == '\0') goto end;
continue;
case 2:
- i2 = is[++i]; if (i2 == '\0') break;
+ i2 = is[++i]; break_if_bad_utf8_second(i2);
u = UTF8BtoUCS(i1, i2);
break;
case 3:
- i2 = is[++i]; if (i2 == '\0') break;
- i3 = is[++i]; if (i3 == '\0') break;
+ i2 = is[++i]; break_if_bad_utf8_second(i2);
+ i3 = is[++i]; break_if_bad_utf8_second(i3);
u = UTF8CtoUCS(i1, i2, i3);
if (u == U_BOM) continue; /* just ignore */
if (u == U_VOICED && combin_voiced_sound(false)) continue;
@@ -962,9 +962,9 @@
if (u == U_SEMI_VOICED && combin_voiced_sound(true)) continue;
break;
case 4:
- i2 = is[++i]; if (i2 == '\0') break;
- i3 = is[++i]; if (i3 == '\0') break;
- i4 = is[++i]; if (i4 == '\0') break;
+ i2 = is[++i]; break_if_bad_utf8_second(i2);
+ i3 = is[++i]; break_if_bad_utf8_second(i3);
+ i4 = is[++i]; break_if_bad_utf8_second(i4);
u = UTF8DtoUCS(i1, i2, i3, i4);
break;
default:
@@ -972,9 +972,9 @@
break;
}
- j = toBUFF(fromUCS(u));
+ j = (u != 0) ? toBUFF(fromUCS(u)) : 0;
if (j == 0) { /* can't represent in EUC/SJIS */
- if (last+4>=len) buffer = xrealloc(buffer, len=last+64);
+ if (last+16>=len) buffer = buf = xrealloc(buffer, len=last+64);
write_hex(i1);
if (i2 != '\0') write_hex(i2);
if (i3 != '\0') write_hex(i3);
@@ -982,7 +982,6 @@
} else {
write_multibyte(j);
}
- i2 = i3 = i4 = '\0';
}
buffer[last] = '\0';
end:
More information about the tex-live-commits
mailing list