texlive[63602] Build/source/texk/ptexenc/ptexenc.c: ptexenc: refine encoding detection

commits+takuji at tug.org commits+takuji at tug.org
Wed Jun 15 16:37:45 CEST 2022


Revision: 63602
          http://tug.org/svn/texlive?view=revision&revision=63602
Author:   takuji
Date:     2022-06-15 16:37:45 +0200 (Wed, 15 Jun 2022)
Log Message:
-----------
ptexenc: refine encoding detection

Modified Paths:
--------------
    trunk/Build/source/texk/ptexenc/ptexenc.c

Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c	2022-06-15 05:15:38 UTC (rev 63601)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c	2022-06-15 14:37:45 UTC (rev 63602)
@@ -826,7 +826,8 @@
 #endif /* DEBUG */
     enc = xmalloc(sizeof(char)*18);
 
-    while ((k0 = fgetc(fp)) != EOF && maybe_sjis+maybe_euc+maybe_utf8>1) {
+    while ((k0 = fgetc(fp)) != EOF &&
+           (maybe_sjis+maybe_euc+maybe_utf8>1 || pos_db || pos_utf8)) {
         lbyte++;
         if (k0==ESC) {
             k0 = fgetc(fp);
@@ -858,15 +859,18 @@
                 if (maybe_sjis) {
                     cdb[1] = k0;
                     k1 = JIStoUCS2(SJIStoJIS(HILO(cdb[0],cdb[1])));
+#ifdef DEBUG
+                    fprintf(stderr, "Character for guess encoding: 0x%02X%02X", cdb[0], cdb[1]);
                     if (k1) {
-#ifdef DEBUG
                         i = UCStoUTF8S(k1, str0);
                         str0[i] = '\0';
-                        fprintf(stderr, "Character for guess encoding: 0x%02X%02X", cdb[0], cdb[1]);
                         fprintf(stderr, " sjis (%s)\n", str0);
+                    } else {
+                        fprintf(stderr, " not sjis\n");
+                    }
 #endif /* DEBUG */
+                    if (k1)
                         continue;
-                    }
                 }
                 maybe_sjis = 0;
             }
@@ -897,8 +901,8 @@
             }
             pos_db = 0;
 #ifdef DEBUG
+            fprintf(stderr, "Character for guess encoding: 0x%02X%02X", cdb[0], cdb[1]);
             if (maybe_sjis || maybe_euc) {
-                fprintf(stderr, "Character for guess encoding: 0x%02X%02X", cdb[0], cdb[1]);
                 if (maybe_sjis) {
                     i = UCStoUTF8S(k1, str0);
                     str0[i] = '\0';
@@ -910,6 +914,8 @@
                     fprintf(stderr, " euc (%s)", str0);
                 }
                 fprintf(stderr, "\n");
+            } else {
+                fprintf(stderr, " not sjis nor euc\n");
             }
 #endif /* DEBUG */
         }
@@ -955,8 +961,10 @@
         }
     }
 
-    if (pos_db)   maybe_sjis = maybe_euc = 0;
-    if (pos_utf8) maybe_utf8 = 0;
+    if (k0==EOF) {
+        if (pos_db)   maybe_sjis = maybe_euc = 0;
+        if (pos_utf8) maybe_utf8 = 0;
+    }
     if (is_ascii)
         strcpy(enc,"ASCII");
     else if (maybe_sjis+maybe_euc+maybe_utf8>1) {



More information about the tex-live-commits mailing list.