texlive[47967] Build/source/texk: pTeX: Support UTF-8 file names on UTF-8 locale (Unix-like platform only)

commits+takuji at tug.org commits+takuji at tug.org
Sat Jun 9 13:10:17 CEST 2018


Revision: 47967
          http://tug.org/svn/texlive?view=revision&revision=47967
Author:   takuji
Date:     2018-06-09 13:10:16 +0200 (Sat, 09 Jun 2018)
Log Message:
-----------
pTeX: Support UTF-8 file names on UTF-8 locale (Unix-like platform only)

Modified Paths:
--------------
    trunk/Build/source/texk/ptexenc/ChangeLog
    trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
    trunk/Build/source/texk/ptexenc/ptexenc.c
    trunk/Build/source/texk/web2c/lib/ChangeLog
    trunk/Build/source/texk/web2c/lib/openclose.c
    trunk/Build/source/texk/web2c/lib/texmfmp.c

Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ChangeLog	2018-06-09 11:10:16 UTC (rev 47967)
@@ -1,3 +1,11 @@
+2018-06-09  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* ptexenc.c, ptexenc/ptexenc.h: Add new functions
+	ptenc_from_utf8_string_to_internal_enc(),
+	ptenc_from_internal_enc_string_to_utf8() and
+	ptenc_get_command_line_args() to support UTF-8 file names
+	on UTF-8 locale in pTeX (Unix-like platform only).
+
 2018-03-26  Akira Kakuto  <kakuto at fuk.kindai.ac.jp>
 
 	* ptexenc.c: Change default_kanji_enc from ENC_SJIS to ENC_UTF8

Modified: trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2018-06-09 11:10:16 UTC (rev 47967)
@@ -92,6 +92,9 @@
 extern PTENCDLL void nkf_disable(void);
 extern PTENCDLL FILE *nkf_open(const char *path, const char *mode);
 extern PTENCDLL int nkf_close(FILE *fp);
+extern PTENCDLL unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is);
+extern PTENCDLL unsigned char *ptenc_from_internal_enc_string_to_utf8(const unsigned char *is);
+extern PTENCDLL int ptenc_get_command_line_args(int *p_ac, char ***p_av);
 #endif
 
 #endif /* PTEXENC_PTEXENC_H */

Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c	2018-06-09 11:10:16 UTC (rev 47967)
@@ -935,4 +935,138 @@
     }
     return fclose(fp);
 }
+
+
+/* Convert the NUL-terminated UTF-8 string IS to pTeX's internal encoding.
+ * Returns a new xmalloc'ed string (caller frees), or NULL when no conversion
+ * applies: IS is NULL, terminal_enc is not ENC_UTF8, or is_internalUPTEX().
+ * Characters for which toBUFF(fromUCS()) yields 0 go through write_hex()
+ * byte by byte.  Globals buffer/first/last are borrowed, then restored. */
+unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is)
+{
+    size_t i, n;
+    unsigned char *buf, *buf_bak;
+    long first_bak, last_bak;
+
+    if (is == NULL || terminal_enc != ENC_UTF8 || is_internalUPTEX()) return NULL;
+    buf_bak = buffer;
+    first_bak = first;
+    last_bak = last;
+
+    n = strlen((const char *)is);
+    /* Worst case up front (write_hex() assumed to add 4 bytes per input byte,
+     * as the old `last+4' check implied); the old xrealloc left BUF stale. */
+    buffer = buf = xmalloc(4*n + 1);
+    first = last = 0;
+
+    for (i = 0; i < n; i++) {
+        /* '\0' = byte not read (old EOF initialisers were dumped as hex). */
+        int i1 = is[i], i2 = '\0', i3 = '\0', i4 = '\0';
+        long j, u = U_REPLACEMENT_CHARACTER; /* kept if cut short */
+
+        switch (UTF8length(i1)) {
+        case 1: buffer[last++] = i1; continue; /* ASCII */
+        case 2:
+            i2 = is[++i]; if (i2 == '\0') break;
+            u = UTF8BtoUCS(i1, i2);
+            break;
+        case 3:
+            i2 = is[++i]; if (i2 == '\0') break;
+            i3 = is[++i]; if (i3 == '\0') break;
+            u = UTF8CtoUCS(i1, i2, i3);
+            if (u == U_BOM) continue; /* just ignore */
+            if (u == U_VOICED      && combin_voiced_sound(false)) continue;
+            if (u == U_SEMI_VOICED && combin_voiced_sound(true))  continue;
+            break;
+        case 4:
+            i2 = is[++i]; if (i2 == '\0') break;
+            i3 = is[++i]; if (i3 == '\0') break;
+            i4 = is[++i]; if (i4 == '\0') break;
+            u = UTF8DtoUCS(i1, i2, i3, i4);
+            break;
+        default: break; /* invalid lead byte: u stays U_REPLACEMENT_CHARACTER */
+        }
+
+        j = toBUFF(fromUCS(u));
+        if (j == 0) { /* can't represent in EUC/SJIS */
+            write_hex(i1);
+            if (i2 != '\0') write_hex(i2);
+            if (i3 != '\0') write_hex(i3);
+            if (i4 != '\0') write_hex(i4);
+        } else {
+            write_multibyte(j);
+        }
+    }
+    buffer[last] = '\0';
+    buffer = buf_bak;
+    first = first_bak;
+    last = last_bak;
+    return buf;
+}
+
+/* Convert the NUL-terminated string IS from pTeX's internal encoding to
+ * UTF-8.  Returns a new xmalloc'ed string (caller frees), or NULL when no
+ * conversion applies: IS is NULL, terminal_enc is not ENC_UTF8, or
+ * is_internalUPTEX().  Globals buffer/first/last are borrowed, then restored. */
+unsigned char *ptenc_from_internal_enc_string_to_utf8(const unsigned char *is)
+{
+    size_t i, n;
+    unsigned char *buf, *buf_bak;
+    long first_bak, last_bak;
+
+    if (is == NULL || terminal_enc != ENC_UTF8 || is_internalUPTEX()) return NULL;
+    buf_bak = buffer;
+    first_bak = first;
+    last_bak = last;
+
+    n = strlen((const char *)is);
+    /* Each pass eats >= 1 input byte and emits one code point (<= 4 UTF-8
+     * bytes, assuming write_multibyte() appends only those); the old len*1.5
+     * could be too small once single bytes became U_REPLACEMENT_CHARACTER. */
+    buffer = buf = xmalloc(4*n + 1);
+    first = last = 0;
+
+    for (i = 0; i < n; i++) {
+        int i1 = is[i], i2;
+        long u = U_REPLACEMENT_CHARACTER; /* kept if invalid or cut short */
+
+        switch (multibytelen(i1)) {
+        case 1: buffer[last++] = i1; continue; /* ASCII */
+        case 2:
+            i2 = is[++i]; if (i2 == '\0') break;
+            u = JIStoUCS2(toJIS(HILO(i1,i2)));
+            break;
+        default: break;
+        }
+        write_multibyte(UCStoUTF8(u));
+    }
+    buffer[last] = '\0';
+    buffer = buf_bak;
+    first = first_bak;
+    last = last_bak;
+    return buf;
+}
+
+/* If the terminal encoding is UTF-8 and the internal encoding is not upTeX's,
+ * point *p_av at a new NULL-terminated vector holding the *p_ac arguments
+ * converted to the internal encoding, and return terminal_enc; else 0. */
+int ptenc_get_command_line_args(int *p_ac, char ***p_av)
+{
+    int k, count;
+    char **conv;
+    get_terminal_enc();
+    if (terminal_enc != ENC_UTF8 || is_internalUPTEX()) return 0;
+    count = *p_ac;
+    conv = xmalloc(sizeof(char *)*(count+1));
+    for (k = 0; k < count; k++) {
+        conv[k] = ptenc_from_utf8_string_to_internal_enc((*p_av)[k]);
+#ifdef DEBUG
+        fprintf(stderr, "Commandline arguments %d:(%s)\n", k, conv[k]);
+#endif /* DEBUG */
+    }
+    conv[count] = NULL;
+    *p_av = conv;
+    return terminal_enc;
+}
+
 #endif /* !WIN32 */

Modified: trunk/Build/source/texk/web2c/lib/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/lib/ChangeLog	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/ChangeLog	2018-06-09 11:10:16 UTC (rev 47967)
@@ -1,3 +1,8 @@
+2018-06-09  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* texmfmp.c, openclose.c: Support UTF-8 file names
+	on UTF-8 locale in pTeX (Unix-like platform only).
+
 2018-05-29  Akira Kakuto  <kakuto at fuk.kindai.ac.jp>
 
 	* texmfmp.c: Discard the changes for pdfTeX on 2018-05-11,

Modified: trunk/Build/source/texk/web2c/lib/openclose.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/openclose.c	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/openclose.c	2018-06-09 11:10:16 UTC (rev 47967)
@@ -150,6 +150,9 @@
 open_input (FILE **f_ptr, int filefmt, const_string fopen_mode)
 {
     string fname = NULL;
+#if defined(PTEX) && !defined(WIN32)
+    string fname0;
+#endif
 #ifdef FUNNY_CORE_DUMP
     /* This only applies if a preloaded TeX/Metafont is being made;
        it allows automatic creation of the core dump (typing ^\ loses
@@ -170,6 +173,17 @@
        absolute.  This is because .aux and other such files will get
        written to the output directory, and we have to be able to read
        them from there.  We only look for the name as-is.  */
+
+#if defined(PTEX) && !defined(WIN32)
+    fname0 = ptenc_from_internal_enc_string_to_utf8(nameoffile + 1);
+    if (fname0) {
+        free (nameoffile);
+        namelength = strlen (fname0);
+        nameoffile = xmalloc (namelength + 2);
+        strcpy (nameoffile + 1, fname0);
+        free (fname0);
+    }
+#endif
     if (output_directory && !kpse_absolute_p (nameoffile+1, false)) {
         fname = concat3 (output_directory, DIR_SEP_STRING, nameoffile + 1);
         *f_ptr = fopen (fname, fopen_mode);
@@ -183,6 +197,13 @@
         }
 #endif
         if (*f_ptr) {
+#if defined(PTEX) && !defined(WIN32)
+            fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+            if (fname0) {
+                free (fname);
+                fname = fname0;
+            }
+#endif
             free (nameoffile);
             namelength = strlen (fname);
             nameoffile = xmalloc (namelength + 2);
@@ -205,7 +226,8 @@
                which we set `tex_input_type' to 0 in the change file.  */
             /* According to the pdfTeX people, pounding the disk for .vf files
                is overkill as well.  A more general solution would be nice. */
-            boolean must_exist = (filefmt != kpse_tex_format || texinputtype)
+            boolean must_exist;
+            must_exist = (filefmt != kpse_tex_format || texinputtype)
                     && (filefmt != kpse_vf_format);
             fname = kpse_find_file (nameoffile + 1,
                                     (kpse_file_format_type)filefmt,
@@ -229,21 +251,28 @@
                     fname[i] = 0;
                 }
 
+                /* This fopen is not allowed to fail. */
+#if defined(PTEX) && !defined(WIN32)
+                if (filefmt == kpse_tex_format ||
+                    filefmt == kpse_bib_format) {
+                    *f_ptr = nkf_open (fname, fopen_mode);
+                } else
+#endif
+                *f_ptr = xfopen (fname, fopen_mode);
+
                 /* kpse_find_file always returns a new string. */
+#if defined(PTEX) && !defined(WIN32)
+                fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+                if (fname0) {
+                    free (fname);
+                    fname = fname0;
+                }
+#endif
                 free (nameoffile);
                 namelength = strlen (fname);
                 nameoffile = xmalloc (namelength + 2);
                 strcpy (nameoffile + 1, fname);
                 free (fname);
-
-                /* This fopen is not allowed to fail. */
-#if defined(PTEX) && !defined(WIN32)
-                if (filefmt == kpse_tex_format ||
-                    filefmt == kpse_bib_format) {
-                    *f_ptr = nkf_open (nameoffile + 1, fopen_mode);
-                } else
-#endif
-                *f_ptr = xfopen (nameoffile + 1, fopen_mode);
             }
         }
     }
@@ -281,6 +310,9 @@
 open_output (FILE **f_ptr, const_string fopen_mode)
 {
     string fname;
+#if defined(PTEX) && !defined(WIN32)
+    string fname0;
+#endif
     boolean absolute = kpse_absolute_p(nameoffile+1, false);
 
     /* If we have an explicit output directory, use it. */
@@ -289,6 +321,13 @@
     } else {
         fname = nameoffile + 1;
     }
+#if defined(PTEX) && !defined(WIN32)
+    fname0 = ptenc_from_internal_enc_string_to_utf8(fname);
+    if (fname0) {
+        free(fname);
+        fname = fname0;
+    }
+#endif
 
     /* Is the filename openable as given?  */
     *f_ptr = fopen (fname, fopen_mode);
@@ -307,6 +346,13 @@
     /* If this succeeded, change nameoffile accordingly.  */
     if (*f_ptr) {
         if (fname != nameoffile + 1) {
+#if defined(PTEX) && !defined(WIN32)
+            fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+            if (fname0) {
+                free(fname);
+                fname = fname0;
+            }
+#endif
             free (nameoffile);
             namelength = strlen (fname);
             nameoffile = xmalloc (namelength + 2);

Modified: trunk/Build/source/texk/web2c/lib/texmfmp.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/texmfmp.c	2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/texmfmp.c	2018-06-09 11:10:16 UTC (rev 47967)
@@ -710,11 +710,14 @@
   enc = kpse_var_value("command_line_encoding");
   get_command_line_args_utf8(enc, &argc, &argv);
 #endif
+#if IS_pTeX && !IS_upTeX && !defined(WIN32)
+  ptenc_get_command_line_args(&argc, &argv);
+#endif
 
   /* If the user says --help or --version, we need to notice early.  And
      since we want the --ini option, have to do it before getting into
      the web (which would read the base file, etc.).  */
-#if (IS_upTeX || defined(XeTeX)) && defined(WIN32)
+#if ((IS_upTeX || defined(XeTeX)) && defined(WIN32)) || (IS_pTeX && !IS_upTeX && !defined(WIN32))
   parse_options (argc, argv);
 #else
   parse_options (ac, av);



More information about the tex-live-commits mailing list