texlive[47967] Build/source/texk: pTeX: Support UTF-8 file names on UTF-8 locale (Unix-like platform only)
commits+takuji at tug.org
commits+takuji at tug.org
Sat Jun 9 13:10:17 CEST 2018
Revision: 47967
http://tug.org/svn/texlive?view=revision&revision=47967
Author: takuji
Date: 2018-06-09 13:10:16 +0200 (Sat, 09 Jun 2018)
Log Message:
-----------
pTeX: Support UTF-8 file names on UTF-8 locale (Unix-like platform only)
Modified Paths:
--------------
trunk/Build/source/texk/ptexenc/ChangeLog
trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
trunk/Build/source/texk/ptexenc/ptexenc.c
trunk/Build/source/texk/web2c/lib/ChangeLog
trunk/Build/source/texk/web2c/lib/openclose.c
trunk/Build/source/texk/web2c/lib/texmfmp.c
Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ChangeLog 2018-06-09 11:10:16 UTC (rev 47967)
@@ -1,3 +1,11 @@
+2018-06-09 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
+
+ * ptexenc.c, ptexenc/ptexenc.h: Add new functions
+ ptenc_from_utf8_string_to_internal_enc(),
+ ptenc_from_internal_enc_string_to_utf8() and
+ ptenc_get_command_line_args() to support UTF-8 file names
+ on UTF-8 locale in pTeX (Unix-like platform only).
+
2018-03-26 Akira Kakuto <kakuto at fuk.kindai.ac.jp>
* ptexenc.c: Change default_kanji_enc from ENC_SJIS to ENC_UTF8
Modified: trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h 2018-06-09 11:10:16 UTC (rev 47967)
@@ -92,6 +92,9 @@
extern PTENCDLL void nkf_disable(void);
extern PTENCDLL FILE *nkf_open(const char *path, const char *mode);
extern PTENCDLL int nkf_close(FILE *fp);
+extern PTENCDLL unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is);
+extern PTENCDLL unsigned char *ptenc_from_internal_enc_string_to_utf8(const unsigned char *is);
+extern PTENCDLL int ptenc_get_command_line_args(int *p_ac, char ***p_av);
#endif
#endif /* PTEXENC_PTEXENC_H */
Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c 2018-06-09 11:10:16 UTC (rev 47967)
@@ -935,4 +935,138 @@
}
return fclose(fp);
}
+
+
+/*
+ * Convert the NUL-terminated UTF-8 string `is` to the internal encoding.
+ *
+ * Returns NULL (nothing allocated) unless the terminal encoding is UTF-8
+ * and the internal encoding is not upTeX.  Otherwise returns a new
+ * NUL-terminated string obtained from xmalloc(); the caller owns it.
+ *
+ * The globals buffer/first/last are redirected to the result while
+ * converting and restored before returning.  NOTE(review): presumably
+ * write_hex(), write_multibyte() and combin_voiced_sound() operate on
+ * buffer[last]; confirm against their definitions.
+ *
+ * A character for which toBUFF(fromUCS(u)) is 0 is written with
+ * write_hex(), one call per byte that was read for it.
+ */
+unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is)
+{
+    size_t i, n;
+    long u, j;
+    int i1, i2, i3, i4;
+    unsigned char *buf, *buf_bak;
+    long first_bak, last_bak;
+
+    if (terminal_enc != ENC_UTF8 || is_internalUPTEX()) return NULL;
+    buf_bak = buffer;
+    first_bak = first;
+    last_bak = last;
+
+    n = strlen((const char *)is);
+    /* Size for the worst case up front so that no reallocation is needed
+       and `buf` can never go stale.  NOTE(review): assumes write_hex()
+       emits at most 4 bytes (plus a possible terminator) per input byte
+       and write_multibyte() never emits more bytes than were consumed;
+       confirm. */
+    buffer = buf = xmalloc(4 * n + 8);
+    first = last = 0;
+
+    for (i = 0; i < n; i++) {
+        i1 = is[i];
+        /* Start every character from a clean state: bytes not read for
+           this character must not be hex-escaped, and a truncated or
+           invalid sequence must not reuse the previous character. */
+        i2 = i3 = i4 = '\0';
+        u = U_REPLACEMENT_CHARACTER;
+
+        switch (UTF8length(i1)) {
+        case 1:
+            buffer[last++] = i1; /* ASCII */
+            continue;
+        case 2:
+            i2 = is[++i]; if (i2 == '\0') break;
+            u = UTF8BtoUCS(i1, i2);
+            break;
+        case 3:
+            i2 = is[++i]; if (i2 == '\0') break;
+            i3 = is[++i]; if (i3 == '\0') break;
+            u = UTF8CtoUCS(i1, i2, i3);
+            if (u == U_BOM) continue; /* just ignore */
+            if (u == U_VOICED && combin_voiced_sound(false)) continue;
+            if (u == U_SEMI_VOICED && combin_voiced_sound(true)) continue;
+            break;
+        case 4:
+            i2 = is[++i]; if (i2 == '\0') break;
+            i3 = is[++i]; if (i3 == '\0') break;
+            i4 = is[++i]; if (i4 == '\0') break;
+            u = UTF8DtoUCS(i1, i2, i3, i4);
+            break;
+        default:
+            break; /* invalid lead byte: u is U_REPLACEMENT_CHARACTER */
+        }
+
+        j = toBUFF(fromUCS(u));
+        if (j == 0) { /* can't represent in EUC/SJIS */
+            write_hex(i1);
+            if (i2 != '\0') write_hex(i2);
+            if (i3 != '\0') write_hex(i3);
+            if (i4 != '\0') write_hex(i4);
+        } else {
+            write_multibyte(j);
+        }
+    }
+    buffer[last] = '\0';
+
+    buffer = buf_bak;
+    first = first_bak;
+    last = last_bak;
+    return buf;
+}
+
+/*
+ * Convert the NUL-terminated string `is` from the internal encoding
+ * to UTF-8.
+ *
+ * Returns NULL (nothing allocated) unless the terminal encoding is UTF-8
+ * and the internal encoding is not upTeX.  Otherwise returns a new
+ * NUL-terminated string obtained from xmalloc(); the caller owns it.
+ *
+ * The globals buffer/first/last are redirected to the result while
+ * converting and restored before returning.  NOTE(review): presumably
+ * write_multibyte() appends at buffer[last]; confirm against its
+ * definition.
+ */
+unsigned char *ptenc_from_internal_enc_string_to_utf8(const unsigned char *is)
+{
+    size_t i, n;
+    long u;
+    int i1, i2;
+    unsigned char *buf, *buf_bak;
+    long first_bak, last_bak;
+
+    if (terminal_enc != ENC_UTF8 || is_internalUPTEX()) return NULL;
+    buf_bak = buffer;
+    first_bak = first;
+    last_bak = last;
+
+    n = strlen((const char *)is);
+    /* Integer sizing for the worst case: the default branch below can
+       emit U+FFFD (3 bytes in UTF-8) for a single input byte, which a
+       1.5x estimate does not cover.  NOTE(review): assumes each
+       write_multibyte() call here emits at most 3 bytes; confirm. */
+    buffer = buf = xmalloc(3 * n + 4);
+    first = last = 0;
+
+    for (i = 0; i < n; i++) {
+        i1 = is[i];
+        /* A truncated or unrecognised character becomes U+FFFD instead of
+           repeating whatever the previous iteration left in u. */
+        u = U_REPLACEMENT_CHARACTER;
+
+        switch (multibytelen(i1)) {
+        case 1:
+            buffer[last++] = i1; /* ASCII */
+            continue;
+        case 2:
+            i2 = is[++i]; if (i2 == '\0') break;
+            u = JIStoUCS2(toJIS(HILO(i1,i2)));
+            break;
+        default:
+            break;
+        }
+
+        write_multibyte(UCStoUTF8(u));
+    }
+    buffer[last] = '\0';
+
+    buffer = buf_bak;
+    first = first_bak;
+    last = last_bak;
+    return buf;
+}
+
+/*
+ * Replace the argument vector *p_av by a copy whose strings have been
+ * passed through ptenc_from_utf8_string_to_internal_enc().
+ *
+ * Only acts when, after get_terminal_enc(), the terminal encoding is
+ * UTF-8 and the internal encoding is not upTeX; in that case *p_av is
+ * pointed at a new NULL-terminated array from xmalloc() and terminal_enc
+ * is returned.  Otherwise nothing is changed and 0 is returned.
+ * *p_ac is read but never modified.
+ */
+int ptenc_get_command_line_args(int *p_ac, char ***p_av)
+{
+    char **converted;
+    int count, k;
+
+    get_terminal_enc();
+    if (terminal_enc != ENC_UTF8 || is_internalUPTEX())
+        return 0;
+
+    count = *p_ac;
+    converted = xmalloc(sizeof(char *)*(count+1));
+    for (k = 0; k < count; k++) {
+        converted[k] = ptenc_from_utf8_string_to_internal_enc((*p_av)[k]);
+#ifdef DEBUG
+        fprintf(stderr, "Commandline arguments %d:(%s)\n", k, converted[k]);
+#endif /* DEBUG */
+    }
+    converted[count] = NULL; /* keep the argv[argc] == NULL convention */
+    *p_av = converted;
+    return terminal_enc;
+}
+
#endif /* !WIN32 */
Modified: trunk/Build/source/texk/web2c/lib/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/lib/ChangeLog 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/ChangeLog 2018-06-09 11:10:16 UTC (rev 47967)
@@ -1,3 +1,8 @@
+2018-06-09 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
+
+ * texmfmp.c, openclose.c: Support UTF-8 file names
+ on UTF-8 locale in pTeX (Unix-like platform only).
+
2018-05-29 Akira Kakuto <kakuto at fuk.kindai.ac.jp>
* texmfmp.c: Discard the changes for pdfTeX on 2018-05-11,
Modified: trunk/Build/source/texk/web2c/lib/openclose.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/openclose.c 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/openclose.c 2018-06-09 11:10:16 UTC (rev 47967)
@@ -150,6 +150,9 @@
open_input (FILE **f_ptr, int filefmt, const_string fopen_mode)
{
string fname = NULL;
+#if defined(PTEX) && !defined(WIN32)
+ string fname0;
+#endif
#ifdef FUNNY_CORE_DUMP
/* This only applies if a preloaded TeX/Metafont is being made;
it allows automatic creation of the core dump (typing ^\ loses
@@ -170,6 +173,17 @@
absolute. This is because .aux and other such files will get
written to the output directory, and we have to be able to read
them from there. We only look for the name as-is. */
+
+#if defined(PTEX) && !defined(WIN32)
+ fname0 = ptenc_from_internal_enc_string_to_utf8(nameoffile + 1);
+ if (fname0) {
+ free (nameoffile);
+ namelength = strlen (fname0);
+ nameoffile = xmalloc (namelength + 2);
+ strcpy (nameoffile + 1, fname0);
+ free (fname0);
+ }
+#endif
if (output_directory && !kpse_absolute_p (nameoffile+1, false)) {
fname = concat3 (output_directory, DIR_SEP_STRING, nameoffile + 1);
*f_ptr = fopen (fname, fopen_mode);
@@ -183,6 +197,13 @@
}
#endif
if (*f_ptr) {
+#if defined(PTEX) && !defined(WIN32)
+ fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+ if (fname0) {
+ free (fname);
+ fname = fname0;
+ }
+#endif
free (nameoffile);
namelength = strlen (fname);
nameoffile = xmalloc (namelength + 2);
@@ -205,7 +226,8 @@
which we set `tex_input_type' to 0 in the change file. */
/* According to the pdfTeX people, pounding the disk for .vf files
is overkill as well. A more general solution would be nice. */
- boolean must_exist = (filefmt != kpse_tex_format || texinputtype)
+ boolean must_exist;
+ must_exist = (filefmt != kpse_tex_format || texinputtype)
&& (filefmt != kpse_vf_format);
fname = kpse_find_file (nameoffile + 1,
(kpse_file_format_type)filefmt,
@@ -229,21 +251,28 @@
fname[i] = 0;
}
+ /* This fopen is not allowed to fail. */
+#if defined(PTEX) && !defined(WIN32)
+ if (filefmt == kpse_tex_format ||
+ filefmt == kpse_bib_format) {
+ *f_ptr = nkf_open (fname, fopen_mode);
+ } else
+#endif
+ *f_ptr = xfopen (fname, fopen_mode);
+
/* kpse_find_file always returns a new string. */
+#if defined(PTEX) && !defined(WIN32)
+ fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+ if (fname0) {
+ free (fname);
+ fname = fname0;
+ }
+#endif
free (nameoffile);
namelength = strlen (fname);
nameoffile = xmalloc (namelength + 2);
strcpy (nameoffile + 1, fname);
free (fname);
-
- /* This fopen is not allowed to fail. */
-#if defined(PTEX) && !defined(WIN32)
- if (filefmt == kpse_tex_format ||
- filefmt == kpse_bib_format) {
- *f_ptr = nkf_open (nameoffile + 1, fopen_mode);
- } else
-#endif
- *f_ptr = xfopen (nameoffile + 1, fopen_mode);
}
}
}
@@ -281,6 +310,9 @@
open_output (FILE **f_ptr, const_string fopen_mode)
{
string fname;
+#if defined(PTEX) && !defined(WIN32)
+ string fname0;
+#endif
boolean absolute = kpse_absolute_p(nameoffile+1, false);
/* If we have an explicit output directory, use it. */
@@ -289,6 +321,13 @@
} else {
fname = nameoffile + 1;
}
+#if defined(PTEX) && !defined(WIN32)
+  /* Open the file under its UTF-8 name when a conversion is available. */
+  fname0 = ptenc_from_internal_enc_string_to_utf8(fname);
+  if (fname0) {
+    /* In the else branch above fname is nameoffile + 1, which points
+       into the middle of the nameoffile allocation and must not be
+       passed to free(). */
+    if (fname != nameoffile + 1)
+      free(fname);
+    fname = fname0;
+  }
+#endif
/* Is the filename openable as given? */
*f_ptr = fopen (fname, fopen_mode);
@@ -307,6 +346,13 @@
/* If this succeeded, change nameoffile accordingly. */
if (*f_ptr) {
if (fname != nameoffile + 1) {
+#if defined(PTEX) && !defined(WIN32)
+ fname0 = ptenc_from_utf8_string_to_internal_enc(fname);
+ if (fname0) {
+ free(fname);
+ fname = fname0;
+ }
+#endif
free (nameoffile);
namelength = strlen (fname);
nameoffile = xmalloc (namelength + 2);
Modified: trunk/Build/source/texk/web2c/lib/texmfmp.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/texmfmp.c 2018-06-09 00:19:12 UTC (rev 47966)
+++ trunk/Build/source/texk/web2c/lib/texmfmp.c 2018-06-09 11:10:16 UTC (rev 47967)
@@ -710,11 +710,14 @@
enc = kpse_var_value("command_line_encoding");
get_command_line_args_utf8(enc, &argc, &argv);
#endif
+#if IS_pTeX && !IS_upTeX && !defined(WIN32)
+ ptenc_get_command_line_args(&argc, &argv);
+#endif
/* If the user says --help or --version, we need to notice early. And
since we want the --ini option, have to do it before getting into
the web (which would read the base file, etc.). */
-#if (IS_upTeX || defined(XeTeX)) && defined(WIN32)
+#if ((IS_upTeX || defined(XeTeX)) && defined(WIN32)) || (IS_pTeX && !IS_upTeX && !defined(WIN32))
parse_options (argc, argv);
#else
parse_options (ac, av);
More information about the tex-live-commits
mailing list