texlive[61692] Build/source/texk: [e][u]ptex: Distinguish 8-bit

commits+hironobu at tug.org commits+hironobu at tug.org
Sat Jan 22 18:03:22 CET 2022


Revision: 61692
          http://tug.org/svn/texlive?view=revision&revision=61692
Author:   hironobu
Date:     2022-01-22 18:03:22 +0100 (Sat, 22 Jan 2022)
Log Message:
-----------
[e][u]ptex: Distinguish 8-bit characters and Japanese characters (H. Kitagawa et al.)
For better support of LaTeX3 (expl3).
More details in TUGboat 41(2):329--334, 2020.

Modified Paths:
--------------
    trunk/Build/source/texk/mendexk/ChangeLog
    trunk/Build/source/texk/mendexk/fread.c
    trunk/Build/source/texk/ptexenc/ChangeLog
    trunk/Build/source/texk/ptexenc/configure
    trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
    trunk/Build/source/texk/ptexenc/ptexenc/unicode.h
    trunk/Build/source/texk/ptexenc/ptexenc.c
    trunk/Build/source/texk/ptexenc/unicode.c
    trunk/Build/source/texk/ptexenc/version.ac
    trunk/Build/source/texk/web2c/ChangeLog
    trunk/Build/source/texk/web2c/eptexdir/ChangeLog
    trunk/Build/source/texk/web2c/eptexdir/eptex.defines
    trunk/Build/source/texk/web2c/eptexdir/eptex.ech
    trunk/Build/source/texk/web2c/eptexdir/etex.ch0
    trunk/Build/source/texk/web2c/eptexdir/etex.ch1
    trunk/Build/source/texk/web2c/eptexdir/pdfutils.ch
    trunk/Build/source/texk/web2c/euptexdir/ChangeLog
    trunk/Build/source/texk/web2c/euptexdir/euptex.ch1
    trunk/Build/source/texk/web2c/euptexdir/euptex.defines
    trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-post.ch
    trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-pre.ch
    trunk/Build/source/texk/web2c/lib/ChangeLog
    trunk/Build/source/texk/web2c/lib/openclose.c
    trunk/Build/source/texk/web2c/lib/texmfmp.c
    trunk/Build/source/texk/web2c/pmpostdir/ChangeLog
    trunk/Build/source/texk/web2c/pmpostdir/pmp.ch
    trunk/Build/source/texk/web2c/ptexdir/ChangeLog
    trunk/Build/source/texk/web2c/ptexdir/kanji.h
    trunk/Build/source/texk/web2c/ptexdir/ptex-base.ch
    trunk/Build/source/texk/web2c/ptexdir/ptex.defines
    trunk/Build/source/texk/web2c/ptexdir/ptex_version.h
    trunk/Build/source/texk/web2c/synctexdir/ChangeLog
    trunk/Build/source/texk/web2c/synctexdir/synctex.c
    trunk/Build/source/texk/web2c/texmfmem.h
    trunk/Build/source/texk/web2c/uptexdir/ChangeLog
    trunk/Build/source/texk/web2c/uptexdir/kanji.h
    trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch
    trunk/Build/source/texk/web2c/uptexdir/uptex.defines

Added Paths:
-----------
    trunk/Build/source/texk/web2c/eptexdir/tests/printkanji-eptex.tex
    trunk/Build/source/texk/web2c/ptexdir/tests/filename_test.sh
    trunk/Build/source/texk/web2c/ptexdir/tests/printkanji.tex

Modified: trunk/Build/source/texk/mendexk/ChangeLog
===================================================================
--- trunk/Build/source/texk/mendexk/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/mendexk/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,7 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* fread.c: Adapt to arguments of input_line2() in ptexenc-1.4.0.
+
 2022-01-18  Karl Berry  <karl at freefriends.org>
 
 	* COPYRIGHT,

Modified: trunk/Build/source/texk/mendexk/fread.c
===================================================================
--- trunk/Build/source/texk/mendexk/fread.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/mendexk/fread.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -567,7 +567,7 @@
 {
 	int c, len;
 
-	if ((len = input_line2(fp, (unsigned char *) buf, 0, size, &c)) == 0
+	if ((len = input_line2(fp, (unsigned char *) buf, NULL, 0, size, &c)) == 0
 		&& c != '\r' && c != '\n') return NULL;
 	if (c == '\n' || c == '\r') {
 		if (len+1 < size) strcat(buf+len, "\n");

Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,10 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* ptexenc.c, ptexenc/ptexenc.h, unicode.c, ptexenc/unicode.h:
+	Distinguish 8-bit characters and Japanese characters by using
+	flag 0x100 for Japanese char.
+	* version.ac: Bump to 1.4.0.
+
 2022-01-18  Karl Berry  <karl at tug.org>
 
 	* version.ac: remove /dev, for TL22.

Modified: trunk/Build/source/texk/ptexenc/configure
===================================================================
--- trunk/Build/source/texk/ptexenc/configure	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/configure	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for ptexenc 1.3.11.
+# Generated by GNU Autoconf 2.71 for ptexenc 1.4.0.
 #
 # Report bugs to <tex-k at tug.org>.
 #
@@ -629,8 +629,8 @@
 # Identity of this package.
 PACKAGE_NAME='ptexenc'
 PACKAGE_TARNAME='ptexenc'
-PACKAGE_VERSION='1.3.11'
-PACKAGE_STRING='ptexenc 1.3.11'
+PACKAGE_VERSION='1.4.0'
+PACKAGE_STRING='ptexenc 1.4.0'
 PACKAGE_BUGREPORT='tex-k at tug.org'
 PACKAGE_URL=''
 
@@ -1375,7 +1375,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ptexenc 1.3.11 to adapt to many kinds of systems.
+\`configure' configures ptexenc 1.4.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1446,7 +1446,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ptexenc 1.3.11:";;
+     short | recursive ) echo "Configuration of ptexenc 1.4.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1566,7 +1566,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ptexenc configure 1.3.11
+ptexenc configure 1.4.0
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2090,7 +2090,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ptexenc $as_me 1.3.11, which was
+It was created by ptexenc $as_me 1.4.0, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -2850,10 +2850,10 @@
 
 
 
-PTEXENCVERSION=1.3.11
+PTEXENCVERSION=1.4.0
 
 
-PTEXENC_LT_VERSINFO=4:11:3
+PTEXENC_LT_VERSINFO=5:0:4
 
 
 am__api_version='1.16'
@@ -8418,7 +8418,7 @@
 
 # Define the identity of the package.
  PACKAGE='ptexenc'
- VERSION='1.3.11'
+ VERSION='1.4.0'
 
 
 printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -14528,7 +14528,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-ptexenc config.lt 1.3.11
+ptexenc config.lt 1.4.0
 configured by $0, generated by GNU Autoconf 2.71.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -16255,7 +16255,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ptexenc $as_me 1.3.11, which was
+This file was extended by ptexenc $as_me 1.4.0, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -16323,7 +16323,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-ptexenc config.status 1.3.11
+ptexenc config.status 1.4.0
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -32,6 +32,7 @@
 /* enable/disable UPTEX */
 extern PTENCDLL void enable_UPTEX (boolean enable);
 extern PTENCDLL void set_prior_file_enc(void);
+extern PTENCDLL void ptenc_ptex_mode(const boolean enable);
 
 /* get/set Kanji encoding by string */
 extern PTENCDLL const_string get_enc_string(void);
@@ -58,10 +59,15 @@
 
 /* internal (EUC/SJIS/UPTEX) from/to buffer (EUC/SJIS/UTF-8) code conversion */
 extern PTENCDLL int multistrlen(unsigned char *s, int len, int pos);
+extern PTENCDLL int multistrlenshort(unsigned short *s, int len, int pos);
 extern PTENCDLL int multibytelen (int first_byte);
 extern PTENCDLL long fromBUFF(unsigned char *s, int len, int pos);
+extern PTENCDLL long fromBUFFshort(unsigned short *s, int len, int pos);
 extern PTENCDLL long toBUFF(long inter);
 
+/* for outputting filename (*s) to the terminal */
+extern PTENCDLL int multistrlenfilename(unsigned short *s, int len, int pos);
+
 /* internal (EUC/SJIS/UPTEX) from/to DVI (JIS/UCS) code conversion */
 extern PTENCDLL long toDVI (long kcode);
 extern PTENCDLL long fromDVI (long kcode);
@@ -81,8 +87,8 @@
 extern PTENCDLL int fputs2(const char *s, FILE *fp);
 
 /* input line with encoding conversion */
-extern PTENCDLL long input_line2(FILE *fp, unsigned char *buff, long pos,
-				const long buffsize, int *lastchar);
+extern PTENCDLL long input_line2(FILE *fp, unsigned char *buff, unsigned char *buff2,
+                                long pos, const long buffsize, int *lastchar);
 
 /* set current encoding */
 extern PTENCDLL boolean setinfileenc(FILE *fp, const char *str);

Modified: trunk/Build/source/texk/ptexenc/ptexenc/unicode.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/unicode.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/ptexenc/unicode.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -14,6 +14,7 @@
 
 extern int UTF8length(int first_byte);
 extern int UTF8Slength(unsigned char *buff, int buff_len);
+extern int UTF8Slengthshort(unsigned short *buff, int buff_len);
 extern long UTF8StoUCS(unsigned char *s);
 extern long PTENCDLL UCStoUTF8(long ucs);
 

Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -26,6 +26,7 @@
 
 static int default_kanji_enc;
 static boolean UPTEX_enabled;
+static boolean ptex_mode = false;
 static boolean prior_file_enc = false;
 
 #define ESC '\033'
@@ -145,6 +146,13 @@
     return terminal_enc;
 }
 
+/* enable ptex mode (use flag 0x100 for Japanese char) */
+void ptenc_ptex_mode (const boolean enable)
+{
+   //fprintf(stderr, "ptenc_ptex_mode is called! (%d)\n", enable);
+   ptex_mode = enable;
+}
+
 /* enable/disable UPTEX */
 void enable_UPTEX (boolean enable)
 {
@@ -253,16 +261,37 @@
 }
 
 /* multi-byte char length in s[pos] */
-int multistrlen(unsigned char *s, int len, int pos)
+#define DEFINE_MULTISTRLEN(SUFF,TYPE) \
+int multistrlen ## SUFF(TYPE *s, int len, int pos) \
+{ \
+    s += pos; len -= pos; \
+    if (is_internalUPTEX()) { \
+        int ret = UTF8Slength ## SUFF(s, len); \
+        if (ret < 0) return 1; \
+        return ret; \
+    } \
+    if (len < 2) return 1; \
+    if (is_internalSJIS()) { \
+        if (isSJISkanji1(s[0]) && isSJISkanji2(s[1])) return 2; \
+    } else { /* EUC */ \
+        if (isEUCkanji1(s[0])  && isEUCkanji2(s[1]))  return 2; \
+    } \
+    return 1; \
+}
+DEFINE_MULTISTRLEN(,unsigned char);
+DEFINE_MULTISTRLEN(short,unsigned short);
+
+/* for outputting filename (*s) to the terminal */
+int multistrlenfilename(unsigned short *s, int len, int pos)
 {
     s += pos; len -= pos;
-    if (is_internalUPTEX()) {
-        int ret = UTF8Slength(s, len);
+    if (terminal_enc == ENC_UTF8) {
+        int ret = UTF8Slengthshort(s, len);
         if (ret < 0) return 1;
         return ret;
     }
     if (len < 2) return 1;
-    if (is_internalSJIS()) {
+    if (terminal_enc == ENC_SJIS) {
         if (isSJISkanji1(s[0]) && isSJISkanji2(s[1])) return 2;
     } else { /* EUC */
         if (isEUCkanji1(s[0])  && isEUCkanji2(s[1]))  return 2;
@@ -300,6 +329,14 @@
     return s[0];
 }
 
+long fromBUFFshort(unsigned short *s, int len, int pos)
+{
+    unsigned char sc[6];
+    s += pos; len -= pos;
+    for (int i=0;i<(len<6 ? len : 6);i++) sc[i]=0xFF&s[i];
+    return fromBUFF(sc, (len<6 ? len : 6), 0);
+}
+
 /* internal (EUC/SJIS/UPTEX) to buffer (EUC/SJIS/UTF-8) code conversion */
 long toBUFF(long kcode)
 {
@@ -473,6 +510,10 @@
 
 /* putc() with code conversion */
 int putc2(int c, FILE *fp)
+/*
+  c in [0,255]:  writes the character c, without code conversion
+  c in [256,511]: writes the character c-256, with code conversion
+*/
 {
     static int num[NOFILE];
         /* 0    : not in Kanji
@@ -481,7 +522,6 @@
     static unsigned char store[NOFILE][4];
     const int fd = fileno(fp);
     int ret = c, output_enc;
-
 #ifdef WIN32
     if ((fp == stdout || fp == stderr) && (_isatty(fd) || !prior_file_enc)) {
         output_enc = ENC_UTF8;
@@ -493,34 +533,43 @@
     } else
         output_enc = get_file_enc();
 #endif
-    if (num[fd] > 0) {        /* multi-byte char */
-        if (is_internalUPTEX() && iskanji1(c)) { /* error */
-            ret = flush(store[fd], num[fd], fp);
-            num[fd] = 0;
-        }
-        store[fd][num[fd]] = c;
-        num[fd]++;
-        if (multistrlen(store[fd], num[fd], 0) == num[fd]) {
-            long i = fromBUFF(store[fd], num[fd], 0);
-            ret = put_multibyte(toENC(i, output_enc), fp);
-            num[fd] = -1;
-        } else if ((is_internalUPTEX() && num[fd] == 4) ||
-                   (!is_internalUPTEX() && num[fd] == 2)) { /* error */
-            ret = flush(store[fd], num[fd], fp);
-            num[fd] = -1;
-        }
-    } else if (iskanji1(c)) { /* first multi-byte char */
-        if (num[fd] == 0 && output_enc == ENC_JIS) {
-            ret = put_multibyte(KANJI_IN, fp);
-        }
-        store[fd][0] = c;
-        num[fd] = 1;
-    } else {                  /* ASCII */
+    if (ptex_mode && (c<256)) {
         if (num[fd] < 0 && output_enc == ENC_JIS) {
             put_multibyte(KANJI_OUT, fp);
         }
         ret = putc(c, fp);
         num[fd] = 0;
+    } else {
+        c &= 0xFF;
+        if (num[fd] > 0) {        /* multi-byte char */
+            if (is_internalUPTEX() && iskanji1(c)) { /* error */
+                ret = flush(store[fd], num[fd], fp);
+                num[fd] = 0;
+            }
+            store[fd][num[fd]] = c;
+            num[fd]++;
+            if (multistrlen(store[fd], num[fd], 0) == num[fd]) {
+                long i = fromBUFF(store[fd], num[fd], 0);
+                ret = put_multibyte(toENC(i, output_enc), fp);
+                num[fd] = -1;
+            } else if ((is_internalUPTEX() && num[fd] == 4) ||
+                (!is_internalUPTEX() && num[fd] == 2)) { /* error */
+                ret = flush(store[fd], num[fd], fp);
+                num[fd] = -1;
+            }
+        } else if (iskanji1(c)) { /* first multi-byte char */
+            if (num[fd] == 0 && output_enc == ENC_JIS) {
+                ret = put_multibyte(KANJI_IN, fp);
+            }
+            store[fd][0] = c;
+            num[fd] = 1;
+        } else {                  /* ASCII */
+            if (num[fd] < 0 && output_enc == ENC_JIS) {
+                put_multibyte(KANJI_OUT, fp);
+            }
+            ret = putc(c, fp);
+            num[fd] = 0;
+        }
     }
     return ret;
 }
@@ -745,8 +794,8 @@
                                   other: determined */
 
 /* input line with encoding conversion */
-long input_line2(FILE *fp, unsigned char *buff, long pos,
-                 const long buffsize, int *lastchar)
+long input_line2(FILE *fp, unsigned char *buff, unsigned char *buff2,
+                 long pos, const long buffsize, int *lastchar)
 {
     long i = 0;
     static boolean injis = false;
@@ -826,6 +875,9 @@
     if (i == EOF || i == '\n' || i == '\r') injis = false;
     if (lastchar != NULL) *lastchar = i;
 
+    if (buff2!= NULL) for (i=pos; i<=last; i++) buff2[i] = 0;
+    /* buff2 is initialized */
+
     return last;
 }
 
@@ -925,6 +977,8 @@
 }
 
 #define break_if_bad_utf8_second(k) if ((k<0x80)||(k>0xBF)) { i--; k='\0'; break; }
+#define write_hex_if_not_ascii(c) \
+   if ((c>=0x20)&&(c<=0x7E)) buffer[last++]=c; else write_hex(c);
 unsigned char *ptenc_from_utf8_string_to_internal_enc(const unsigned char *is)
 {
     int i;
@@ -975,10 +1029,10 @@
         j = (u != 0) ? toBUFF(fromUCS(u)) : 0;
         if (j == 0) { /* can't represent in EUC/SJIS */
             if (last+16>=len) buffer = buf = xrealloc(buffer, len=last+64);
-            write_hex(i1);
-            if (i2 != '\0') write_hex(i2);
-            if (i3 != '\0') write_hex(i3);
-            if (i4 != '\0') write_hex(i4);
+            write_hex_if_not_ascii(i1);
+            if (i2 != '\0') write_hex_if_not_ascii(i2);
+            if (i3 != '\0') write_hex_if_not_ascii(i3);
+            if (i4 != '\0') write_hex_if_not_ascii(i4);
         } else {
             write_multibyte(j);
         }
@@ -1005,7 +1059,7 @@
     last_bak = last;
 
     len = strlen(is)+1;
-    buffer = buf = xmalloc(len*1.5);
+    buffer = buf = xmalloc(len*4);
     first = last = 0;
 
     for (i=0; i<strlen(is); i++) {
@@ -1015,15 +1069,21 @@
             buffer[last++] = i1; /* ASCII */
             if (i1 == '\0') goto end;
             continue;
-        case 2:
-            i2 = is[++i]; if (i2 == '\0') break;
-            u = JIStoUCS2(toJIS(HILO(i1,i2)));
+        case 2: /* i1: not ASCII */
+            i2 = is[++i];
+            if (i2 == '\0') {
+              write_hex(i1); continue;
+            } else {
+              u = JIStoUCS2(toJIS(HILO(i1,i2)));
+              if (u==0) {
+                write_hex(i1); write_hex_if_not_ascii(i2); continue;
+              }
+            }
             break;
-        default:
+        default: /* reachable only if internal code is uptex */
             u = U_REPLACEMENT_CHARACTER;
             break;
         }
-
         write_multibyte(UCStoUTF8(u));
     }
     buffer[last] = '\0';

Modified: trunk/Build/source/texk/ptexenc/unicode.c
===================================================================
--- trunk/Build/source/texk/ptexenc/unicode.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/unicode.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -43,20 +43,23 @@
 
 
 /* with strict range check */
-int UTF8Slength(unsigned char *buff, int buff_len)
-{
-    int i, len;
-
-    len = UTF8length(buff[0]);
-    if (len < 0) return -2; /* illegal */
-    if (len > buff_len) return -3; /* overflow */
-    for (i=0; i<len; i++) {
-        if (!isUTF8(len, 1+i, buff[i])) return -1; /* not UTF-8 */
-    }
-    return len;
+#define DEFINE_UTF8SLENGTH(SUFF,TYPE) \
+int UTF8Slength ## SUFF(TYPE *buff, int buff_len) \
+{ \
+    int i, len; \
+    len = UTF8length(buff[0]); \
+    if (len < 0) return -2; /* illegal */ \
+    if (len > buff_len) return -3; /* overflow */ \
+    for (i=0; i<len; i++) { \
+        if (!isUTF8(len, 1+i, buff[i])) return -1; /* not UTF-8 */ \
+    } \
+    return len; \
 }
 
+DEFINE_UTF8SLENGTH(, unsigned char)
+DEFINE_UTF8SLENGTH(short, unsigned short)
 
+
 /* WITHOUT strict range check */
 long UTF8StoUCS(unsigned char *s)
 {

Modified: trunk/Build/source/texk/ptexenc/version.ac
===================================================================
--- trunk/Build/source/texk/ptexenc/version.ac	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/ptexenc/version.ac	2022-01-22 17:03:22 UTC (rev 61692)
@@ -10,4 +10,4 @@
 dnl see kpathsea/version.ac.
 dnl
 dnl This file is m4-included from configure.ac.
-m4_define([ptexenc_version], [1.3.11])
+m4_define([ptexenc_version], [1.4.0])

Modified: trunk/Build/source/texk/web2c/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,8 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* texmfmem.h: Change type of str_pool to unsigned short for pTeX.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+
 2022-01-21  Karl Berry  <karl at freefriends.org>
 
 	* ac/web2c.ac (kpse_tex_progs): enable hitex by default.

Modified: trunk/Build/source/texk/web2c/eptexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,12 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* eptex.ech, etex.ch0, etex.ch1, pdfutils.ch:
+	  Change type of str_pool to unsigned short.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+	  More details in TUGboat 41(2):329--334, 2020.
+	* eptex.defines: Add multistrlen{short,filename}, fromBUFFshort.
+	* tests/printkanji-eptex.tex: Added.
+
 2022-01-10  Karl Berry  <karl at freefriends.org>
 
 	* am/eptex.am: do not silence tangle-sh.

Modified: trunk/Build/source/texk/web2c/eptexdir/eptex.defines
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/eptex.defines	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/eptex.defines	2022-01-22 17:03:22 UTC (rev 61692)
@@ -11,7 +11,10 @@
 
 @define function iskanji1 ();
 @define function multistrlen ();
+@define function multistrlenshort ();
+@define function multistrlenfilename ();
 @define function fromBUFF ();
+@define function fromBUFFshort ();
 @define function toBUFF ();
 
 @define function fromDVI ();

Modified: trunk/Build/source/texk/web2c/eptexdir/eptex.ech
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/eptex.ech	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/eptex.ech	2022-01-22 17:03:22 UTC (rev 61692)
@@ -472,12 +472,7 @@
     if k = false then
       begin begin_diagnostic;
       print_nl("Unknown encoding `");
-      case selector of
-      term_and_log: begin wterm(stringcast(name_of_file + 1));
-        wlog(stringcast(name_of_file + 1)); end;
-      log_only:  wlog(stringcast(name_of_file + 1));
-      term_only: wterm(stringcast(name_of_file + 1));
-      endcases;
+      slow_print(cur_area); slow_print(cur_name); slow_print(cur_ext);
       print("'"); end_diagnostic(false);
       end
     end
@@ -649,6 +644,16 @@
     add_glue_ref(space_ptr(r)); add_glue_ref(xspace_ptr(r));
 @z
 
+@x e-pTeX: pseudo file
+    buffer[last]:=w.b0; buffer[last+1]:=w.b1;
+    buffer[last+2]:=w.b2; buffer[last+3]:=w.b3;
+@y
+    buffer[last]:=w.b0 mod @"100; buffer[last+1]:=w.b1 mod @"100;
+    buffer[last+2]:=w.b2 mod @"100; buffer[last+3]:=w.b3 mod @"100;@/
+    buffer2[last]:=0; buffer2[last+1]:=0;
+    buffer2[last+2]:=0; buffer2[last+3]:=0;
+@z
+
 @x e-pTeX: \readline
 @ @<Handle \.{\\readline} and |goto done|@>=
 if j=1 then
@@ -672,8 +677,9 @@
   buffer[m]:=info(p) mod @'400; incr(m); p:=link(p);
 @y
   if check_kanji(info(p)) then {|wchar_token|}
-    begin buffer[m]:=Hi(info(p)); incr(m);
-    end;
+    begin buffer[m]:=Hi(info(p)); buffer2[m]:=1; incr(m); buffer2[m]:=1;
+    end
+  else buffer2[m]:=0;
   buffer[m]:=Lo(info(p)); incr(m); p:=link(p);
 @z
 

Modified: trunk/Build/source/texk/web2c/eptexdir/etex.ch0
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/etex.ch0	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/etex.ch0	2022-01-22 17:03:22 UTC (rev 61692)
@@ -15,6 +15,12 @@
 @d int_pars=web2c_int_pars {total number of integer parameters}
 @z
 
+@x [18.???] pTeX: ensure buffer2[]=0 in primitive
+  for j:=0 to l-1 do buffer[first+j]:=so(str_pool[k+j]);
+@y
+  for j:=0 to l-1 do buffer[j]:=so(str_pool[k+j]);
+@z
+
 @x [26.413]
   {fetch an internal parameter}
 label exit;

Modified: trunk/Build/source/texk/web2c/eptexdir/etex.ch1
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/etex.ch1	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/etex.ch1	2022-01-22 17:03:22 UTC (rev 61692)
@@ -21,6 +21,14 @@
 @d etex_int_base=web2c_int_pars {base for \eTeX's integer parameters}
 @z
 
+@x [18]
+  for j:=0 to l-1 do begin
+    buffer[j]:=Lo(so(str_pool[k+j])); buffer2[j]:=Hi(so(str_pool[k+j])); end;
+@y
+  for j:=0 to l-1 do begin
+    buffer[first+j]:=Lo(so(str_pool[k+j])); buffer2[first+j]:=Hi(so(str_pool[k+j])); end;
+@z
+
 @x
 @d eTeX_int=badness_code+1 {first of \eTeX\ codes for integers}
 @y

Modified: trunk/Build/source/texk/web2c/eptexdir/pdfutils.ch
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/pdfutils.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/eptexdir/pdfutils.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1783,22 +1783,22 @@
 @x
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
-if s>255 then
-  begin print_char(Hi(s)); print_char(Lo(s));
+if s>@"FF then
+  begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s));
   end else print_char(s);
 end;
 @y
 procedure print_kanji(@!s:integer); {prints a single character}
 begin
-if s>255 then begin
+if s>@"FF then begin
   if isprint_utf8 then begin
     s:=UCStoUTF8(toUCS(s));
-    if BYTE1(s)<>0 then print_char(BYTE1(s));
-    if BYTE2(s)<>0 then print_char(BYTE2(s));
-    if BYTE3(s)<>0 then print_char(BYTE3(s));
-                        print_char(BYTE4(s));
+    if BYTE1(s)<>0 then print_char(@"100+BYTE1(s));
+    if BYTE2(s)<>0 then print_char(@"100+BYTE2(s));
+    if BYTE3(s)<>0 then print_char(@"100+BYTE3(s));
+                        print_char(@"100+BYTE4(s));
   end
-  else begin print_char(Hi(s)); print_char(Lo(s)); end;
+  else begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s)); end;
 end
 else print_char(s);
 end;
@@ -1853,6 +1853,7 @@
 label done;
 var s1, s2: str_number;
     i1, i2, j1, j2: pool_pointer;
+    c1, c2: integer;
     save_cur_cs: pointer;
 begin
     save_cur_cs:=cur_cs; call_func(scan_toks(false, true));
@@ -1866,14 +1867,10 @@
     i2 := str_start[s2];
     j2 := str_start[s2 + 1];
     while (i1 < j1) and (i2 < j2) do begin
-        if str_pool[i1] < str_pool[i2] then begin
-            cur_val := -1;
-            goto done;
-        end;
-        if str_pool[i1] > str_pool[i2] then begin
-            cur_val := 1;
-            goto done;
-        end;
+        if str_pool[i1]>=@"100 then c1:=str_pool[i1]-@"100 else c1:=str_pool[i1];
+        if str_pool[i2]>=@"100 then c2:=str_pool[i2]-@"100 else c2:=str_pool[i2];
+        if c1<c2 then begin cur_val := -1; goto done; end
+        else if c1>c2 then begin cur_val := 1; goto done; end;
         incr(i1);
         incr(i2);
     end;

Added: trunk/Build/source/texk/web2c/eptexdir/tests/printkanji-eptex.tex
===================================================================
--- trunk/Build/source/texk/web2c/eptexdir/tests/printkanji-eptex.tex	                        (rev 0)
+++ trunk/Build/source/texk/web2c/eptexdir/tests/printkanji-eptex.tex	2022-01-22 17:03:22 UTC (rev 61692)
@@ -0,0 +1,68 @@
+%あaª
+\tracingstats=1000
+\noautoxspacing\scrollmode
+\tracingscantokens=1
+\everyeof{\noexpand}
+\font\x=ec-lmtt10 \x
+\immediate\openout1=\jobname.out
+\def\MSG#1{%
+  \message{\string{MSG #1\string}}%
+  \immediate\write17{\string{TOT #1\string}}%
+  \immediate\write1{#1}%
+}
+\def\head#1{\message{■#1.}\par\noindent\hbox{■\null}#1.\par}
+\message{^^J}
+\count255="80
+\loop\ifnum\count255<"100
+  \catcode\count255=11\relax
+  \advance\count255 1\relax
+\repeat
+
+%========
+\head{detokenize}
+\def\A{^^c5^^bf ſ 顛 }
+
+\detokenize\expandafter{\meaning\A}
+\MSG{\detokenize\expandafter{\meaning\A}}
+
+%========
+\head{scantokens}
+\def\c{捉^^c2^^aaª}
+
+\scantokens{\noexpand\^^a4^^a2あ^^e3^^81^^82.^^a4^^a2あ^^e3^^81^^82.\c}
+\def\a{\scantokens{\noexpand\^^a4^^a2あ^^e3^^81^^82.^^a4^^a2あ^^e3^^81^^82.\c}}
+
+\edef\b{\scantokens{\noexpand\^^a4^^a2あ^^e3^^81^^82.^^a4^^a2あ^^e3^^81^^82.\c}}
+\MSG{\a:\b:\meaning\b}
+
+\def\あ{a}
+\MSG{
+  \ifcsname あ\endcsname Y\csname あ\endcsname\else n\fi,
+  \ifcsname ^^e3^^81^^82\endcsname Y\csname ^^e3^^81^^82\endcsname\else n\fi,
+  \ifcsname ^^a4^^a2\endcsname Y\csname ^^a4^^a2\endcsname\else n\fi}
+
+%========
+\head{pdfstrcmp}
+
+\MSG{あ \pdfstrcmp{あ}{^^e3^^81^^82} ^^e3^^81^^82}
+\MSG{あ \pdfstrcmp{あ}{^^a4^^a2} ^^a4^^a2}
+\MSG{捉 \pdfstrcmp{捉}{^^c2^^aa} ^^c2^^aa}
+\MSG{捉 \pdfstrcmp{捉}{ª} ^^c2^^aa}
+\MSG{ª \pdfstrcmp{ª}{^^c2^^aa} ^^c2^^aa}
+
+%========
+\head{other pdf* util.}
+
+\MSG{\detokenize\expandafter{\pdffiledump length 20{\jobname.tex}}}
+
+\MSG{\pdfmdfivesum{}}
+\MSG{\pdfmdfivesum{あ}}
+\MSG{\pdfmdfivesum{^^e3^^81^^82}}
+\MSG{\pdfcreationdate}
+\MSG{\pdffilemoddate{\jobname.tex}}
+\MSG{\pdffilesize{\jobname.tex}}
+\MSG{\the\pdfelapsedtime}
+\MSG{\expanded{^^c2^^aaª捉.^^e3^^81^^82あ^^a4^^a2}}
+
+\immediate\closeout1
+\bye


Property changes on: trunk/Build/source/texk/web2c/eptexdir/tests/printkanji-eptex.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Build/source/texk/web2c/euptexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/euptexdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/euptexdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,11 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* euptex.ch1, pdfstrcmp-eup-post.ch, pdfstrcmp-eup-pre.ch:
+	  Change type of str_pool to unsigned short.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+	  More details in TUGboat 41(2):329--334, 2020.
+	* euptex.defines: Add multistrlen{short,filename}, fromBUFFshort.
+
 2022-01-10  Karl Berry  <karl at freefriends.org>
 
 	* am/euptex.am: do not silence tangle-sh.

Modified: trunk/Build/source/texk/web2c/euptexdir/euptex.ch1
===================================================================
--- trunk/Build/source/texk/web2c/euptexdir/euptex.ch1	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/euptexdir/euptex.ch1	2022-01-22 17:03:22 UTC (rev 61692)
@@ -67,19 +67,23 @@
 @z
 
 @x e-pTeX: ifcsname l.28620
-    begin buffer[m]:=Hi(info(p)); incr(m);
-    end;
+    begin buffer[m]:=Hi(info(p)); buffer2[m]:=1; incr(m); buffer2[m]:=1;
+    end
+  else buffer2[m]:=0;
   buffer[m]:=Lo(info(p)); incr(m); p:=link(p);
 @y
     begin
-    if BYTE1(toBUFF(info(p) mod max_cjk_val))<>0 then begin buffer[m]:=BYTE1(toBUFF(info(p) mod max_cjk_val)); incr(m); end;
-    if BYTE2(toBUFF(info(p) mod max_cjk_val))<>0 then begin buffer[m]:=BYTE2(toBUFF(info(p) mod max_cjk_val)); incr(m); end;
-    if BYTE3(toBUFF(info(p) mod max_cjk_val))<>0 then begin buffer[m]:=BYTE3(toBUFF(info(p) mod max_cjk_val)); incr(m); end;
-                              buffer[m]:=BYTE4(toBUFF(info(p) mod max_cjk_val)); incr(m);
+    if BYTE1(toBUFF(info(p) mod max_cjk_val))<>0 then
+      begin buffer[m]:=BYTE1(toBUFF(info(p) mod max_cjk_val)); buffer2[m]:=1; incr(m); end;
+    if BYTE2(toBUFF(info(p) mod max_cjk_val))<>0 then
+      begin buffer[m]:=BYTE2(toBUFF(info(p) mod max_cjk_val)); buffer2[m]:=1; incr(m); end;
+    if BYTE3(toBUFF(info(p) mod max_cjk_val))<>0 then
+      begin buffer[m]:=BYTE3(toBUFF(info(p) mod max_cjk_val)); buffer2[m]:=1; incr(m); end;
+    buffer[m]:=BYTE4(toBUFF(info(p) mod max_cjk_val)); buffer2[m]:=1; incr(m);
     p:=link(p);
     end
   else
-    begin buffer[m]:=info(p) mod max_char_val; incr(m); p:=link(p);
+    begin buffer[m]:=info(p) mod max_char_val; buffer2[m]:=0; incr(m); p:=link(p);
     end;
 @z
 

Modified: trunk/Build/source/texk/web2c/euptexdir/euptex.defines
===================================================================
--- trunk/Build/source/texk/web2c/euptexdir/euptex.defines	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/euptexdir/euptex.defines	2022-01-22 17:03:22 UTC (rev 61692)
@@ -16,7 +16,10 @@
 @define function ismultichr ();
 
 @define function multistrlen ();
+@define function multistrlenshort ();
+@define function multistrlenfilename ();
 @define function fromBUFF ();
+@define function fromBUFFshort ();
 @define function toBUFF ();
 
 @define function fromDVI ();

Modified: trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-post.ch
===================================================================
--- trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-post.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-post.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -17,11 +17,9 @@
 @z
 
 @x
-       check_kcat_code(cc) then
-    begin if (cc=not_cjk) then cc:=other_kchar;
+    if (cc=not_cjk) then cc:=other_kchar;
 @y
-       ((cat>=kanji)or check_kcat_code(cc)) then
-    begin if cat>=kanji then cc:=cat else if (cc=not_cjk) then cc:=other_kchar;
+    if cat>=kanji then cc:=cat else if (cc=not_cjk) then cc:=other_kchar;
 @z
 
 @x
@@ -97,15 +95,15 @@
 @x
 procedure print_kanji(@!s:integer); {prints a single character}
 begin
-if s>255 then begin
+if s>@"FF then begin
   if isprint_utf8 then begin
     s:=UCStoUTF8(toUCS(s));
-    if BYTE1(s)<>0 then print_char(BYTE1(s));
-    if BYTE2(s)<>0 then print_char(BYTE2(s));
-    if BYTE3(s)<>0 then print_char(BYTE3(s));
-                        print_char(BYTE4(s));
+    if BYTE1(s)<>0 then print_char(@"100+BYTE1(s));
+    if BYTE2(s)<>0 then print_char(@"100+BYTE2(s));
+    if BYTE3(s)<>0 then print_char(@"100+BYTE3(s));
+                        print_char(@"100+BYTE4(s));
   end
-  else begin print_char(Hi(s)); print_char(Lo(s)); end;
+  else begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s)); end;
 end
 else print_char(s);
 end;
@@ -114,9 +112,9 @@
 begin
 if isprint_utf8 then s:=UCStoUTF8(toUCS(s mod max_cjk_val))
 else s:=toBUFF(s mod max_cjk_val);
-if BYTE1(s)<>0 then print_char(BYTE1(s));
-if BYTE2(s)<>0 then print_char(BYTE2(s));
-if BYTE3(s)<>0 then print_char(BYTE3(s));
-                    print_char(BYTE4(s));
+if BYTE1(s)<>0 then print_char(@"100+BYTE1(s));
+if BYTE2(s)<>0 then print_char(@"100+BYTE2(s));
+if BYTE3(s)<>0 then print_char(@"100+BYTE3(s));
+                    print_char(@"100+BYTE4(s));
 end;
 @z

Modified: trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-pre.ch
===================================================================
--- trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-pre.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/euptexdir/pdfstrcmp-eup-pre.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -10,16 +10,16 @@
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
 s:=toBUFF(s mod max_cjk_val);
-if BYTE1(s)<>0 then print_char(BYTE1(s));
-if BYTE2(s)<>0 then print_char(BYTE2(s));
-if BYTE3(s)<>0 then print_char(BYTE3(s));
-                    print_char(BYTE4(s));
+if BYTE1(s)<>0 then print_char(@"100+BYTE1(s));
+if BYTE2(s)<>0 then print_char(@"100+BYTE2(s));
+if BYTE3(s)<>0 then print_char(@"100+BYTE3(s));
+                    print_char(@"100+BYTE4(s));
 end;
 @y
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
-if s>255 then
-  begin print_char(Hi(s)); print_char(Lo(s));
+if s>@"FF then
+  begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s));
   end else print_char(s);
 end;
 @z

Modified: trunk/Build/source/texk/web2c/lib/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/lib/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/lib/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,9 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* openclose.c (open_input): Reencode nameoffile to utf8 for pTeX.
+	* texmfmp.c: Change type of str_pool to unsigned short for pTeX.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+
 2022-01-01  Akira Kakuto  <kakuto at jcom.zaq.ne.jp>
 
 	* printversion.c: Update copyright year.

Modified: trunk/Build/source/texk/web2c/lib/openclose.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/openclose.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/lib/openclose.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -281,6 +281,13 @@
                                     must_exist);
             if (fname) {
                 fullnameoffile = xstrdup(fname);
+#if defined(PTEX) && !defined(WIN32)
+                fname0 = ptenc_from_utf8_string_to_internal_enc(fullnameoffile);
+                if (fname0) {
+                    free (fullnameoffile);
+                    fullnameoffile = fname0;
+                }
+#endif
                 /* If we found the file in the current directory, don't leave
                    the `./' at the beginning of `nameoffile', since it looks
                    dumb when `tex foo' says `(./foo.tex ... )'.  On the other

Modified: trunk/Build/source/texk/web2c/lib/texmfmp.c
===================================================================
--- trunk/Build/source/texk/web2c/lib/texmfmp.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/lib/texmfmp.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -596,7 +596,14 @@
   int allow = 0;
   char *safecmd = NULL;
   char *cmdname = NULL;
+#if IS_pTeX && !defined(WIN32)
+  char *cmd2;
+#endif
   int status = 0;
+#if IS_pTeX && !defined(WIN32)
+  cmd2 = (char *)ptenc_from_internal_enc_string_to_utf8((unsigned char *)cmd);
+  if (!cmd2) cmd2=cmd;
+#endif
 
   if (shellenabledp <= 0) {
     return 0;
@@ -606,10 +613,18 @@
   if (restrictedshell == 0)
     allow = 1;
   else
+#if IS_pTeX && !defined(WIN32)
+    allow = shell_cmd_is_allowed (cmd2, &safecmd, &cmdname);
+#else
     allow = shell_cmd_is_allowed (cmd, &safecmd, &cmdname);
+#endif
 
   if (allow == 1)
+#if IS_pTeX && !defined(WIN32)
+    status = system (cmd2);
+#else
     status = system (cmd);
+#endif
   else if (allow == 2) {
 /*
   command including a character '|' is not allowed in
@@ -627,6 +642,9 @@
   if (status != 0)
     fprintf(stderr,"system returned with code %d\n", status); 
 
+#if IS_pTeX && !defined(WIN32)
+  if (cmd!=cmd2) free(cmd2);
+#endif
   if (safecmd)
     free (safecmd);
   if (cmdname)
@@ -779,7 +797,7 @@
 
 #if IS_pTeX
   kpse_set_program_name (argv[0], NULL);
-  initkanji ();
+  initkanji (); ptenc_ptex_mode(true);
 #endif
 #if (defined(XeTeX) || defined(pdfTeX)) && defined(WIN32)
   kpse_set_program_name (argv[0], NULL);
@@ -1457,7 +1475,11 @@
     {
     unsigned i;
     for (i=0; i<len; i++)
+#if IS_pTeX
+      name[i] =  0xFF&strpool[i+strstart[outputfilename]];
+#else
       name[i] =  strpool[i+strstartar[outputfilename - 65536L]];
+#endif
     }
 #endif
     name[len] = 0;
@@ -2502,7 +2524,8 @@
 
   /* Recognize either LF or CR as a line terminator.  */
 #if IS_pTeX
-  last = input_line2(f, (unsigned char *)buffer, first, bufsize, &i);
+  last = input_line2(f, (unsigned char *)buffer, (unsigned char *)buffer2,
+                     first, bufsize, &i);
 #else
 #ifdef WIN32
   if (f != Poptr && fileno (f) != fileno (stdin)) {
@@ -3035,7 +3058,7 @@
   }
 #else /* ! XeTeX */
   while (len-- > 0)
-    strpool[poolptr++] = *s++;
+    strpool[poolptr++] = 0xFF&(*s++);
 #endif /* ! XeTeX */
 
   return makestring();
@@ -3053,9 +3076,19 @@
 strnumber
 getjobname(strnumber name)
 {
-    strnumber ret = name;
+    strnumber ret = name; int i, l, p;
     if (c_job_name != NULL)
       ret = maketexstring(c_job_name);
+#if IS_pTeX
+    i = strstart[ret]; l = strstart[ret+1];
+    while (i<l)
+     {
+        p = multistrlenshort(strpool, l, i);
+        if (p>1)
+             for (int j=i+p; i<j; i++) strpool[i] = (0xFF&strpool[i])+0x100;
+        else i++;
+     }
+#endif /* IS_pTeX */
     return ret;
 }
 #endif
@@ -3140,13 +3173,17 @@
   len = strstartar[s + 1 - 65536L] - strstartar[s - 65536L];
 #endif
   name = (string)xmalloc (len + 1);
-#if !defined(Aleph)
+#if !defined(Aleph) && !IS_pTeX
   strncpy (name, (string)&strpool[strstart[s]], len);
 #else
   {
   poolpointer i;
   /* Don't use strncpy.  The strpool is not made up of chars. */
+#if IS_pTeX
+  for (i=0; i<len; i++) name[i] =  0xFF&strpool[i+strstart[s]];
+#else
   for (i=0; i<len; i++) name[i] =  strpool[i+strstartar[s - 65536L]];
+#endif
   }
 #endif
   name[len] = 0;
@@ -3389,7 +3426,11 @@
     }
     p = cstrbuf;
     for (i = 0; i < l; i++)
+#if IS_pTeX
+        *p++ = 0xFF&strpool[i + strstart[s]];
+#else
         *p++ = strpool[i + strstart[s]];
+#endif
     *p = 0;
     return cstrbuf;
 }
@@ -3422,7 +3463,7 @@
 getcreationdate(void)
 {
     size_t len;
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     int i;
 #endif
     initstarttime();
@@ -3438,7 +3479,7 @@
         return;
     }
 
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     for (i = 0; i < len; i++)
         strpool[poolptr++] = (uint16_t)start_time_str[i];
 #else
@@ -3472,7 +3513,7 @@
             poolptr = poolsize;
             /* error by str_toks that calls str_room(1) */
         } else {
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
             int i;
 
             for (i = 0; i < len; i++)
@@ -3518,7 +3559,7 @@
             poolptr = poolsize;
             /* error by str_toks that calls str_room(1) */
         } else {
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
             for (i = 0; i < len; i++)
                 strpool[poolptr++] = (uint16_t)buf[i];
 #else
@@ -3537,7 +3578,7 @@
 {
     FILE *f;
     int read, i;
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     unsigned char *readbuffer;
     char strbuf[3];
     int j, k;
@@ -3544,7 +3585,7 @@
 #else
     poolpointer data_ptr;
     poolpointer data_end;
-#endif /* XeTeX */
+#endif /* XeTeX || IS_pTeX */
     char *file_name;
 
     if (length == 0) {
@@ -3575,7 +3616,7 @@
         xfree(file_name);
         return;
     }
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     readbuffer = (unsigned char *)xmalloc (length + 1);
     read = fread(readbuffer, sizeof(char), length, f);
     fclose(f);
@@ -3603,7 +3644,7 @@
         check_nprintf(i, 3);
         poolptr += i;
     }
-#endif /* XeTeX */
+#endif /* XeTeX || IS_pTeX */
     xfree(file_name);
 }
 
@@ -3637,7 +3678,7 @@
     md5_byte_t digest[DIGEST_SIZE];
     char outbuf[2 * DIGEST_SIZE + 1];
     int len = 2 * DIGEST_SIZE;
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     char *xname;
     int i;
 #endif
@@ -3673,7 +3714,7 @@
     } else {
         /* s contains the data */
         md5_init(&state);
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
         xname = gettexstring (s);
         md5_append(&state,
                    (md5_byte_t *) xname,
@@ -3692,7 +3733,7 @@
         return;
     }
     convertStringToHexString((char *) digest, outbuf, DIGEST_SIZE);
-#if defined(XeTeX)
+#if defined(XeTeX) || IS_pTeX
     for (i = 0; i < 2 * DIGEST_SIZE; i++)
         strpool[poolptr++] = (uint16_t)outbuf[i];
 #else

Modified: trunk/Build/source/texk/web2c/pmpostdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/pmpostdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/pmpostdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,7 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* pmp.ch: Adapt to arguments of input_line2() in ptexenc-1.4.0.
+
 2022-01-18  Karl Berry  <karl at freefriends.org>
 
 	* pmp.ch: Sync with the version 2.02 mp.w (three times).

Modified: trunk/Build/source/texk/web2c/pmpostdir/pmp.ch
===================================================================
--- trunk/Build/source/texk/web2c/pmpostdir/pmp.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/pmpostdir/pmp.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -100,7 +100,7 @@
 @y
 static boolean mp_input_ln (MP mp, void *f ) {
   int i = EOF;
-  mp->last = input_line2((FILE *)f, mp->buffer, mp->first, mp->buf_size, &i);
+  mp->last = input_line2((FILE *)f, mp->buffer, NULL, mp->first, mp->buf_size, &i);
   if (i == EOF && errno != EINTR && mp->last == mp->first)
     return false;
   if (i != EOF && i != '\n' && i != '\r') {

Modified: trunk/Build/source/texk/web2c/ptexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ptexdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,14 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* ptex-base.ch: Change type of str_pool to unsigned short.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+	  More details in TUGboat 41(2):329--334, 2020.
+	* kanji.h: Adapt to arguments of input_line2() in ptexenc-1.4.0
+	  (for pBibTeX).
+	* ptex.defines: Add multistrlen{short,filename}, fromBUFFshort.
+	* ptex_version.h: Version p4.0.0.
+	* tests/filename_test.sh, tests/printkanji.tex: Added.
+
 2022-01-10  Karl Berry  <karl at freefriends.org>
 
 	* am/ptex.am: do not silence tangle-sh.

Modified: trunk/Build/source/texk/web2c/ptexdir/kanji.h
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/kanji.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ptexdir/kanji.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -65,9 +65,9 @@
 #endif /* !PRESERVE_FPUTS */
 
 #ifdef PBIBTEX
-#define inputline2(fp,buff,pos,size,ptr) input_line2(fp,buff,pos,size,ptr)
+#define inputline2(fp,buff,pos,size,ptr) input_line2(fp,buff,NULL,pos,size,ptr)
 #else
-#define inputline2(fp,buff,pos,size) input_line2(fp,buff,pos,size,NULL)
+#define inputline2(fp,buff,pos,size) input_line2(fp,buff,NULL,pos,size,NULL)
 #endif
 
 extern void dump_kanji (gzFile fp);

Modified: trunk/Build/source/texk/web2c/ptexdir/ptex-base.ch
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/ptex-base.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ptexdir/ptex-base.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -67,6 +67,10 @@
 %                  based on TeX 3.141592653 (for TL21).
 % (2021-06-25) HY  pTeX p3.9.1 Various fixes.
 % (2021-06-20) HK  pTeX p3.10.0 Add \ucs and \toucs.
+% (2022-01-22) HK  pTeX p4.0.0 Distinguish 8-bit characters and Japanese characters
+%                  for better support of LaTeX3 (expl3).
+%                  Requires ptexenc version 1.4.0.
+%                  More details in TUGboat 41(2):329--334, 2020.
 
 @x
 % Here is TeX material that gets inserted after \input webmac
@@ -79,10 +83,10 @@
 @d banner==TeX_banner
 @d banner_k==TeX_banner_k
 @y
-@d pTeX_version=3
-@d pTeX_minor_version=10
+@d pTeX_version=4
+@d pTeX_minor_version=0
 @d pTeX_revision==".0"
-@d pTeX_version_string=='-p3.10.0' {current \pTeX\ version}
+@d pTeX_version_string=='-p4.0.0' {current \pTeX\ version}
 @#
 @d pTeX_banner=='This is pTeX, Version 3.141592653',pTeX_version_string
 @d pTeX_banner_k==pTeX_banner
@@ -97,8 +101,23 @@
 @y
 @!ASCII_code=0..255; {eight-bit numbers}
 @!KANJI_code=0..65535; {sixteen-bit numbers}
+@!ext_ASCII_code=0..32768; { only use 0--511 }
 @z
 
+@x pTeX: xchr
+xchr: array [ASCII_code] of text_char;
+   { specifies conversion of output characters }
+@y
+xchr: array [ext_ASCII_code] of ext_ASCII_code;
+   { specifies conversion of output characters }
+@z
+
+@x pTeX: xchr
+for i:=@'177 to @'377 do xchr[i]:=i;
+@y
+for i:=@'177 to @'777 do xchr[i]:=i;
+@z
+
 @x [3.??] l.870 - pTeX:
 @!eight_bits=0..255; {unsigned one-byte quantity}
 @y
@@ -114,6 +133,36 @@
 @ Kanji code handling.
 @z
 
+@x [3.??] pTeX
+@<Glob...@>=
+@!buffer:^ASCII_code; {lines of characters being read}
+@y
+In \pTeX, we use another array |buffer2[]| to indicate which byte
+is a part of a Japanese character.
+|buffer2[]| is initialized to zero in reading one line from a file
+(|input_ln|). |buffer2[i]| is set to one when |buffer[i]| is known
+to be a part of a Japanese character, in |get_next| routine.
+
+@<Glob...@>=
+@!buffer:^ASCII_code; {lines of characters being read}
+@!buffer2:^ASCII_code;
+@z
+
+@x [4]
+@!packed_ASCII_code = 0..255; {elements of |str_pool| array}
+@y
+@!packed_ASCII_code = 0..32768; {elements of |str_pool| array}
+  { 256..511 are used by Japanese characters }
+@z
+
+@x [4] pTeX: str_eq_buf
+while j<str_start[s+1] do
+  begin if so(str_pool[j])<>buffer[k] then
+@y
+while j<str_start[s+1] do
+  begin if so(str_pool[j])<>buffer2[k]*@"100+buffer[k] then
+@z
+
 @x [4.47] l.1325 - pTeX:
 @!init function get_strings_started:boolean; {initializes the string pool,
   but returns |false| if something goes wrong}
@@ -126,19 +175,11 @@
 var k,@!l:KANJI_code; {small indices or counters}
 @z
 
-@x [4.49] l.1384 - pTeX:
-@<Character |k| cannot be printed@>=
-  (k<" ")or(k>"~")
-@y
-@<Character |k| cannot be printed@>=
-   not (ismultiprn(k) or xprn[k])
-@z
-
 @x [5.54] l.1514 - pTeX: Global variables
 @!trick_buf:array[0..ssup_error_line] of ASCII_code; {circular buffer for
   pseudoprinting}
 @y
-@!trick_buf:array[0..ssup_error_line] of ASCII_code; {circular buffer for
+@!trick_buf:array[0..ssup_error_line] of ext_ASCII_code; {circular buffer for
   pseudoprinting}
 @!trick_buf2:array[0..ssup_error_line] of 0..2; {pTeX: buffer for KANJI}
 @!kcode_pos: 0..2; {pTeX: denotes whether first byte or second byte of KANJI}
@@ -212,14 +253,16 @@
 no_print: do_nothing;
 pseudo: if tally<trick_count then trick_buf[tally mod error_line]:=s;
 @y
-procedure print_char(@!s:ASCII_code); {prints a single character}
+procedure print_char(@!s:ext_ASCII_code); {prints a single character}
 label exit; {label is not used but nonetheless kept (for other changes?)}
 begin if @<Character |s| is the current new-line character@> then
  if selector<pseudo then
   begin print_ln; return;
   end;
-if kcode_pos=1 then kcode_pos:=2
-else if iskanji1(xchr[s]) then
+if s>@"1FF then s:=s mod 256;
+if s<256 then kcode_pos:=0
+else if kcode_pos=1 then kcode_pos:=2
+else if iskanji1(xchr[s-256]) then
   begin kcode_pos:=1;
   if (selector=term_and_log)or(selector=log_only) then
     if file_offset>=max_print_line-1 then
@@ -299,6 +342,7 @@
       if selector<pseudo then
         begin print_ln; return;
         end;
+    if xprn[s] then begin print_char(s); return; end;
     nl:=new_line_char; new_line_char:=-1;
       {temporarily disable new-line character}
     j:=str_start[s];
@@ -314,6 +358,64 @@
 exit:end;
 @z
 
+@x
+procedure slow_print(@!s:integer); {prints string |s|}
+var j:pool_pointer; {current character code position}
+begin if (s>=str_ptr) or (s<256) then print(s)
+else begin j:=str_start[s];
+  while j<str_start[s+1] do
+    begin print(so(str_pool[j])); incr(j);
+    end;
+  end;
+end;
+@y
+procedure slow_print(@!s:integer); {prints string |s|}
+var j:pool_pointer; {current character code position}
+c:integer;
+begin if (s>=str_ptr) or (s<256) then print(s)
+else begin j:=str_start[s];
+  while j<str_start[s+1] do
+    begin c:=so(str_pool[j]);
+    if c>=@"100 then print_char(c) else print(c); incr(j);
+    end;
+  end;
+end;
+
+procedure slow_print_filename(@!s:integer);
+  {prints string |s| which represents filename, without code conversion}
+var i,j,l:pool_pointer; p:integer;
+begin if (s>=str_ptr) or (s<256) then print(s)
+else begin i:=str_start[s]; l:=str_start[s+1];
+  while i<l do begin
+    p:=multistrlenshort(str_pool, l, i);
+    if p<>1 then
+      begin for j:=i to i+p-1 do print_char(@"100+(so(str_pool[j]) mod @"100));
+      i:=i+p; end
+    else begin print(so(str_pool[i]) mod @"100); incr(i); end;
+    end;
+  end;
+end;
+
+procedure print_quoted(@!s:integer);
+  {prints string |s| which represents filename,
+   omitting quotes and with code conversion}
+var i,l:pool_pointer; j,p:integer;
+begin if s<>0 then begin
+  i:=str_start[s]; l:=str_start[s+1];
+  while i<l do begin
+    p:=multistrlenshort(str_pool, l, i);
+    if p<>1 then begin
+      for j:=i to i+p-1 do print_char(@"100+(so(str_pool[j]) mod @"100));
+      i:=i+p; end
+    else begin
+      if so(str_pool[i])<>"""" then print(so(str_pool[i]) mod @"100);
+      incr(i); end;
+    end;
+  end;
+end;
+
+@z
+
 @x [5.61] l.1656 - pTeX:
 @<Initialize the output...@>=
 if src_specials_p or file_line_error_style_p or parse_first_line_p then
@@ -341,6 +443,18 @@
 @ Old versions of \TeX\ needed a procedure called |print_ASCII| whose function
 @z
 
+@x [5.??] - pTeX: term_input
+@p procedure term_input; {gets a line from the terminal}
+@y
+@p procedure@?print_unread_buffer_with_ptenc; forward;@t\2@>@/
+procedure term_input; {gets a line from the terminal}
+@z
+@x [5.??] - pTeX: term_input
+if last<>first then for k:=first to last-1 do print(buffer[k]);
+@y
+if last<>first then print_unread_buffer_with_ptenc(first,last);
+@z
+
 @x
 @d max_halfword==@"FFFFFFF {largest allowable value in a |halfword|}
 @y
@@ -1452,7 +1566,13 @@
 tats
 @z
 
-@x [17.???] l.???? - pTeX multibyte control symbol
+@x [18] buffer2
+for k:=j to j+l-1 do append_char(buffer[k]);
+@y
+for k:=j to j+l-1 do append_char(buffer2[k]*@"100+buffer[k]);
+@z
+
+@x [18.???] l.???? - pTeX multibyte control symbol
 procedure print_cs(@!p:integer); {prints a purported control sequence}
 @y
 procedure print_cs(@!p:integer); {prints a purported control sequence}
@@ -1459,7 +1579,7 @@
 var j, l:pool_pointer; @!cat:0..max_char_code;
 @z
 
-@x
+@x [18.???]
 else  begin print_esc(text(p));
   print_char(" ");
   end;
@@ -1467,8 +1587,8 @@
 else  begin l:=text(p);
   print_esc(l); j:=str_start[l]; l:=str_start[l+1];
   if l>j+1 then begin
-    if l-j=multistrlen(ustringcast(str_pool), l, j) then
-      begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(str_pool), l, j)));
+    if (str_pool[j]>=@"100)and(l-j=multistrlenshort(str_pool, l, j)) then
+      begin cat:=kcat_code(kcatcodekey(fromBUFFshort(str_pool, l, j)));
       if (cat<>other_kchar) then print_char(" ");
       end
     else print_char(" "); end
@@ -1476,6 +1596,13 @@
   end;
 @z
 
+@x [18.???] pTeX: ensure buffer2[]=0 in primitive
+  for j:=0 to l-1 do buffer[j]:=so(str_pool[k+j]);
+@y
+  for j:=0 to l-1 do begin
+    buffer[j]:=Lo(so(str_pool[k+j])); buffer2[j]:=Hi(so(str_pool[k+j])); end;
+@z
+
 @x [18.265] l.5903 - pTeX: \jfont \tfont
 primitive("font",def_font,0);@/
 @!@:font_}{\.{\\font} primitive@>
@@ -1634,6 +1761,27 @@
 if trick_buf2[(p-1) mod error_line]=1 then p:=p-1;
 @z
 
+@x pTeX: buffer
+if j>0 then for i:=start to j-1 do
+  begin if i=loc then set_trick_count;
+  print(buffer[i]);
+  end
+@y
+if j>0 then begin
+  i:=start;
+  if (loc<=j-1)and(start<=loc) then begin
+    for i:=start to loc-1 do
+      if buffer2[i]>0 then
+        print_char(@"100*buffer2[i]+buffer[i]) else print(buffer[i]);
+        set_trick_count; print_unread_buffer_with_ptenc(loc,j);
+    end
+  else
+    for i:=start to j-1 do
+      if buffer2[i]>0 then
+        print_char(@"100*buffer2[i]+buffer[i]) else print(buffer[i]);
+  end
+@z
+
 @x [22.319] l.7157 - pTeX: adjust kanji code token
 @ @<Pseudoprint the token list@>=
 begin_pseudoprint;
@@ -1660,6 +1808,13 @@
 done1:
 @z
 
+@x [23.???] pTeX: init the input routines
+first:=buf_size; repeat buffer[first]:=0; decr(first); until first=0;
+@y
+first:=buf_size; repeat buffer[first]:=0; buffer2[first]:=0; decr(first); until first=0;
+@z
+
+
 @x [24.341] l.7479 - pTeX: set last_chr
 @!cat:0..max_char_code; {|cat_code(cur_chr)|, usually}
 @y
@@ -1681,6 +1836,8 @@
     if multistrlen(ustringcast(buffer), limit+1, loc-1)=2 then
       begin cur_chr:=fromBUFF(ustringcast(buffer), limit+1, loc-1);
       cur_cmd:=kcat_code(kcatcodekey(cur_chr));
+      for l:=loc-1 to loc-2+multistrlen(ustringcast(buffer), limit+1, loc-1) do
+        buffer2[l]:=1;
       incr(loc);
       end
     else reswitch: cur_cmd:=cat_code(cur_chr);
@@ -1769,7 +1926,10 @@
 begin if loc>limit then cur_cs:=null_cs {|state| is irrelevant in this case}
 else  begin k:=loc; cur_chr:=buffer[k]; incr(k);
   if multistrlen(ustringcast(buffer), limit+1, k-1)=2 then
-    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1))); incr(k);
+    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1)));
+    for l:=k-1 to k-2+multistrlen(ustringcast(buffer), limit+1, k-1) do
+      buffer2[l]:=1;
+    incr(k);
     end
   else cat:=cat_code(cur_chr);
 start_cs:
@@ -1832,10 +1992,10 @@
       end
     else if c<@'100 then buffer[k-1]:=c+@'100
     else buffer[k-1]:=c-@'100;
-    limit:=limit-d; first:=first-d;
+    buffer2[k-1]:=0; limit:=limit-d; first:=first-d;
     l:=k; cur_chr:=buffer[k-1]; cat:=cat_code(cur_chr);
     while l<=limit do
-      begin buffer[l]:=buffer[l+d]; incr(l);
+      begin buffer[l]:=buffer[l+d]; buffer2[l]:=buffer2[l+d]; incr(l);
       end;
     goto start_cs;
     end;
@@ -1858,7 +2018,10 @@
 @ @<Scan ahead in the buffer...@>=
 begin repeat cur_chr:=buffer[k]; incr(k);
   if multistrlen(ustringcast(buffer), limit+1, k-1)=2 then
-    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1))); incr(k);
+    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1)));
+    for l:=k-1 to k-2+multistrlen(ustringcast(buffer), limit+1, k-1) do
+      buffer2[l]:=1;
+    incr(k);
     end
   else cat:=cat_code(cur_chr);
   while (buffer[k]=cur_chr)and(cat=sup_mark)and(k<limit) do
@@ -1874,11 +2037,11 @@
       else cur_chr:=c-@'100;
       cat:=cat_code(cur_chr);
       if (cat=letter)or(cat=sup_mark) then
-        begin buffer[k-1]:=cur_chr;
+        begin buffer[k-1]:=cur_chr; buffer2[k-1]:=0;
         limit:=limit-d; first:=first-d;
         l:=k;
         while l<=limit do
-          begin buffer[l]:=buffer[l+d]; incr(l);
+          begin buffer[l]:=buffer[l+d]; buffer2[l]:=buffer2[l+d]; incr(l);
           end;
         end;
       end;
@@ -1951,6 +2114,23 @@
   end
 @z
 
+@x [24] pTeX: firm_up_the_line
+  if start<limit then for k:=start to limit-1 do print(buffer[k]);
+  first:=limit; prompt_input("=>"); {wait for user response}
+@.=>@>
+  if last>first then
+    begin for k:=first to last-1 do {move line down in buffer}
+      buffer[k+start-first]:=buffer[k];
+@y
+  if start<limit then for k:=start to limit-1 do
+    if buffer2[k]>=@"100 then print_char(buffer[k]) else print(buffer[k]);
+  first:=limit; prompt_input("=>"); {wait for user response}
+@.=>@>
+  if last>first then
+    begin for k:=first to last-1 do {move line down in buffer}
+      begin buffer[k+start-first]:=buffer[k]; buffer2[k+start-first]:=buffer2[k]; end;
+@z
+
 @x [24.365] l.7935 - pTeX: get_token
 @p procedure get_token; {sets |cur_cmd|, |cur_chr|, |cur_tok|}
 begin no_new_control_sequence:=false; get_next; no_new_control_sequence:=true;
@@ -1982,6 +2162,12 @@
     end;
   buffer[j]:=info(p) mod @'400; incr(j); p:=link(p);
   end;
+if j>first+1 then
+  begin no_new_control_sequence:=false; cur_cs:=id_lookup(first,j-first);
+  no_new_control_sequence:=true;
+  end
+else if j=first then cur_cs:=null_cs {the list is empty}
+else cur_cs:=single_base+buffer[first] {the list has length one}
 @y
 @ @<Look up the characters of list |r| in the hash table...@>=
 j:=first; p:=link(r);
@@ -1993,10 +2179,17 @@
 @:TeX capacity exceeded buffer size}{\quad buffer size@>
     end;
   if check_kanji(info(p)) then {|wchar_token|}
-    begin buffer[j]:=Hi(info(p)); incr(j);
-    end;
+    begin buffer[j]:=Hi(info(p)); buffer2[j]:=1; incr(j); buffer2[j]:=1;
+    end
+  else buffer2[j]:=0;
   buffer[j]:=Lo(info(p)); incr(j); p:=link(p);
   end;
+if j>first+1 then
+  begin no_new_control_sequence:=false; cur_cs:=id_lookup(first,j-first);
+  no_new_control_sequence:=true;
+  end
+else if j=first then cur_cs:=null_cs {the list is empty}
+else cur_cs:=single_base+buffer[first] {the list has length one}
 @z
 
 @x [25.380] l.8221 - pTeX: get_x_token
@@ -2363,8 +2556,8 @@
   if t=" " then t:=space_token
   else t:=other_token+t;
 @y
-  if multistrlen(ustringcast(str_pool), pool_ptr, k)=2 then
-    begin t:=fromBUFF(ustringcast(str_pool), pool_ptr, k); incr(k);
+  if t>=@"100 then
+    begin t:=fromBUFFshort(str_pool, pool_ptr, k); incr(k);
     end
   else if t=" " then t:=space_token
   else t:=other_token+t;
@@ -2651,6 +2844,28 @@
 end;
 @z
 
+@x [29.518] - print_quoted in pTeX is already defined
+@d print_quoted(#) == {print string |#|, omitting quotes}
+if #<>0 then
+  for j:=str_start[#] to str_start[#+1]-1 do
+    if so(str_pool[j])<>"""" then
+      print(so(str_pool[j]))
+
+@y
+@z
+
+@x
+@d append_to_name(#)==begin c:=#; if not (c="""") then begin incr(k);
+  if k<=file_name_size then name_of_file[k]:=xchr[c];
+  end end
+@y
+@d append_to_name(#)==begin if (#)>=@"100 then c:=(#)-@"100 else c:=#;
+  { Since the type of |c| is |ASCII_code|, above if-statement might not be needed }
+  if not (c="""") then begin incr(k);
+  if k<=file_name_size then name_of_file[k]:=xchr[c];
+  end end
+@z
+
 @x [29.526] l.10668 - pTeX: scan file name
 loop@+begin if (cur_cmd>other_char)or(cur_chr>255) then {not a character}
     begin back_input; goto done;
@@ -2669,8 +2884,8 @@
 loop@+begin
   if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then {is kanji}
     begin str_room(2);
-    append_char(Hi(cur_chr)); {kanji upper byte}
-    append_char(Lo(cur_chr)); {kanji lower byte}
+    append_char(@"100+Hi(cur_chr)); {kanji upper byte}
+    append_char(@"100+Lo(cur_chr)); {kanji lower byte}
     end
   else if (cur_cmd>other_char)or(cur_chr>255) then {not an alphabet}
     begin back_input; goto done;
@@ -2686,6 +2901,15 @@
 skip_mode:=true;
 @z
 
+@x [29.???] open_log_file
+if buffer[l]=end_line_char then decr(l);
+for k:=1 to l do print(buffer[k]);
+print_ln; {now the transcript file contains the first line of input}
+@y
+if buffer[l]=end_line_char then decr(l); print_unread_buffer_with_ptenc(1,l+1);
+print_ln; {now the transcript file contains the first line of input}
+@z
+
 @x [29.536] l.10834 - pTeX:
 begin
 if src_specials_p or file_line_error_style_p or parse_first_line_p
@@ -2705,6 +2929,14 @@
   wlog(')');
 @z
 
+@x [29.???] pTeX - start_input
+print_char("("); incr(open_parens);
+slow_print(full_source_filename_stack[in_open]); update_terminal;
+@y
+print_char("("); incr(open_parens);
+slow_print_filename(full_source_filename_stack[in_open]); update_terminal;
+@z
+
 @x [30.560] l.10968 - pTeX:
 This is called BigEndian order.
 @!@^BigEndian order@>
@@ -6268,6 +6500,24 @@
 show_mode: @<Show the current japanese processing mode@>;
 @z
 
+@x dump
+@!format_engine: ^text_char;
+@y
+@!w: four_quarters; {four ASCII codes}
+@!format_engine: ^text_char;
+@z
+
+@x undump
+@!format_engine: ^text_char;
+@!dummy_xord: ASCII_code;
+@!dummy_xchr: text_char;
+@y
+@!w: four_quarters; {four ASCII codes}
+@!format_engine: ^text_char;
+@!dummy_xord: ASCII_code;
+@!dummy_xchr: ext_ASCII_code;
+@z
+
 @x
 libc_free(format_engine);@/
 @y
@@ -6282,6 +6532,28 @@
 undump_kanji(fmt_file);
 @z
 
+@x
+dump_things(str_pool[0], pool_ptr);
+@y
+for k:=0 to str_ptr do dump_int(str_start[k]);
+k:=0;
+while k+4<pool_ptr do
+  begin dump_four_ASCII; k:=k+4;
+  end;
+k:=pool_ptr-4; dump_four_ASCII;
+@z
+
+@x
+undump_things(str_pool[0], pool_ptr);
+@y
+for k:=0 to str_ptr do undump(0)(pool_ptr)(str_start[k]);
+k:=0;
+while k+4<pool_ptr do
+  begin undump_four_ASCII; k:=k+4;
+  end;
+k:=pool_ptr-4; undump_four_ASCII;
+@z
+
 @x l.24982
 font_info:=xmalloc_array(fmemory_word, font_mem_size);
 @y
@@ -6339,6 +6611,13 @@
 undump_things(char_base[null_font], font_ptr+1-null_font);
 @z
 
+@x
+  buffer:=xmalloc_array (ASCII_code, buf_size);
+@y
+  buffer:=xmalloc_array (ASCII_code, buf_size);
+  buffer2:=xmalloc_array (ASCII_code, buf_size);
+@z
+
 @x l.25363 - pTeX
   font_info:=xmalloc_array (fmemory_word, font_mem_size);
 @y
@@ -6396,6 +6675,40 @@
 end
 @z
 
+@x [53.????] \write18{foo} (write_out in tex.ch)
+@!d:integer; {number of characters in incomplete current string}
+@y
+@!k:integer; {loop indices}
+@!d:integer; {number of characters in incomplete current string}
+@z
+
+@x [53.????] \write18{foo} (write_out in tex.ch)
+  for d:=0 to cur_length-1 do
+    begin {|print| gives up if passed |str_ptr|, so do it by hand.}
+    print(so(str_pool[str_start[str_ptr]+d])); {N.B.: not |print_char|}
+    end;
+@y
+  for d:=0 to cur_length-1 do
+    begin {|print| gives up if passed |str_ptr|, so do it by hand.}
+    if so(str_pool[str_start[str_ptr]+d])>=@"100 then
+    print_char(so(str_pool[str_start[str_ptr]+d]))
+    else print(so(str_pool[str_start[str_ptr]+d])); {N.B.: not |print_char|}
+    end;
+@z
+
+@x [53.????] ignore "flag bit" in str_pool for system(3)
+      runsystem_ret := runsystem(conststringcast(addressof(
+                                              str_pool[str_start[str_ptr]])));
+@y
+      if name_of_file then libc_free(name_of_file);
+      name_of_file := xmalloc(cur_length * 3 + 2);
+      k := 0;
+      for d:=0 to cur_length-1 do
+        append_to_name(str_pool[str_start[str_ptr]+d]);
+      name_of_file[k+1] := 0;
+      runsystem_ret := runsystem(conststringcast(name_of_file+1));
+@z
+
 @x [53.????] Implement \immediate, inhibit_glue_flag
   begin p:=tail; do_extension; {append a whatsit node}
   out_what(tail); {do the action immediately}
@@ -6439,6 +6752,31 @@
 @!debug debug_format_file:=true; @+gubed;
 @z
 
+@x pTeX: xchr
+  if eight_bit_p then
+    for k:=0 to 255 do
+      xprn[k]:=1;
+end;
+@y
+  if eight_bit_p then
+    for k:=0 to 255 do
+      xprn[k]:=1;
+end;
+for k:=256 to 511 do xchr[k]:=k;
+@z
+
+@x [54/web2c.???] scan_file_name_braced
+  for i:=str_start[s] to str_start[s+1]-1 do
+    dummy := more_name(str_pool[i]); {add each read character to the current file name}
+@y
+  for i:=str_start[s] to str_start[s+1]-1 do
+    if str_pool[i]>=@"100 then
+      begin str_room(1); append_char(str_pool[i]);
+      end
+    else
+      dummy := more_name(str_pool[i]); {add each read character to the current file name}
+@z
+
 @x l.26984 - pTeX
 @* \[54] System-dependent changes.
 @y
@@ -7501,8 +7839,8 @@
 @ @<Basic printing...@>=
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
-if s>255 then
-  begin print_char(Hi(s)); print_char(Lo(s));
+if s>@"FF then
+  begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s));
   end else print_char(s);
 end;
 
@@ -7536,5 +7874,25 @@
   end;
 end;
 
+@ This procedure is used in printing the second line in showing contexts.
+This part is not read by |get_next| yet, so we don't know which bytes
+are part of Japanese characters when the procedure is called.
+
+@<Basic printing...@>=
+procedure print_unread_buffer_with_ptenc(@!f, @!l: integer);
+{ print |buffer[f..l-1]| with code conversion }
+var @!i,@!j: pool_pointer; @!p: integer;
+begin
+  i:=f;
+  while i<l do begin
+    p:=multistrlen(ustringcast(buffer), l, i);
+    if p<>1 then
+      begin for j:=i to i+p-1 do
+        print_char(@"100+buffer[j]);
+      i:=i+p; end
+    else begin print(buffer[i]); incr(i); end;
+  end;
+end;
+
 @* \[56] System-dependent changes.
 @z

Modified: trunk/Build/source/texk/web2c/ptexdir/ptex.defines
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/ptex.defines	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ptexdir/ptex.defines	2022-01-22 17:03:22 UTC (rev 61692)
@@ -7,7 +7,10 @@
 
 @define function iskanji1 ();
 @define function multistrlen ();
+@define function multistrlenshort ();
+@define function multistrlenfilename ();
 @define function fromBUFF ();
+@define function fromBUFFshort ();
 @define function toBUFF ();
 
 @define function fromDVI ();

Modified: trunk/Build/source/texk/web2c/ptexdir/ptex_version.h
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/ptex_version.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/ptexdir/ptex_version.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1 +1 @@
-#define PTEX_VERSION "p3.10.0"
+#define PTEX_VERSION "p4.0.0"

Added: trunk/Build/source/texk/web2c/ptexdir/tests/filename_test.sh
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/tests/filename_test.sh	                        (rev 0)
+++ trunk/Build/source/texk/web2c/ptexdir/tests/filename_test.sh	2022-01-22 17:03:22 UTC (rev 61692)
@@ -0,0 +1,45 @@
+#!/bin/bash
+cat <<'EOF' > filename_日本語.tex
+% UTF-8 encoding
+\def\TEST#1{\ifx#1\relax\else\immediate\write16{[\meaning#1]}\expandafter\TEST\fi}
+\expandafter\TEST\jobname\relax
+
+\immediate\openout0=\jobname.txt
+\immediate\write0{日本語}
+\immediate\closeout0
+
+\openin0=\jobname.txt
+\ifeof0
+  \immediate\write16{File `\jobname.txt' not found.}
+\else
+  \read0 to \TEXT
+  \immediate\write16{TEXT=\TEXT}
+\fi
+\closein0
+
+\input \jobname.txt
+
+\end
+
+EOF
+
+ln -s filename_日本語.tex "filename_日本a.tex"
+eptex -synctex=1 "\input {filename_日本a}"
+ls *.synctex.gz; rm *.synctex.gz
+euptex -synctex=1 "\input {filename_日本a}"
+ls *.synctex.gz; rm *.synctex.gz
+ls "filename_日本a".*
+rm "filename_日本a".*
+ln -s filename_日本語.tex filename_日本ßſa.tex
+euptex -synctex=1 filename_日本ßſa
+ls *.synctex.gz; rm *.synctex.gz
+ls filename_日本ßſa.*
+rm filename_日本ßſa.*
+
+eptex -synctex=1 "-jobname=漢ßſa" filename_日本語
+ls *.synctex.gz; rm *.synctex.gz
+ls 漢*; rm 漢*
+euptex -synctex=1 "-jobname=漢ßſa" filename_日本語
+ls *.synctex.gz; rm *.synctex.gz
+ls 漢*; rm 漢*
+rm filename_日本語.tex


Property changes on: trunk/Build/source/texk/web2c/ptexdir/tests/filename_test.sh
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+LF
\ No newline at end of property
Added: trunk/Build/source/texk/web2c/ptexdir/tests/printkanji.tex
===================================================================
--- trunk/Build/source/texk/web2c/ptexdir/tests/printkanji.tex	                        (rev 0)
+++ trunk/Build/source/texk/web2c/ptexdir/tests/printkanji.tex	2022-01-22 17:03:22 UTC (rev 61692)
@@ -0,0 +1,168 @@
+%\scrollmode
+\tracingstats=1000
+\noautoxspacing
+\newlinechar=`\^^J
+\font\x=ec-lmtt10 \x
+\immediate\openout1=\jobname.out
+\def\MSG#1{%
+  \message{\string{MSG #1\string}}%
+  \immediate\write17{\string{TOT #1\string}}%
+  \immediate\write1{#1}%
+}
+\def\head#1{\message{■#1.}\par\noindent\hbox{■\null}#1.\par}
+\message{^^J}
+
+%================
+\def\A{^^c5^^bf ſ 顛 }
+A: \A
+
+\MSG{\A}
+\show\A
+
+%================
+\par
+\head{\string\meaning}
+
+\edef\B{\meaning\A}
+\meaning\B
+
+\B
+\MSG{\meaning\B}
+\show\B
+
+%================
+\head{\string\jobname}
+\edef\C{:\jobname:}
+\MSG{*あ*\jobname *\C*\meaning\C*}
+\message{^^J}
+
+%================
+\catcode`\^^c5=11
+\catcode`\^^bf=11
+\catcode`\^^e1=11
+\catcode`\^^e3=11
+\catcode`\^^81=11
+\catcode`\^^82=11
+
+\head{oneletter}
+
+\string\^^c4.\string\^^c5.\string\^^ff.
+\count0=`\^^c5%
+\MSG{\string\catcode`\string\^^c5 = \the\count0}%
+\count0=`^^c5%
+\MSG{\string\catcode`^^c5 = \the\count0}%
+\count0=`^^c5^^bf%
+\MSG{\string\catcode`^^c5^^bf = \the\count0}%
+\count0=`顛%
+\MSG{\string\catcode`顛 = \the\count0}%
+
+%================
+\head{csname1}
+
+{\def\顛{hoge}\def\^^c5^^bf{piyo}
+\show\顛
+\show\^^c5^^bf
+\expandafter\show\csname ^^c5^^bf\endcsname
+\MSG{\string\顛=>\meaning\顛}
+\MSG{\string\^^c5^^bf=>\meaning\^^c5^^bf}
+\MSG{\expandafter\string\csname ^^c5^^bf\endcsname
+  =>\expandafter\meaning\csname ^^c5^^bf\endcsname}}
+
+\string\^^c5^^bf
+\expandafter\string\csname ^^c5^^bf\endcsname,
+\expandafter\string\csname ſ\endcsname,
+\expandafter\string\csname 顛\endcsname
+\MSG{\string\^^c5^^bf,
+  \expandafter\string\csname ^^c5^^bf\endcsname,
+  \expandafter\string\csname ſ\endcsname,
+  \expandafter\string\csname 顛\endcsname.}
+
+%================
+
+\def\あ{hoge}
+\message{^^J}
+\def\TEST#1#2{%
+  \expandafter\def\csname#2\endcsname{piyo}
+  \par\toks0={#2}
+  \expandafter\string\csname #1\endcsname => \csname #1\endcsname,\par
+  \expandafter\string\csname #2\endcsname => \csname #2\endcsname,\par
+  \expandafter\string\csname \the\toks0\endcsname => \csname \the\toks0\endcsname.
+  \MSG{%
+    \expandafter\string\csname #1\endcsname => \csname #1\endcsname,
+    \expandafter\string\csname #2\endcsname => \csname #2\endcsname.
+    \expandafter\string\csname \the\toks0\endcsname => \csname \the\toks0\endcsname.
+  }%
+}
+\ifnum\euc"A4A2="A4A2\relax
+  \TEST{あ}{^^a4^^a2}
+\else
+  \TEST{あ}{^^e3^^81^^82}
+\fi
+
+%================
+\head{0xFF}
+\catcode"FF=11
+\message{^^J}
+
+\def\^^ff^^c5^^ff^^bf{あ}
+\edef\E{(\string\^^ff^^c5^^ff^^bf:\meaning\^^ff^^c5^^ff^^bf)}
+\^^ff^^c5^^ff^^bf, \E, \string\^^ff^^c5^^ff^^bf, \meaning\E.
+\MSG{\^^ff^^c5^^ff^^bf, \A, \string\^^ff^^c5^^ff^^bf, \meaning\E.}
+
+\def\^^ff{い}
+\edef\E{(\string\^^ff :\meaning\^^ff)}
+\^^ff, \E, \string\^^ff, \meaning\E.
+\MSG{\^^ff, \E, \string\^^ff, \meaning\E.}
+
+\immediate\closeout1
+
+%========
+\head{contexts}
+
+\errorcontextlines=10000
+\def\@{\A\undefined}
+\edef\a{^^c5^^bf a\A\noexpand\@ ſ 顛 }
+\edef\+#1{\noexpand\^^c5^^bf\noexpand\顛\noexpand\^^ff^^c5^^ff^^bf#1
+  \noexpand\^^ff\meaning\A\noexpand\^^c5^^bf\noexpand\顛}
+\+\a
+
+\^^ff^^c5^^ff^^bf ^^c5^^bf \^^c5^^bf ſ顛\a \^^ff ^^ff\^^c5^^bf
+
+\^^c5\^^ff\^^c4\^^fe
+
+\catcode`\^^c5=11
+\catcode`\^^be=11
+\catcode`\^^bf=11
+\catcode`\^^bd=11
+\catcode`\^^ff=11
+\font\x=ec-lmtt10 \x\scrollmode
+
+\def\^^c5^^bf{a}
+\def\顛{b}
+\def\転{c}
+\message{\string\^^c5^^bf \string\^^ff^^c5}
+
+\def\b{\ž.\ſ.\Ž..\転.\顛.\貼.}
+\b
+
+\ž.\ſ.\Ž..\転.\顛.\貼.
+
+
+%========
+\head{csname2}
+
+SHOW \show\ſ.\show\顛.%
+\expandafter\show\csname ſ\endcsname.%
+\expandafter\show\csname 顛\endcsname
+
+STRING \string\ſ.\string\顛.%
+\expandafter\string\csname ſ\endcsname.%
+\expandafter\string\csname 顛\endcsname
+
+MEANING \meaning\ſ.\meaning\顛.%
+\expandafter\meaning\csname ſ\endcsname.%
+\expandafter\meaning\csname 顛\endcsname.%
+
+\message{\string\^^e3^^81^^82 \string\あ}
+
+\end


Property changes on: trunk/Build/source/texk/web2c/ptexdir/tests/printkanji.tex
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Modified: trunk/Build/source/texk/web2c/synctexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/synctexdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/synctexdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,7 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* synctex.c: Reencode nameoffile to utf8 for pTeX.
+
 2021-12-16  Luigi Scarso  <luigi.scarso at gmail.com>
 
 	* synctex_main.c [WIN32]: #include <kpathsea/progname.h>

Modified: trunk/Build/source/texk/web2c/synctexdir/synctex.c
===================================================================
--- trunk/Build/source/texk/web2c/synctexdir/synctex.c	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/synctexdir/synctex.c	2022-01-22 17:03:22 UTC (rev 61692)
@@ -301,7 +301,27 @@
 #endif /* pdfTeX ... */
 #endif /* _WIN32 */
 
+#if defined(pTeX) || defined(upTeX) || defined(epTeX) || defined(eupTeX)
+#define IS_pTeX 1
+#else
+#define IS_pTeX 0
+#endif
+
 /*  This macro layer was added to take luatex into account as suggested by T. Hoekwater. */
+# if IS_pTeX && !defined(_WIN32)
+char *SYNCTEX_GET_JOB_NAME()
+{
+   char *tmp = gettexstring(jobname);
+   char *tmpa = ptenc_from_internal_enc_string_to_utf8(tmp);
+   if (tmpa) { SYNCTEX_FREE(tmp); return tmpa; } else return tmp;
+}
+char *SYNCTEX_GET_LOG_NAME()
+{
+   char *tmp = gettexstring(texmflogname);
+   char *tmpa = ptenc_from_internal_enc_string_to_utf8(tmp);
+   if (tmpa) { SYNCTEX_FREE(tmp); return tmpa; } else return tmp;
+}
+# else
 #   if !defined(SYNCTEX_GET_JOB_NAME)
 #       define SYNCTEX_GET_JOB_NAME() (gettexstring(jobname))
 #   endif
@@ -308,6 +328,7 @@
 #   if !defined(SYNCTEX_GET_LOG_NAME)
 #       define SYNCTEX_GET_LOG_NAME() (gettexstring(texmflogname))
 #   endif
+# endif
 #   if !defined(SYNCTEX_CURRENT_TAG)
 #       define SYNCTEX_CURRENT_TAG (curinput.synctextagfield)
 #   endif

Modified: trunk/Build/source/texk/web2c/texmfmem.h
===================================================================
--- trunk/Build/source/texk/web2c/texmfmem.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/texmfmem.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -63,7 +63,7 @@
 */
 
 /* Aleph is sufficiently different to separate the definition. */
-#if !defined(Aleph) && !defined(epTeX) && !defined(eupTeX) && !defined(upTeX)
+#if !defined(Aleph) && !defined(pTeX) && !defined(epTeX) && !defined(eupTeX) && !defined(upTeX)
 
 typedef union
 {
@@ -181,7 +181,7 @@
 #define qqqq v.QQQQ
 #endif
 
-#else /* Aleph || epTeX || eupTeX || upTeX */
+#else /* Aleph || pTeX || epTeX || eupTeX || upTeX */
 
 typedef union
 {
@@ -257,4 +257,4 @@
 
 #define gr gg.GLUE
 
-#endif /* Aleph || epTeX || eupTeX || upTeX */
+#endif /* Aleph || pTeX || epTeX || eupTeX || upTeX */

Modified: trunk/Build/source/texk/web2c/uptexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/ChangeLog	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/uptexdir/ChangeLog	2022-01-22 17:03:22 UTC (rev 61692)
@@ -1,3 +1,12 @@
+2022-01-22  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+
+	* uptex-m.ch: Change type of str_pool to unsigned short.
+	  We use only 0--511, and use flag 0x100 for Japanese char.
+	  More details in TUGboat 41(2):329--334, 2020.
+	* kanji.h: Adapt to arguments of input_line2() in ptexenc-1.4.0
+	  (for upBibTeX).
+	* uptex.defines: Add multistrlen{short,filename}, fromBUFFshort.
+
 2022-01-10  Karl Berry  <karl at freefriends.org>
 
 	* am/uptex.am: do not silence tangle-sh.

Modified: trunk/Build/source/texk/web2c/uptexdir/kanji.h
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/kanji.h	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/uptexdir/kanji.h	2022-01-22 17:03:22 UTC (rev 61692)
@@ -59,9 +59,9 @@
 #endif /* !PRESERVE_FPUTS */
 
 #ifdef UPBIBTEX
-#define inputline2(fp,buff,pos,size,ptr) input_line2(fp,buff,pos,size,ptr)
+#define inputline2(fp,buff,pos,size,ptr) input_line2(fp,buff,NULL,pos,size,ptr)
 #else
-#define inputline2(fp,buff,pos,size) input_line2(fp,buff,pos,size,NULL)
+#define inputline2(fp,buff,pos,size) input_line2(fp,buff,NULL,pos,size,NULL)
 #endif
 
 extern void init_kanji (const_string file_str, const_string internal_str);

Modified: trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch	2022-01-22 17:03:22 UTC (rev 61692)
@@ -115,9 +115,9 @@
 @z
 
 @x
-if kcode_pos=1 then kcode_pos:=2
+else if kcode_pos=1 then kcode_pos:=2
 @y
-if (kcode_pos=1)or((kcode_pos>=@'11)and(kcode_pos<=@'12))
+else if (kcode_pos=1)or((kcode_pos>=@'11)and(kcode_pos<=@'12))
    or((kcode_pos>=@'21)and(kcode_pos<=@'23)) then incr(kcode_pos)
 @z
 
@@ -388,6 +388,8 @@
     if multistrlen(ustringcast(buffer), limit+1, loc-1)=2 then
       begin cur_chr:=fromBUFF(ustringcast(buffer), limit+1, loc-1);
       cur_cmd:=kcat_code(kcatcodekey(cur_chr));
+      for l:=loc-1 to loc-2+multistrlen(ustringcast(buffer), limit+1, loc-1) do
+        buffer2[l]:=1;
       incr(loc);
       end
     else reswitch: cur_cmd:=cat_code(cur_chr);
@@ -397,6 +399,8 @@
     cur_cmd:=kcat_code(kcatcodekey(cur_chr));
     if (multistrlen(ustringcast(buffer), limit+1, loc)>1) and check_kcat_code(cur_cmd) then begin
       if (cur_cmd=not_cjk) then cur_cmd:=other_kchar;
+      for l:=loc to loc-1+multistrlen(ustringcast(buffer), limit+1, loc) do
+        buffer2[l]:=1;
       loc:=loc+multistrlen(ustringcast(buffer), limit+1, loc) end
     else begin
       cur_chr:=buffer[loc]; incr(loc);
@@ -428,7 +432,10 @@
 @x
 else  begin k:=loc; cur_chr:=buffer[k]; incr(k);
   if multistrlen(ustringcast(buffer), limit+1, k-1)=2 then
-    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1))); incr(k);
+    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1)));
+    for l:=k-1 to k-2+multistrlen(ustringcast(buffer), limit+1, k-1) do
+      buffer2[l]:=1;
+    incr(k);
     end
   else cat:=cat_code(cur_chr);
 start_cs:
@@ -439,6 +446,8 @@
   cat:=kcat_code(kcatcodekey(cur_chr));
   if (multistrlen(ustringcast(buffer), limit+1, k)>1) and check_kcat_code(cat) then begin
     if (cat=not_cjk) then cat:=other_kchar;
+    for l:=k to k-1+multistrlen(ustringcast(buffer), limit+1, k) do
+      buffer2[l]:=1;
     k:=k+multistrlen(ustringcast(buffer), limit+1, k) end
   else begin {not multi-byte char}
     cur_chr:=buffer[k];
@@ -464,7 +473,10 @@
 @x
 begin repeat cur_chr:=buffer[k]; incr(k);
   if multistrlen(ustringcast(buffer), limit+1, k-1)=2 then
-    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1))); incr(k);
+    begin cat:=kcat_code(kcatcodekey(fromBUFF(ustringcast(buffer), limit+1, k-1)));
+    for l:=k-1 to k-2+multistrlen(ustringcast(buffer), limit+1, k-1) do
+      buffer2[l]:=1;
+    incr(k);
     end
   else cat:=cat_code(cur_chr);
 @y
@@ -473,6 +485,8 @@
   cat:=kcat_code(kcatcodekey(cur_chr));
   if (multistrlen(ustringcast(buffer), limit+1, k)>1) and check_kcat_code(cat) then begin
     if (cat=not_cjk) then cat:=other_kchar;
+    for l:=k to k-1+multistrlen(ustringcast(buffer), limit+1, k) do
+      buffer2[l]:=1;
     k:=k+multistrlen(ustringcast(buffer), limit+1, k) end
   else begin {not multi-byte char}
     cur_chr:=buffer[k];
@@ -518,20 +532,21 @@
 
 @x
   if check_kanji(info(p)) then {|wchar_token|}
-    begin buffer[j]:=Hi(info(p)); incr(j);
-    end;
+    begin buffer[j]:=Hi(info(p)); buffer2[j]:=1; incr(j); buffer2[j]:=1;
+    end
+  else buffer2[j]:=0;
   buffer[j]:=Lo(info(p)); incr(j); p:=link(p);
 @y
   if check_kanji(info(p)) then {|wchar_token|}
     begin t:=toBUFF(info(p) mod max_cjk_val);
-    if BYTE1(t)<>0 then begin buffer[j]:=BYTE1(t); incr(j); end;
-    if BYTE2(t)<>0 then begin buffer[j]:=BYTE2(t); incr(j); end;
-    if BYTE3(t)<>0 then begin buffer[j]:=BYTE3(t); incr(j); end;
-                              buffer[j]:=BYTE4(t); incr(j);
+    if BYTE1(t)<>0 then begin buffer[j]:=BYTE1(t); buffer2[j]:=1; incr(j); end;
+    if BYTE2(t)<>0 then begin buffer[j]:=BYTE2(t); buffer2[j]:=1; incr(j); end;
+    if BYTE3(t)<>0 then begin buffer[j]:=BYTE3(t); buffer2[j]:=1; incr(j); end;
+                              buffer[j]:=BYTE4(t); buffer2[j]:=1; incr(j);
     p:=link(p);
     end
   else
-    begin buffer[j]:=info(p) mod max_char_val; incr(j); p:=link(p);
+    begin buffer[j]:=info(p) mod max_char_val; buffer2[j]:=0; incr(j); p:=link(p);
     end;
 @z
 
@@ -606,8 +621,8 @@
 p:=temp_head; link(p):=null; k:=b;
 while k<pool_ptr do
   begin t:=so(str_pool[k]);
-  if multistrlen(ustringcast(str_pool), pool_ptr, k)=2 then
-    begin t:=fromBUFF(ustringcast(str_pool), pool_ptr, k); incr(k);
+  if t>=@"100 then
+    begin t:=fromBUFFshort(str_pool, pool_ptr, k); incr(k);
     end
   else if t=" " then t:=space_token
   else t:=other_token+t;
@@ -618,15 +633,15 @@
 begin str_room(1);
 p:=temp_head; link(p):=null; k:=b;
 while k<pool_ptr do
-  begin t:=fromBUFF(ustringcast(str_pool), pool_ptr, k);
-  cc:=kcat_code(kcatcodekey(t));
-  if (multistrlen(ustringcast(str_pool), pool_ptr, k)>1)and
-       check_kcat_code(cc) then
-    begin if (cc=not_cjk) then cc:=other_kchar;
-	  t:=t+cc*max_cjk_val;
-	  k:=k+multistrlen(ustringcast(str_pool), pool_ptr, k)-1;
+  begin  t:=so(str_pool[k]);
+  if t>=@"180 then { there is no |wchar_token| whose code is 0--127. }
+    begin t:=fromBUFFshort(str_pool, pool_ptr, k); cc:=kcat_code(kcatcodekey(t));
+    if (cc=not_cjk) then cc:=other_kchar;
+    t:=t+cc*max_cjk_val;
+    k:=k+multistrlenshort(str_pool, pool_ptr, k)-1;
     end
   else begin t:=so(str_pool[k]);
+    if t>=@"100 then t:=t-@"100;
     if t=" " then t:=space_token
     else t:=other_token+t;
   end;
@@ -709,16 +724,16 @@
 @x
   if (cur_cmd=kanji)or(cur_cmd=kana)or(cur_cmd=other_kchar) then {is kanji}
     begin str_room(2);
-    append_char(Hi(cur_chr)); {kanji upper byte}
-    append_char(Lo(cur_chr)); {kanji lower byte}
+    append_char(@"100+Hi(cur_chr)); {kanji upper byte}
+    append_char(@"100+Lo(cur_chr)); {kanji lower byte}
 @y
   if (cur_cmd>=kanji)and(cur_cmd<=hangul) then {|wchar_token|}
     begin str_room(4); {4 is maximum}
     cur_chr:=toBUFF(cur_chr);
-    if BYTE1(cur_chr)<>0 then append_char(BYTE1(cur_chr));
-    if BYTE2(cur_chr)<>0 then append_char(BYTE2(cur_chr));
-    if BYTE3(cur_chr)<>0 then append_char(BYTE3(cur_chr));
-                              append_char(BYTE4(cur_chr));
+    if BYTE1(cur_chr)<>0 then append_char(@"100+BYTE1(cur_chr));
+    if BYTE2(cur_chr)<>0 then append_char(@"100+BYTE2(cur_chr));
+    if BYTE3(cur_chr)<>0 then append_char(@"100+BYTE3(cur_chr));
+                              append_char(@"100+BYTE4(cur_chr));
 @z
 
 @x
@@ -1220,17 +1235,17 @@
 @x
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
-if s>255 then
-  begin print_char(Hi(s)); print_char(Lo(s));
+if s>@"FF then
+  begin print_char(@"100+Hi(s)); print_char(@"100+Lo(s));
   end else print_char(s);
 @y
 procedure print_kanji(@!s:KANJI_code); {prints a single character}
 begin
 s:=toBUFF(s mod max_cjk_val);
-if BYTE1(s)<>0 then print_char(BYTE1(s));
-if BYTE2(s)<>0 then print_char(BYTE2(s));
-if BYTE3(s)<>0 then print_char(BYTE3(s));
-                    print_char(BYTE4(s));
+if BYTE1(s)<>0 then print_char(@"100+BYTE1(s));
+if BYTE2(s)<>0 then print_char(@"100+BYTE2(s));
+if BYTE3(s)<>0 then print_char(@"100+BYTE3(s));
+                    print_char(@"100+BYTE4(s));
 end;
 
 function check_kcat_code(@!ct:integer):integer;

Modified: trunk/Build/source/texk/web2c/uptexdir/uptex.defines
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/uptex.defines	2022-01-22 16:56:44 UTC (rev 61691)
+++ trunk/Build/source/texk/web2c/uptexdir/uptex.defines	2022-01-22 17:03:22 UTC (rev 61692)
@@ -17,8 +17,11 @@
 @define function ismultichr ();
 
 @define function multistrlen ();
+@define function multistrlenshort ();
+@define function multistrlenfilename ();
 @define function multibytelen ();
 @define function fromBUFF ();
+@define function fromBUFFshort ();
 @define function toBUFF ();
 
 @define function fromDVI ();



More information about the tex-live-commits mailing list.