texlive[69798] Build/source/texk/ptexenc: ptexenc: Add function guessing end-of-line code

commits+takuji at tug.org commits+takuji at tug.org
Sun Feb 11 09:19:53 CET 2024


Revision: 69798
          https://tug.org/svn/texlive?view=revision&revision=69798
Author:   takuji
Date:     2024-02-11 09:19:52 +0100 (Sun, 11 Feb 2024)
Log Message:
-----------
ptexenc: Add function guessing end-of-line code

Modified Paths:
--------------
    trunk/Build/source/texk/ptexenc/ChangeLog
    trunk/Build/source/texk/ptexenc/Makefile.am
    trunk/Build/source/texk/ptexenc/Makefile.in
    trunk/Build/source/texk/ptexenc/c-auto.in
    trunk/Build/source/texk/ptexenc/configure
    trunk/Build/source/texk/ptexenc/ptekf.c
    trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
    trunk/Build/source/texk/ptexenc/ptexenc.c
    trunk/Build/source/texk/ptexenc/version.ac

Added Paths:
-----------
    trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test

Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ChangeLog	2024-02-11 08:19:52 UTC (rev 69798)
@@ -1,3 +1,13 @@
+2024-02-11  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* ptexenc.c, ptexenc/ptexenc.h:
+	Add function guessing end-of-line code.
+	* ptekf.c: Better treatment of end-of-line.
+	* tests/ptekf-eol.test: New tests for end-of-line.
+	* Makefile.am: Adjust.
+	* version.ac: Bump to 1.4.6.
+	https://github.com/texjporg/tex-jp-build/issues/142
+
 2024-02-10  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* ptekf.1: Add command line manual.

Modified: trunk/Build/source/texk/ptexenc/Makefile.am
===================================================================
--- trunk/Build/source/texk/ptexenc/Makefile.am	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/Makefile.am	2024-02-11 08:19:52 UTC (rev 69798)
@@ -49,9 +49,9 @@
 
 ## Tests
 #
-TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test  tests/ptekf-conv.test
+TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test tests/ptekf-eol.test
 #
-tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log \
+tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log tests/ptekf-eol.log \
   : ptekf$(EXEEXT)
 #
 EXTRA_DIST += $(TESTS)
@@ -67,7 +67,7 @@
   tests/enc-utf8.bib-euc tests/enc-utf8.bib-jis tests/enc-utf8.bib-sjis tests/enc-utf8.bib-utf8 \
   tests/enc-utf8a.bib-euc tests/enc-utf8a.bib-jis tests/enc-utf8a.bib-sjis tests/enc-utf8a.bib-utf8 \
   tests/enc-utf8b.bib-euc tests/enc-utf8b.bib-jis tests/enc-utf8b.bib-sjis tests/enc-utf8b.bib-utf8
-DISTCLEANFILES = enc-*.bib*
+DISTCLEANFILES = enc-*.bib* saza-*.txt*
 
 
 # Rebuild

Modified: trunk/Build/source/texk/ptexenc/Makefile.in
===================================================================
--- trunk/Build/source/texk/ptexenc/Makefile.in	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/Makefile.in	2024-02-11 08:19:52 UTC (rev 69798)
@@ -617,8 +617,8 @@
 LDADD = $(KPATHSEA_LIBS)
 
 #
-TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test  tests/ptekf-conv.test
-DISTCLEANFILES = enc-*.bib*
+TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test tests/ptekf-eol.test
+DISTCLEANFILES = enc-*.bib* saza-*.txt*
 
 # Rebuild
 rebuild_prereq = 
@@ -1486,7 +1486,7 @@
 
 @KPATHSEA_RULE@
 #
-tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log \
+tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log tests/ptekf-eol.log \
   : ptekf$(EXEEXT)
 rebuild.stamp: $(rebuild_target)
 	echo timestamp >$@

Modified: trunk/Build/source/texk/ptexenc/c-auto.in
===================================================================
--- trunk/Build/source/texk/ptexenc/c-auto.in	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/c-auto.in	2024-02-11 08:19:52 UTC (rev 69798)
@@ -6,7 +6,7 @@
 #define PTEXENC_C_AUTO_H
 
 /* ptexenc: the version string. */
-#define PTEXENCVERSION "ptexenc version 1.4.5"
+#define PTEXENCVERSION "ptexenc version 1.4.6"
 
 /* Define to 1 if the 'closedir' function returns void instead of int. */
 #undef CLOSEDIR_VOID

Modified: trunk/Build/source/texk/ptexenc/configure
===================================================================
--- trunk/Build/source/texk/ptexenc/configure	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/configure	2024-02-11 08:19:52 UTC (rev 69798)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.72 for ptexenc 1.4.5.
+# Generated by GNU Autoconf 2.72 for ptexenc 1.4.6.
 #
 # Report bugs to <tex-k at tug.org>.
 #
@@ -614,8 +614,8 @@
 # Identity of this package.
 PACKAGE_NAME='ptexenc'
 PACKAGE_TARNAME='ptexenc'
-PACKAGE_VERSION='1.4.5'
-PACKAGE_STRING='ptexenc 1.4.5'
+PACKAGE_VERSION='1.4.6'
+PACKAGE_STRING='ptexenc 1.4.6'
 PACKAGE_BUGREPORT='tex-k at tug.org'
 PACKAGE_URL=''
 
@@ -1363,7 +1363,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-'configure' configures ptexenc 1.4.5 to adapt to many kinds of systems.
+'configure' configures ptexenc 1.4.6 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1434,7 +1434,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ptexenc 1.4.5:";;
+     short | recursive ) echo "Configuration of ptexenc 1.4.6:";;
    esac
   cat <<\_ACEOF
 
@@ -1555,7 +1555,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ptexenc configure 1.4.5
+ptexenc configure 1.4.6
 generated by GNU Autoconf 2.72
 
 Copyright (C) 2023 Free Software Foundation, Inc.
@@ -2097,7 +2097,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ptexenc $as_me 1.4.5, which was
+It was created by ptexenc $as_me 1.4.6, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -2875,10 +2875,10 @@
 
 
 
-PTEXENCVERSION=1.4.5
+PTEXENCVERSION=1.4.6
 
 
-PTEXENC_LT_VERSINFO=5:5:4
+PTEXENC_LT_VERSINFO=5:6:4
 
 
 am__api_version='1.16'
@@ -8664,7 +8664,7 @@
 
 # Define the identity of the package.
  PACKAGE='ptexenc'
- VERSION='1.4.5'
+ VERSION='1.4.6'
 
 
 printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -15005,7 +15005,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-ptexenc config.lt 1.4.5
+ptexenc config.lt 1.4.6
 configured by $0, generated by GNU Autoconf 2.72.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -16777,7 +16777,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ptexenc $as_me 1.4.5, which was
+This file was extended by ptexenc $as_me 1.4.6, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -16845,7 +16845,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-ptexenc config.status 1.4.5
+ptexenc config.status 1.4.6
 configured by $0, generated by GNU Autoconf 2.72,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/ptexenc/ptekf.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptekf.c	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptekf.c	2024-02-11 08:19:52 UTC (rev 69798)
@@ -45,7 +45,7 @@
   {0, 0, 0, 0}
 };
 
-#define MY_VERSION   "20240201"
+#define MY_VERSION   "20240211"
 #define BUG_ADDRESS  "issue at texjp.org"
 
 static void show_version(void)
@@ -87,7 +87,7 @@
   if ((len = input_line2(fp, (unsigned char *)buff, NULL, 0, size, &c)) == 0
       && c != '\r' && c != '\n') return NULL;
   if (c == '\n' || c == '\r') {
-    if (len+1 < size) strcat(buff+len, "\n");
+    if (len+1 < size) { buff[len]=(unsigned char)c; buff[len+1]='\0'; }
     else ungetc(c, fp);
   }
   if (c == EOF) return NULL;
@@ -183,7 +183,7 @@
       exit(32);
     }
     if (flg_guess_enc) {
-      genc = ptenc_guess_enc(infp, 1);
+      genc = ptenc_guess_enc(infp, 1, 1);
       printf("%s: %s\n", infname, genc);
       setinfileenc(infp, genc);
       free(genc);
@@ -212,6 +212,11 @@
       while ((ret = mfgets(buff, BUFFERLEN, infp)) != NULL) {
         (*fputs__)(buff, outfp);
       }
+      if (ret == NULL && feof(infp)) {
+        c = buff[strlen(buff)-1];
+        if (c != '\n' && c != '\r')
+          (*fputs__)(buff, outfp);
+      }
       if (fclose(infp)) {
         fprintf(stderr, "ERROR: fail to close input file [%s].", infname);
         exit(32);

Modified: trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h	2024-02-11 08:19:52 UTC (rev 69798)
@@ -101,7 +101,7 @@
 extern PTENCDLL boolean setinfileenc(FILE *fp, const char *str);
 extern PTENCDLL boolean setstdinenc(const char *str);
 extern PTENCDLL boolean setfileenc(const char *str);
-extern PTENCDLL char *ptenc_guess_enc(FILE *fp, boolean chk_bom);
+extern PTENCDLL char *ptenc_guess_enc(FILE *fp, boolean chk_bom, boolean chk_nl);
 
 #ifdef WIN32
 extern PTENCDLL void clear_infile_enc(FILE *fp);

Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c	2024-02-11 08:19:52 UTC (rev 69798)
@@ -73,14 +73,14 @@
     if (strcasecmp(str, "utf8")   == 0) return ENC_UTF8;
     if (UPTEX_enabled && strcasecmp(str, "uptex")  == 0) return ENC_UPTEX;
 
-    if (strcasecmp(str, "ASCII")== 0)        return file_enc;
+    if (strncasecmp(str, "ASCII", 5)== 0)      return file_enc;
     if (strncasecmp(str, "AMBIGUOUS", 9) == 0) return guess_enc;
-    if (strcasecmp(str, "BINARY") == 0)      return ENC_JIS;
-    if (strcasecmp(str, "ISO-2022-JP") == 0) return ENC_JIS;
-    if (strcasecmp(str, "EUC-JP") == 0)      return ENC_EUC;
-    if (strcasecmp(str, "Shift_JIS")   == 0) return ENC_SJIS;
-    if (strncasecmp(str, "UTF-8", 5)   == 0) return ENC_UTF8;
-    if (strcasecmp(str, "ISO-8859") == 0)    return ENC_JIS;
+    if (strncasecmp(str, "BINARY", 6) == 0)       return ENC_JIS;
+    if (strncasecmp(str, "ISO-2022-JP", 11) == 0) return ENC_JIS;
+    if (strncasecmp(str, "EUC-JP", 6) == 0)       return ENC_EUC;
+    if (strncasecmp(str, "Shift_JIS", 9)   == 0)  return ENC_SJIS;
+    if (strncasecmp(str, "UTF-8", 5)   == 0)      return ENC_UTF8;
+    if (strncasecmp(str, "ISO-8859", 8) == 0)     return ENC_JIS;
     return -1; /* error */
 }
 
@@ -820,10 +820,11 @@
       JIS X 0208 only and no platform dependent characters in Shift_JIS, EUC-JP
       ISO-8859 may have 0xA0..0xFF, may not have 0x80..0x9F
 */
-char *ptenc_guess_enc(FILE *fp, boolean chk_bom)
+char *ptenc_guess_enc(FILE *fp, boolean chk_bom, boolean chk_nl)
 {
     char *enc;
     int k0, k1, k2, cdb[2], cu8[4], len_utf8;
+    int nl0=0, nl_cr=0, nl_lf=0, nl_crlf=0;
     int is_ascii=1, lbyte=0;
     int maybe_sjis=1, maybe_euc=1, maybe_utf8=1, maybe_iso8859=1, pos_db=0, pos_utf8=0;
     int ch_sjis=0, ch_euc=0, ch_utf8=0, ch_iso8859=0, bom=0;
@@ -831,14 +832,23 @@
     int i;
     unsigned char str0[5];
 #endif /* DEBUG */
-    enc = xmalloc(sizeof(char)*20);
+    enc = xmalloc(sizeof(char)*32);
 
     while ((k0 = fgetc(fp)) != EOF &&
-           (maybe_sjis+maybe_euc+maybe_utf8+maybe_iso8859>1 || pos_db || pos_utf8 || lbyte<200)) {
+           (maybe_sjis+maybe_euc+maybe_utf8+maybe_iso8859>1 || pos_db || pos_utf8
+            || lbyte<320 || k0=='\r')) {
+        if (chk_nl) {
+            if (k0 == '\r') nl_cr++;
+            if (k0 == '\n') {
+                if (nl0 == '\r') { nl_cr--;  nl_crlf++; }
+                else             { nl_lf++;             }
+            }
+        }
         if (maybe_iso8859 && maybe_sjis+maybe_euc+maybe_utf8==1 && !pos_db && !pos_utf8
-            && ch_iso8859>=2000) {
+            && ch_iso8859>=2000 && k0!='\r') {
             break;
         }
+        nl0 = k0;
         if (chk_bom && lbyte<4 && bom_l[lbyte] <= k0 && k0 <= bom_u[lbyte]) bom++;
         lbyte++;
         if (k0==ESC) {
@@ -1049,6 +1059,12 @@
     else
         strcpy(enc,"BINARY");
   post_process:
+    if (chk_nl && (nl_cr || nl_lf || nl_crlf)) {
+        if      (nl_lf+nl_crlf==0) strcat(enc," (CR)");
+        else if (nl_cr+nl_crlf==0) strcat(enc," (LF)");
+        else if (nl_cr+nl_lf  ==0) strcat(enc," (CRLF)");
+        else                       strcat(enc," (MIXED NL)");
+    }
     rewind (fp);
     return enc;
 }
@@ -1102,7 +1118,7 @@
             getc4(fp);
             getc4(fp);
             rewind(fp);
-            enc = ptenc_guess_enc(fp, 0);
+            enc = ptenc_guess_enc(fp, 0, 0);
             if (string_to_enc(enc) > 0) {
                 infile_enc[fd] = string_to_enc(enc);
                 fprintf(stderr, "(guessed encoding #%d: %s = %s)", fd, enc, enc_to_string(infile_enc[fd]));

Added: trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test
===================================================================
--- trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test	                        (rev 0)
+++ trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test	2024-02-11 08:19:52 UTC (rev 69798)
@@ -0,0 +1,58 @@
+#! /bin/sh -vx
+# $Id$
+# Copyright 2024 Japanese TeX Development Community <issue at texjp.org>
+# You may freely use, modify and/or distribute this file.
+
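+# Round-trip test for end-of-line handling: UTF-8 samples with CR, LF, CRLF and mixed line endings are converted by ptekf, converted back to UTF-8, and must be identical to the originals.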
+
+BinDir=${BinDir:-.}
+ExeExt=${ExeExt:-}
+_ptekf=$BinDir/ptekf$ExeExt
+
+TEXMFCNF=$srcdir/../kpathsea
+web2cdir=$srcdir/../web2c
+
+export TEXMFCNF
+
+rm -f ./saza*.txt*
+
+#   "さざ波" -> \343\201\225\343\201\226\346\263\242
+jstr='\343\201\225\343\201\226\346\263\242'
+astr='Ripples'
+cr='\015'
+lf='\012'
+crlf='\015\012'
+
+printf "$astr$cr$jstr$cr$astr $jstr$cr$jstr" > saza-utf8-cr0.txt
+printf "$astr$lf$jstr$lf$astr $jstr$lf$jstr" > saza-utf8-lf0.txt
+printf "$astr$crlf$jstr$crlf$astr $jstr$crlf$jstr" > saza-utf8-crlf0.txt
+printf "$astr$cr$jstr$lf$astr $jstr$crlf$jstr" > saza-utf8-mix.txt
+printf "$astr$cr$jstr$cr$astr $jstr$cr$jstr$cr" > saza-utf8-cr1.txt
+printf "$astr$lf$jstr$lf$astr $jstr$lf$jstr$lf" > saza-utf8-lf1.txt
+printf "$astr$crlf$jstr$crlf$astr $jstr$crlf$jstr$crlf" > saza-utf8-crlf1.txt
+
+for nl in cr0 lf0 crlf0 cr1 lf1 crlf1 mix; do
+
+  $_ptekf --guess saza-utf8-$nl.txt || exit 1
+  $_ptekf -Gj saza-utf8-$nl.txt || exit 2
+  mv saza-utf8-$nl.txt.out saza-utf8-$nl-jis.txt
+  $_ptekf -Ge saza-utf8-$nl.txt || exit 3
+  mv saza-utf8-$nl.txt.out saza-utf8-$nl-euc.txt
+  $_ptekf -Gs saza-utf8-$nl.txt || exit 4
+  mv saza-utf8-$nl.txt.out saza-utf8-$nl-sjis.txt
+
+
+  for enc in jis euc sjis; do
+
+    $_ptekf -Gu saza-utf8-$nl-$enc.txt || exit 21
+    mv saza-utf8-$nl-$enc.txt.out saza-utf8-$nl-$enc-utf8.txt
+    diff saza-utf8-$nl.txt saza-utf8-$nl-$enc-utf8.txt || exit 22
+
+  done
+
+done
+
+$_ptekf --guess saza-utf8-*-jis.txt || exit 51
+$_ptekf --guess saza-utf8-*-euc.txt || exit 52
+$_ptekf --guess saza-utf8-*-sjis.txt || exit 53
+$_ptekf --guess saza-utf8-*-*-utf8.txt || exit 54


Property changes on: trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: trunk/Build/source/texk/ptexenc/version.ac
===================================================================
--- trunk/Build/source/texk/ptexenc/version.ac	2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/version.ac	2024-02-11 08:19:52 UTC (rev 69798)
@@ -11,4 +11,4 @@
 dnl see kpathsea/version.ac.
 dnl
 dnl This file is m4-included from configure.ac.
-m4_define([ptexenc_version], [1.4.5])
+m4_define([ptexenc_version], [1.4.6])



More information about the tex-live-commits mailing list.