texlive[69798] Build/source/texk/ptexenc: ptexenc: Add function guessing end-of-line code
commits+takuji at tug.org
commits+takuji at tug.org
Sun Feb 11 09:19:53 CET 2024
Revision: 69798
https://tug.org/svn/texlive?view=revision&revision=69798
Author: takuji
Date: 2024-02-11 09:19:52 +0100 (Sun, 11 Feb 2024)
Log Message:
-----------
ptexenc: Add function guessing end-of-line code
Modified Paths:
--------------
trunk/Build/source/texk/ptexenc/ChangeLog
trunk/Build/source/texk/ptexenc/Makefile.am
trunk/Build/source/texk/ptexenc/Makefile.in
trunk/Build/source/texk/ptexenc/c-auto.in
trunk/Build/source/texk/ptexenc/configure
trunk/Build/source/texk/ptexenc/ptekf.c
trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
trunk/Build/source/texk/ptexenc/ptexenc.c
trunk/Build/source/texk/ptexenc/version.ac
Added Paths:
-----------
trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test
Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ChangeLog 2024-02-11 08:19:52 UTC (rev 69798)
@@ -1,3 +1,13 @@
+2024-02-11 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
+
+ * ptexenc.c, ptexenc/ptexenc.h:
+ Add function guessing end-of-line code.
+ * ptekf.c: Better treatment of end-of-line.
+ * tests/ptekf-eol.test: New tests for end-of-line.
+ * Makefile.am: Adjust.
+ * version.ac: Bump to 1.4.6.
+ https://github.com/texjporg/tex-jp-build/issues/142
+
2024-02-10 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
* ptekf.1: Add command line manual.
Modified: trunk/Build/source/texk/ptexenc/Makefile.am
===================================================================
--- trunk/Build/source/texk/ptexenc/Makefile.am 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/Makefile.am 2024-02-11 08:19:52 UTC (rev 69798)
@@ -49,9 +49,9 @@
## Tests
#
-TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test
+TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test tests/ptekf-eol.test
#
-tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log \
+tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log tests/ptekf-eol.log \
: ptekf$(EXEEXT)
#
EXTRA_DIST += $(TESTS)
@@ -67,7 +67,7 @@
tests/enc-utf8.bib-euc tests/enc-utf8.bib-jis tests/enc-utf8.bib-sjis tests/enc-utf8.bib-utf8 \
tests/enc-utf8a.bib-euc tests/enc-utf8a.bib-jis tests/enc-utf8a.bib-sjis tests/enc-utf8a.bib-utf8 \
tests/enc-utf8b.bib-euc tests/enc-utf8b.bib-jis tests/enc-utf8b.bib-sjis tests/enc-utf8b.bib-utf8
-DISTCLEANFILES = enc-*.bib*
+DISTCLEANFILES = enc-*.bib* saza-*.txt*
# Rebuild
Modified: trunk/Build/source/texk/ptexenc/Makefile.in
===================================================================
--- trunk/Build/source/texk/ptexenc/Makefile.in 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/Makefile.in 2024-02-11 08:19:52 UTC (rev 69798)
@@ -617,8 +617,8 @@
LDADD = $(KPATHSEA_LIBS)
#
-TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test
-DISTCLEANFILES = enc-*.bib*
+TESTS = tests/ptekf-smoke.test tests/ptekf-guess.test tests/ptekf-conv.test tests/ptekf-eol.test
+DISTCLEANFILES = enc-*.bib* saza-*.txt*
# Rebuild
rebuild_prereq =
@@ -1486,7 +1486,7 @@
@KPATHSEA_RULE@
#
-tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log \
+tests/ptekf-smoke.log tests/ptekf-guess.log tests/ptekf-conv.log tests/ptekf-eol.log \
: ptekf$(EXEEXT)
rebuild.stamp: $(rebuild_target)
echo timestamp >$@
Modified: trunk/Build/source/texk/ptexenc/c-auto.in
===================================================================
--- trunk/Build/source/texk/ptexenc/c-auto.in 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/c-auto.in 2024-02-11 08:19:52 UTC (rev 69798)
@@ -6,7 +6,7 @@
#define PTEXENC_C_AUTO_H
/* ptexenc: the version string. */
-#define PTEXENCVERSION "ptexenc version 1.4.5"
+#define PTEXENCVERSION "ptexenc version 1.4.6"
/* Define to 1 if the 'closedir' function returns void instead of int. */
#undef CLOSEDIR_VOID
Modified: trunk/Build/source/texk/ptexenc/configure
===================================================================
--- trunk/Build/source/texk/ptexenc/configure 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/configure 2024-02-11 08:19:52 UTC (rev 69798)
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.72 for ptexenc 1.4.5.
+# Generated by GNU Autoconf 2.72 for ptexenc 1.4.6.
#
# Report bugs to <tex-k at tug.org>.
#
@@ -614,8 +614,8 @@
# Identity of this package.
PACKAGE_NAME='ptexenc'
PACKAGE_TARNAME='ptexenc'
-PACKAGE_VERSION='1.4.5'
-PACKAGE_STRING='ptexenc 1.4.5'
+PACKAGE_VERSION='1.4.6'
+PACKAGE_STRING='ptexenc 1.4.6'
PACKAGE_BUGREPORT='tex-k at tug.org'
PACKAGE_URL=''
@@ -1363,7 +1363,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-'configure' configures ptexenc 1.4.5 to adapt to many kinds of systems.
+'configure' configures ptexenc 1.4.6 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1434,7 +1434,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ptexenc 1.4.5:";;
+ short | recursive ) echo "Configuration of ptexenc 1.4.6:";;
esac
cat <<\_ACEOF
@@ -1555,7 +1555,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ptexenc configure 1.4.5
+ptexenc configure 1.4.6
generated by GNU Autoconf 2.72
Copyright (C) 2023 Free Software Foundation, Inc.
@@ -2097,7 +2097,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ptexenc $as_me 1.4.5, which was
+It was created by ptexenc $as_me 1.4.6, which was
generated by GNU Autoconf 2.72. Invocation command line was
$ $0$ac_configure_args_raw
@@ -2875,10 +2875,10 @@
-PTEXENCVERSION=1.4.5
+PTEXENCVERSION=1.4.6
-PTEXENC_LT_VERSINFO=5:5:4
+PTEXENC_LT_VERSINFO=5:6:4
am__api_version='1.16'
@@ -8664,7 +8664,7 @@
# Define the identity of the package.
PACKAGE='ptexenc'
- VERSION='1.4.5'
+ VERSION='1.4.6'
printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -15005,7 +15005,7 @@
Report bugs to <bug-libtool at gnu.org>."
lt_cl_version="\
-ptexenc config.lt 1.4.5
+ptexenc config.lt 1.4.6
configured by $0, generated by GNU Autoconf 2.72.
Copyright (C) 2011 Free Software Foundation, Inc.
@@ -16777,7 +16777,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ptexenc $as_me 1.4.5, which was
+This file was extended by ptexenc $as_me 1.4.6, which was
generated by GNU Autoconf 2.72. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16845,7 +16845,7 @@
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
-ptexenc config.status 1.4.5
+ptexenc config.status 1.4.6
configured by $0, generated by GNU Autoconf 2.72,
with options \\"\$ac_cs_config\\"
Modified: trunk/Build/source/texk/ptexenc/ptekf.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptekf.c 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptekf.c 2024-02-11 08:19:52 UTC (rev 69798)
@@ -45,7 +45,7 @@
{0, 0, 0, 0}
};
-#define MY_VERSION "20240201"
+#define MY_VERSION "20240211"
#define BUG_ADDRESS "issue at texjp.org"
static void show_version(void)
@@ -87,7 +87,7 @@
if ((len = input_line2(fp, (unsigned char *)buff, NULL, 0, size, &c)) == 0
&& c != '\r' && c != '\n') return NULL;
if (c == '\n' || c == '\r') {
- if (len+1 < size) strcat(buff+len, "\n");
+ if (len+1 < size) { buff[len]=(unsigned char)c; buff[len+1]='\0'; }
else ungetc(c, fp);
}
if (c == EOF) return NULL;
@@ -183,7 +183,7 @@
exit(32);
}
if (flg_guess_enc) {
- genc = ptenc_guess_enc(infp, 1);
+ genc = ptenc_guess_enc(infp, 1, 1);
printf("%s: %s\n", infname, genc);
setinfileenc(infp, genc);
free(genc);
@@ -212,6 +212,11 @@
while ((ret = mfgets(buff, BUFFERLEN, infp)) != NULL) {
(*fputs__)(buff, outfp);
}
+ if (ret == NULL && feof(infp)) {
+ c = buff[strlen(buff)-1];
+ if (c != '\n' && c != '\r')
+ (*fputs__)(buff, outfp);
+ }
if (fclose(infp)) {
fprintf(stderr, "ERROR: fail to close input file [%s].", infname);
exit(32);
Modified: trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptexenc/ptexenc.h 2024-02-11 08:19:52 UTC (rev 69798)
@@ -101,7 +101,7 @@
extern PTENCDLL boolean setinfileenc(FILE *fp, const char *str);
extern PTENCDLL boolean setstdinenc(const char *str);
extern PTENCDLL boolean setfileenc(const char *str);
-extern PTENCDLL char *ptenc_guess_enc(FILE *fp, boolean chk_bom);
+extern PTENCDLL char *ptenc_guess_enc(FILE *fp, boolean chk_bom, boolean chk_nl);
#ifdef WIN32
extern PTENCDLL void clear_infile_enc(FILE *fp);
Modified: trunk/Build/source/texk/ptexenc/ptexenc.c
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc.c 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/ptexenc.c 2024-02-11 08:19:52 UTC (rev 69798)
@@ -73,14 +73,14 @@
if (strcasecmp(str, "utf8") == 0) return ENC_UTF8;
if (UPTEX_enabled && strcasecmp(str, "uptex") == 0) return ENC_UPTEX;
- if (strcasecmp(str, "ASCII")== 0) return file_enc;
+ if (strncasecmp(str, "ASCII", 5)== 0) return file_enc;
if (strncasecmp(str, "AMBIGUOUS", 9) == 0) return guess_enc;
- if (strcasecmp(str, "BINARY") == 0) return ENC_JIS;
- if (strcasecmp(str, "ISO-2022-JP") == 0) return ENC_JIS;
- if (strcasecmp(str, "EUC-JP") == 0) return ENC_EUC;
- if (strcasecmp(str, "Shift_JIS") == 0) return ENC_SJIS;
- if (strncasecmp(str, "UTF-8", 5) == 0) return ENC_UTF8;
- if (strcasecmp(str, "ISO-8859") == 0) return ENC_JIS;
+ if (strncasecmp(str, "BINARY", 6) == 0) return ENC_JIS;
+ if (strncasecmp(str, "ISO-2022-JP", 11) == 0) return ENC_JIS;
+ if (strncasecmp(str, "EUC-JP", 6) == 0) return ENC_EUC;
+ if (strncasecmp(str, "Shift_JIS", 9) == 0) return ENC_SJIS;
+ if (strncasecmp(str, "UTF-8", 5) == 0) return ENC_UTF8;
+ if (strncasecmp(str, "ISO-8859", 8) == 0) return ENC_JIS;
return -1; /* error */
}
@@ -820,10 +820,11 @@
JIS X 0208 only and no platform dependent characters in Shift_JIS, EUC-JP
ISO-8859 may have 0xA0..0xFF, may not have 0x80..0x9F
*/
-char *ptenc_guess_enc(FILE *fp, boolean chk_bom)
+char *ptenc_guess_enc(FILE *fp, boolean chk_bom, boolean chk_nl)
{
char *enc;
int k0, k1, k2, cdb[2], cu8[4], len_utf8;
+ int nl0=0, nl_cr=0, nl_lf=0, nl_crlf=0;
int is_ascii=1, lbyte=0;
int maybe_sjis=1, maybe_euc=1, maybe_utf8=1, maybe_iso8859=1, pos_db=0, pos_utf8=0;
int ch_sjis=0, ch_euc=0, ch_utf8=0, ch_iso8859=0, bom=0;
@@ -831,14 +832,23 @@
int i;
unsigned char str0[5];
#endif /* DEBUG */
- enc = xmalloc(sizeof(char)*20);
+ enc = xmalloc(sizeof(char)*32);
while ((k0 = fgetc(fp)) != EOF &&
- (maybe_sjis+maybe_euc+maybe_utf8+maybe_iso8859>1 || pos_db || pos_utf8 || lbyte<200)) {
+ (maybe_sjis+maybe_euc+maybe_utf8+maybe_iso8859>1 || pos_db || pos_utf8
+ || lbyte<320 || k0=='\r')) {
+ if (chk_nl) {
+ if (k0 == '\r') nl_cr++;
+ if (k0 == '\n') {
+ if (nl0 == '\r') { nl_cr--; nl_crlf++; }
+ else { nl_lf++; }
+ }
+ }
if (maybe_iso8859 && maybe_sjis+maybe_euc+maybe_utf8==1 && !pos_db && !pos_utf8
- && ch_iso8859>=2000) {
+ && ch_iso8859>=2000 && k0!='\r') {
break;
}
+ nl0 = k0;
if (chk_bom && lbyte<4 && bom_l[lbyte] <= k0 && k0 <= bom_u[lbyte]) bom++;
lbyte++;
if (k0==ESC) {
@@ -1049,6 +1059,12 @@
else
strcpy(enc,"BINARY");
post_process:
+ if (chk_nl && (nl_cr || nl_lf || nl_crlf)) {
+ if (nl_lf+nl_crlf==0) strcat(enc," (CR)");
+ else if (nl_cr+nl_crlf==0) strcat(enc," (LF)");
+ else if (nl_cr+nl_lf ==0) strcat(enc," (CRLF)");
+ else strcat(enc," (MIXED NL)");
+ }
rewind (fp);
return enc;
}
@@ -1102,7 +1118,7 @@
getc4(fp);
getc4(fp);
rewind(fp);
- enc = ptenc_guess_enc(fp, 0);
+ enc = ptenc_guess_enc(fp, 0, 0);
if (string_to_enc(enc) > 0) {
infile_enc[fd] = string_to_enc(enc);
fprintf(stderr, "(guessed encoding #%d: %s = %s)", fd, enc, enc_to_string(infile_enc[fd]));
Added: trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test
===================================================================
--- trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test (rev 0)
+++ trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test 2024-02-11 08:19:52 UTC (rev 69798)
@@ -0,0 +1,58 @@
+#! /bin/sh -vx
+# $Id$
+# Copyright 2024 Japanese TeX Development Community <issue at texjp.org>
+# You may freely use, modify and/or distribute this file.
+
+# Not really a test, just making sure the program executes.
+
+BinDir=${BinDir:-.}
+ExeExt=${ExeExt:-}
+_ptekf=$BinDir/ptekf$ExeExt
+
+TEXMFCNF=$srcdir/../kpathsea
+web2cdir=$srcdir/../web2c
+
+export TEXMFCNF
+
+rm -f ./saza*.txt*
+
+# "さざ波" -> \343\201\225\343\201\226\346\263\242
+jstr='\343\201\225\343\201\226\346\263\242'
+astr='Ripples'
+cr='\015'
+lf='\012'
+crlf='\015\012'
+
+printf "$astr$cr$jstr$cr$astr $jstr$cr$jstr" > saza-utf8-cr0.txt
+printf "$astr$lf$jstr$lf$astr $jstr$lf$jstr" > saza-utf8-lf0.txt
+printf "$astr$crlf$jstr$crlf$astr $jstr$crlf$jstr" > saza-utf8-crlf0.txt
+printf "$astr$cr$jstr$lf$astr $jstr$crlf$jstr" > saza-utf8-mix.txt
+printf "$astr$cr$jstr$cr$astr $jstr$cr$jstr$cr" > saza-utf8-cr1.txt
+printf "$astr$lf$jstr$lf$astr $jstr$lf$jstr$lf" > saza-utf8-lf1.txt
+printf "$astr$crlf$jstr$crlf$astr $jstr$crlf$jstr$crlf" > saza-utf8-crlf1.txt
+
+for nl in cr0 lf0 crlf0 cr1 lf1 crlf1 mix; do
+
+ $_ptekf --guess saza-utf8-$nl.txt || exit 1
+ $_ptekf -Gj saza-utf8-$nl.txt || exit 2
+ mv saza-utf8-$nl.txt.out saza-utf8-$nl-jis.txt
+ $_ptekf -Ge saza-utf8-$nl.txt || exit 3
+ mv saza-utf8-$nl.txt.out saza-utf8-$nl-euc.txt
+ $_ptekf -Gs saza-utf8-$nl.txt || exit 4
+ mv saza-utf8-$nl.txt.out saza-utf8-$nl-sjis.txt
+
+
+ for enc in jis euc sjis; do
+
+ $_ptekf -Gu saza-utf8-$nl-$enc.txt || exit 21
+ mv saza-utf8-$nl-$enc.txt.out saza-utf8-$nl-$enc-utf8.txt
+ diff saza-utf8-$nl.txt saza-utf8-$nl-$enc-utf8.txt || exit 22
+
+ done
+
+done
+
+$_ptekf --guess saza-utf8-*-jis.txt || exit 51
+$_ptekf --guess saza-utf8-*-euc.txt || exit 52
+$_ptekf --guess saza-utf8-*-sjis.txt || exit 53
+$_ptekf --guess saza-utf8-*-*-utf8.txt || exit 54
Property changes on: trunk/Build/source/texk/ptexenc/tests/ptekf-eol.test
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: trunk/Build/source/texk/ptexenc/version.ac
===================================================================
--- trunk/Build/source/texk/ptexenc/version.ac 2024-02-11 07:01:57 UTC (rev 69797)
+++ trunk/Build/source/texk/ptexenc/version.ac 2024-02-11 08:19:52 UTC (rev 69798)
@@ -11,4 +11,4 @@
dnl see kpathsea/version.ac.
dnl
dnl This file is m4-included from configure.ac.
-m4_define([ptexenc_version], [1.4.5])
+m4_define([ptexenc_version], [1.4.6])
More information about the tex-live-commits
mailing list.