texlive[63558] Build/source/texk/mendexk: mendex: support guessing input file encodings

commits+takuji at tug.org commits+takuji at tug.org
Sun Jun 12 10:18:31 CEST 2022


Revision: 63558
          http://tug.org/svn/texlive?view=revision&revision=63558
Author:   takuji
Date:     2022-06-12 10:18:31 +0200 (Sun, 12 Jun 2022)
Log Message:
-----------
mendex: support guessing input file encodings

Modified Paths:
--------------
    trunk/Build/source/texk/mendexk/ChangeLog
    trunk/Build/source/texk/mendexk/Makefile.am
    trunk/Build/source/texk/mendexk/Makefile.in
    trunk/Build/source/texk/mendexk/main.c
    trunk/Build/source/texk/mendexk/tests/mendex.test
    trunk/Build/source/texk/mendexk/version.h

Added Paths:
-----------
    trunk/Build/source/texk/mendexk/tests/euc.ist
    trunk/Build/source/texk/mendexk/tests/sjis.ist

Modified: trunk/Build/source/texk/mendexk/ChangeLog
===================================================================
--- trunk/Build/source/texk/mendexk/ChangeLog	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/ChangeLog	2022-06-12 08:18:31 UTC (rev 63558)
@@ -1,3 +1,12 @@
+2022-06-12  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* main.c:
+	Support guessing input file encodings.
+	https://github.com/texjporg/tex-jp-build/issues/142
+	* tests/mendex.test, tests/{sjis,euc}.ist, Makefile.am:
+	Update tests.
+	* version.h: Date [12-Jun-2022]
+
 2022-05-22  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* fread.c:

Modified: trunk/Build/source/texk/mendexk/Makefile.am
===================================================================
--- trunk/Build/source/texk/mendexk/Makefile.am	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/Makefile.am	2022-06-12 08:18:31 UTC (rev 63558)
@@ -59,6 +59,7 @@
 EXTRA_DIST += tests/foo.idx tests/foo.ind \
 	tests/uni.idx tests/uni.dict tests/uni.ist \
 	tests/uni1.ist tests/uni2.ist tests/uni.ind \
+	tests/sjis.ist tests/euc.ist \
 	tests/range.idx tests/range2.ist tests/range2.ist tests/range3.ist \
 	tests/range1.ind tests/range2.ind tests/range3.ind \
 	tests/pprec0.ist tests/pprec1.ist tests/pprec2.ist tests/pprec3.ist \

Modified: trunk/Build/source/texk/mendexk/Makefile.in
===================================================================
--- trunk/Build/source/texk/mendexk/Makefile.in	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/Makefile.in	2022-06-12 08:18:31 UTC (rev 63558)
@@ -582,19 +582,20 @@
 EXTRA_DIST = COPYRIGHT COPYRIGHT.ja ChangeLog.ja indexsty \
 	mendex.ja.txt mendex.1 mendex.1.ja $(TESTS) tests/foo.idx \
 	tests/foo.ind tests/uni.idx tests/uni.dict tests/uni.ist \
-	tests/uni1.ist tests/uni2.ist tests/uni.ind tests/range.idx \
-	tests/range2.ist tests/range2.ist tests/range3.ist \
-	tests/range1.ind tests/range2.ind tests/range3.ind \
-	tests/pprec0.ist tests/pprec1.ist tests/pprec2.ist \
-	tests/pprec3.ist tests/pprec4.ist tests/pprec5.ist \
-	tests/pprec6.ist tests/pprec7.ist tests/pprecA.idx \
-	tests/pprecA-0.ind tests/pprecA-1.ind tests/pprecA-2.ind \
-	tests/pprecB.idx tests/pprecB-0.ind tests/pprecB-3.ind \
-	tests/pprecB-4.ind tests/romalpA.idx tests/romalpA-5.ind \
-	tests/romalpA-6.ind tests/romalpB.idx tests/romalpB-5.ind \
-	tests/romalpB-6.ind tests/romalpB-7.ind tests/romalpC.idx \
-	tests/romalpC-5.ind tests/romalpD.idx tests/romalpD-5.ind \
-	tests/romalpD-6.ind tests/romalpD-7.ind
+	tests/uni1.ist tests/uni2.ist tests/uni.ind tests/sjis.ist \
+	tests/euc.ist tests/range.idx tests/range2.ist \
+	tests/range2.ist tests/range3.ist tests/range1.ind \
+	tests/range2.ind tests/range3.ind tests/pprec0.ist \
+	tests/pprec1.ist tests/pprec2.ist tests/pprec3.ist \
+	tests/pprec4.ist tests/pprec5.ist tests/pprec6.ist \
+	tests/pprec7.ist tests/pprecA.idx tests/pprecA-0.ind \
+	tests/pprecA-1.ind tests/pprecA-2.ind tests/pprecB.idx \
+	tests/pprecB-0.ind tests/pprecB-3.ind tests/pprecB-4.ind \
+	tests/romalpA.idx tests/romalpA-5.ind tests/romalpA-6.ind \
+	tests/romalpB.idx tests/romalpB-5.ind tests/romalpB-6.ind \
+	tests/romalpB-7.ind tests/romalpC.idx tests/romalpC-5.ind \
+	tests/romalpD.idx tests/romalpD-5.ind tests/romalpD-6.ind \
+	tests/romalpD-7.ind
 TESTS = tests/mendex.test
 DISTCLEANFILES = foo.* uni.* range*.* pprec*.* romalp*.*
 all: c-auto.h

Modified: trunk/Build/source/texk/mendexk/main.c
===================================================================
--- trunk/Build/source/texk/mendexk/main.c	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/main.c	2022-06-12 08:18:31 UTC (rev 63558)
@@ -40,7 +40,6 @@
 			fprintf (stderr, "Ignoring bad kanji encoding \"%s\".\n", p);
 	}
 
-#ifdef WIN32
 	p = kpse_var_value ("guess_input_kanji_encoding");
 	if (p) {
 		if (*p == '1' || *p == 'y' || *p == 't')
@@ -47,7 +46,6 @@
 			infile_enc_auto = 1;
 		free(p);
 	}
-#endif
 
 	kp_ist.var_name = "INDEXSTYLE";
 	kp_ist.path = DEFAULT_INDEXSTYLES; /* default path. */
@@ -189,6 +187,8 @@
 
 			case '-':
 				if (strlen(argv[i])==2) chkopt=0;
+				if (strcmp(argv[i],"--guess-input-enc"   )==0) infile_enc_auto=1;
+				if (strcmp(argv[i],"--no-guess-input-enc")==0) infile_enc_auto=0;
 				if (strcmp(argv[i],"--help")!=0) break;
 
 			default:
@@ -195,7 +195,7 @@
 				fprintf(stderr,"mendex - Japanese index processor, %s (%s) (%s).\n",VERSION, get_enc_string(), TL_VERSION);
 				fprintf(stderr," Copyright 2009 ASCII MEDIA WORKS, 2017-2022 Japanese TeX Development Community\n");
 				fprintf(stderr,"usage:\n");
-				fprintf(stderr,"%% mendex [-ilqrcgfEJSU] [-s sty] [-d dic] [-o ind] [-t log] [-p no] [-I enc] [--] [idx0 idx1 ...]\n");
+				fprintf(stderr,"%% mendex [-ilqrcgfEJSU] [-s sty] [-d dic] [-o ind] [-t log] [-p no] [-I enc] [--[no-]guess-input-enc] [--] [idx0 idx1 ...]\n");
 				fprintf(stderr,"options:\n");
 				fprintf(stderr,"-i      use stdin as the input file.\n");
 				fprintf(stderr,"-l      use letter ordering.\n");
@@ -215,6 +215,7 @@
 				fprintf(stderr,"-S      ShiftJIS mode.\n");
 				fprintf(stderr,"-U      UTF-8 mode.\n");
 				fprintf(stderr,"-I enc  internal encoding for keywords (enc: euc or utf8).\n");
+				fprintf(stderr,"--[no-]guess-input-enc  disable/enable to guess input file encoding.\n");
 				fprintf(stderr,"idx...  input files.\n");
 				fprintf(stderr,"\nEmail bug reports to %s.\n", BUG_ADDRESS);
 				exit(0);

Added: trunk/Build/source/texk/mendexk/tests/euc.ist
===================================================================
--- trunk/Build/source/texk/mendexk/tests/euc.ist	                        (rev 0)
+++ trunk/Build/source/texk/mendexk/tests/euc.ist	2022-06-12 08:18:31 UTC (rev 63558)
@@ -0,0 +1,10 @@
+% -*- coding: euc-jp -*-
+
+delim_0 "\\leaders\\hbox{$\\cdot$}\\hfill "
+delim_1 "\\leaders\\hbox{$\\cdot$}\\hfill "
+delim_2 "\\leaders\\hbox{$\\cdot$}\\hfill "
+
+lethead_flag 1
+lethead_prefix "\n\\centerline{\\bfseries \xA2\xA1\xA2\xA1\xA2\xA1 "
+lethead_suffix  " \xA2\xA1\xA2\xA1\xA2\xA1}\\par\\nobreak"
+

Modified: trunk/Build/source/texk/mendexk/tests/mendex.test
===================================================================
--- trunk/Build/source/texk/mendexk/tests/mendex.test	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/tests/mendex.test	2022-06-12 08:18:31 UTC (rev 63558)
@@ -7,7 +7,7 @@
 TEXMFCNF=$srcdir/../kpathsea
 export TEXMFCNF
 
-rm -f foo.* uni.* range*.* pprec*.* romalp*.*
+rm -f foo.* uni.* range*.* pprec*.* romalp*.* sjis.* euc.*
 rc=0
 
 ./mendex $srcdir/tests/foo.idx -o foo.ind1 -t foo.ilg1 \
@@ -29,7 +29,17 @@
 	$srcdir/tests/uni.idx -o uni.ind3 -t uni.ilg3 \
 	&& diff $srcdir/tests/uni.ind uni.ind3 || rc=5
 
+cat $srcdir/tests/uni.idx | \
+	guess_input_kanji_encoding=1 ./mendex -U -d $srcdir/tests/uni.dict -s $srcdir/tests/sjis.ist \
+	>sjis.ind1 2>sjis.ilg1 \
+	&& diff $srcdir/tests/uni.ind sjis.ind1 || rc=20
 
+cat $srcdir/tests/uni.idx | \
+	./mendex -U -d $srcdir/tests/uni.dict -s $srcdir/tests/euc.ist --guess-input-enc \
+	>euc.ind1 2>euc.ilg1 \
+	&& diff $srcdir/tests/uni.ind euc.ind1 || rc=21
+
+
 # test for range suffix_2p, suffix_3p, suffix_mp
 for num in 1 2 3; do
 ./mendex -s $srcdir/tests/range$num.ist $srcdir/tests/range.idx \

Added: trunk/Build/source/texk/mendexk/tests/sjis.ist
===================================================================
--- trunk/Build/source/texk/mendexk/tests/sjis.ist	                        (rev 0)
+++ trunk/Build/source/texk/mendexk/tests/sjis.ist	2022-06-12 08:18:31 UTC (rev 63558)
@@ -0,0 +1,10 @@
+% -*- coding: shift_jis -*-
+
+delim_0 "\\leaders\\hbox{$\\cdot$}\\hfill "
+delim_1 "\\leaders\\hbox{$\\cdot$}\\hfill "
+delim_2 "\\leaders\\hbox{$\\cdot$}\\hfill "
+
+lethead_flag 1
+lethead_prefix "\n\\centerline{\\bfseries \x81\x9F\x81\x9F\x81\x9F "
+lethead_suffix  " \x81\x9F\x81\x9F\x81\x9F}\\par\\nobreak"
+

Modified: trunk/Build/source/texk/mendexk/version.h
===================================================================
--- trunk/Build/source/texk/mendexk/version.h	2022-06-12 08:18:23 UTC (rev 63557)
+++ trunk/Build/source/texk/mendexk/version.h	2022-06-12 08:18:31 UTC (rev 63558)
@@ -1,2 +1,2 @@
-#define VERSION  "version " PACKAGE_VERSION " [22-May-2022]"
+#define VERSION  "version " PACKAGE_VERSION " [12-Jun-2022]"
 #define BUG_ADDRESS "issue at texjp.org"



More information about the tex-live-commits mailing list.