texlive[60191] Build/source/texk/upmendex: upmendex 0.58: support Devanagari, Thai scripts

commits+takuji at tug.org commits+takuji at tug.org
Sun Aug 8 05:26:19 CEST 2021


Revision: 60191
          http://tug.org/svn/texlive?view=revision&revision=60191
Author:   takuji
Date:     2021-08-08 05:26:18 +0200 (Sun, 08 Aug 2021)
Log Message:
-----------
upmendex 0.58: support Devanagari, Thai scripts

Modified Paths:
--------------
    trunk/Build/source/texk/upmendex/ChangeLog
    trunk/Build/source/texk/upmendex/configure
    trunk/Build/source/texk/upmendex/configure.ac
    trunk/Build/source/texk/upmendex/convert.c
    trunk/Build/source/texk/upmendex/exkana.h
    trunk/Build/source/texk/upmendex/exvar.h
    trunk/Build/source/texk/upmendex/fwrite.c
    trunk/Build/source/texk/upmendex/kana.h
    trunk/Build/source/texk/upmendex/main.c
    trunk/Build/source/texk/upmendex/mendex.h
    trunk/Build/source/texk/upmendex/sort.c
    trunk/Build/source/texk/upmendex/styfile.c
    trunk/Build/source/texk/upmendex/upmendex.ja.txt
    trunk/Build/source/texk/upmendex/var.h

Modified: trunk/Build/source/texk/upmendex/ChangeLog
===================================================================
--- trunk/Build/source/texk/upmendex/ChangeLog	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/ChangeLog	2021-08-08 03:26:18 UTC (rev 60191)
@@ -1,3 +1,16 @@
+2021-08-08  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* version 0.58  Beta version.
+	* configure.ac: Bump version.
+	* main.c, convert.c, fwrite.c, sort.c, styfile.c, mendex.h, {,ex}kana.h, {,ex}var.h:
+	Supports Devanagari, Thai scripts (experimental).
+	Add keywords "devanagari_head", "thai_head" to style file.
+	Add a keyword "hangul_head" to style file; deprecate "tumunja".
+	* styfile.c:
+	Add an option "numeric-ordering" to the keyword "icu_attributes" in style file.
+	* upmendex.ja.txt:
+	Update document.
+
 2021-06-19  Karl Berry  <karl at freefriends.org>
 
 	* configure.ac: pthread doc update; also needed in bibtexu (q.v.).

Modified: trunk/Build/source/texk/upmendex/configure
===================================================================
--- trunk/Build/source/texk/upmendex/configure	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/configure	2021-08-08 03:26:18 UTC (rev 60191)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 0.57.
+# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 0.58.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -626,8 +626,8 @@
 # Identity of this package.
 PACKAGE_NAME='upmendex (TeX Live)'
 PACKAGE_TARNAME='upmendex--tex-live-'
-PACKAGE_VERSION='0.57'
-PACKAGE_STRING='upmendex (TeX Live) 0.57'
+PACKAGE_VERSION='0.58'
+PACKAGE_STRING='upmendex (TeX Live) 0.58'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1390,7 +1390,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures upmendex (TeX Live) 0.57 to adapt to many kinds of systems.
+\`configure' configures upmendex (TeX Live) 0.58 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1462,7 +1462,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of upmendex (TeX Live) 0.57:";;
+     short | recursive ) echo "Configuration of upmendex (TeX Live) 0.58:";;
    esac
   cat <<\_ACEOF
 
@@ -1587,7 +1587,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-upmendex (TeX Live) configure 0.57
+upmendex (TeX Live) configure 0.58
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2268,7 +2268,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by upmendex (TeX Live) $as_me 0.57, which was
+It was created by upmendex (TeX Live) $as_me 0.58, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -8806,7 +8806,7 @@
 
 # Define the identity of the package.
  PACKAGE='upmendex--tex-live-'
- VERSION='0.57'
+ VERSION='0.58'
 
 
 # Some tools Automake needs.
@@ -18942,7 +18942,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-upmendex (TeX Live) config.lt 0.57
+upmendex (TeX Live) config.lt 0.58
 configured by $0, generated by GNU Autoconf 2.71.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -21114,7 +21114,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by upmendex (TeX Live) $as_me 0.57, which was
+This file was extended by upmendex (TeX Live) $as_me 0.58, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21182,7 +21182,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-upmendex (TeX Live) config.status 0.57
+upmendex (TeX Live) config.status 0.58
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/upmendex/configure.ac
===================================================================
--- trunk/Build/source/texk/upmendex/configure.ac	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/configure.ac	2021-08-08 03:26:18 UTC (rev 60191)
@@ -8,7 +8,7 @@
 dnl   gives unlimited permission to copy and/or distribute it,
 dnl   with or without modifications, as long as this notice is preserved.
 dnl
-AC_INIT([upmendex (TeX Live)], [0.57])
+AC_INIT([upmendex (TeX Live)], [0.58])
 AC_PREREQ([2.63])
 AC_CONFIG_SRCDIR([main.c])
 AC_CONFIG_AUX_DIR([../../build-aux])

Modified: trunk/Build/source/texk/upmendex/convert.c
===================================================================
--- trunk/Build/source/texk/upmendex/convert.c	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/convert.c	2021-08-08 03:26:18 UTC (rev 60191)
@@ -218,6 +218,7 @@
 
 			else if (is_latin(buff3)||is_cyrillic(buff3)||is_greek(buff3)
 				 ||is_jpn_kana(buff3)||is_kor_hngl(buff3)||is_zhuyin(buff3)
+				 ||is_numeric(buff3)||is_devanagari(buff3)||is_thai(buff3)
 					||is_comb_diacritical_mark(buff3)) {
 				buff2[j]=buff3[0];
 				if (wclen==2) buff2[j+1]=buff3[1];

Modified: trunk/Build/source/texk/upmendex/exkana.h
===================================================================
--- trunk/Build/source/texk/upmendex/exkana.h	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/exkana.h	2021-08-08 03:26:18 UTC (rev 60191)
@@ -6,8 +6,6 @@
 extern UChar extkanatable[];
 extern UChar circkanatable[];
 
-extern UChar GANADA[];
-
 #define SPACE    0x3000    /* 全角スペース */
 #define ALPHATOP 0xff01    /* ! */
 #define ALPHAEND 0xff5a    /* z */

Modified: trunk/Build/source/texk/upmendex/exvar.h
===================================================================
--- trunk/Build/source/texk/upmendex/exvar.h	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/exvar.h	2021-08-08 03:26:18 UTC (rev 60191)
@@ -31,7 +31,8 @@
 extern char numhead_positive[],numhead_negative[];
 extern int symbol_flag;
 extern int letter_head;
-extern UChar atama[],tumunja[],hanzi_head[],kana_head[];
+extern UChar atama[],hangul_head[],hanzi_head[],kana_head[];
+extern UChar devanagari_head[],thai_head[];
 extern char page_compositor[],page_precedence[];
 extern char character_order[];
 extern char icu_locale[],icu_rules[];

Modified: trunk/Build/source/texk/upmendex/fwrite.c
===================================================================
--- trunk/Build/source/texk/upmendex/fwrite.c	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/fwrite.c	2021-08-08 03:26:18 UTC (rev 60191)
@@ -150,7 +150,7 @@
 /*   write ind file   */
 void indwrite(char *filename, struct index *ind, int pagenum)
 {
-	int i,j,hpoint=0,tpoint=0;
+	int i,j,hpoint=0,tpoint=0,ipoint=0,jpoint=0;
 	char lbuff[BUFFERLEN],obuff[BUFFERLEN];
 	UChar datama[256],initial[INITIALLENGTH],initial_prev[INITIALLENGTH];
 	int chset,chset_prev;
@@ -213,26 +213,72 @@
 			else if (chset==CH_HANGUL) {
 				if (lethead_flag!=0) {
 					fputs(lethead_prefix,fp);
-					for (j=tpoint;j<(u_strlen(tumunja));j++) {
-						if (initial_cmp_char(initial,tumunja[j])) {
-							fprint_uchar(fp,&tumunja[j-1],M_NONE,1);
+					for (j=tpoint;j<(u_strlen(hangul_head));j++) {
+						if (initial_cmp_char(initial,hangul_head[j])) {
+							fprint_uchar(fp,&hangul_head[j-1],M_NONE,1);
 							tpoint=j;
 							break;
 						}
 					}
-					if (j==(u_strlen(tumunja))) {
-						fprint_uchar(fp,&tumunja[j-1],M_NONE,1);
+					if (j==(u_strlen(hangul_head))) {
+						fprint_uchar(fp,&hangul_head[j-1],M_NONE,1);
 					}
 					fputs(lethead_suffix,fp);
 				}
 				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
 				SPRINTF(lbuff,"%s%s",item_0,obuff);
-				for (tpoint=0;tpoint<(u_strlen(tumunja));tpoint++) {
-					if (initial_cmp_char(initial,tumunja[tpoint])) {
+				for (tpoint=0;tpoint<(u_strlen(hangul_head));tpoint++) {
+					if (initial_cmp_char(initial,hangul_head[tpoint])) {
 						break;
 					}
 				}
 			}
+			else if (chset==CH_DEVANAGARI) {
+				if (lethead_flag!=0) {
+					fputs(lethead_prefix,fp);
+					for (j=jpoint;j<(u_strlen(devanagari_head));j++) {
+						if (initial_cmp_char(initial,devanagari_head[j])) {
+							fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+							jpoint=j;
+							break;
+						}
+					}
+					if (j==(u_strlen(devanagari_head))) {
+						fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+					}
+					fputs(lethead_suffix,fp);
+				}
+				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
+				SPRINTF(lbuff,"%s%s",item_0,obuff);
+				for (jpoint=0;jpoint<(u_strlen(devanagari_head));jpoint++) {
+					if (initial_cmp_char(initial,devanagari_head[jpoint])) {
+						break;
+					}
+				}
+			}
+			else if (chset==CH_THAI) {
+				if (lethead_flag!=0) {
+					fputs(lethead_prefix,fp);
+					for (j=ipoint;j<(u_strlen(thai_head));j++) {
+						if (initial_cmp_char(initial,thai_head[j])) {
+							fprint_uchar(fp,&thai_head[j-1],M_NONE,1);
+							ipoint=j;
+							break;
+						}
+					}
+					if (j==(u_strlen(thai_head))) {
+						fprint_uchar(fp,&thai_head[j-1],M_NONE,1);
+					}
+					fputs(lethead_suffix,fp);
+				}
+				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
+				SPRINTF(lbuff,"%s%s",item_0,obuff);
+				for (ipoint=0;ipoint<(u_strlen(thai_head));ipoint++) {
+					if (initial_cmp_char(initial,thai_head[ipoint])) {
+						break;
+					}
+				}
+			}
 			else {
 				if (lethead_flag!=0) {
 					if (symbol_flag && strlen(symbol)) {
@@ -304,8 +350,8 @@
 				}
 			}
 			else if (chset==CH_HANGUL) {
-				for (j=tpoint;j<(u_strlen(tumunja));j++) {
-					if (initial_cmp_char(initial,tumunja[j])) {
+				for (j=tpoint;j<(u_strlen(hangul_head));j++) {
+					if (initial_cmp_char(initial,hangul_head[j])) {
 						break;
 					}
 				}
@@ -314,13 +360,45 @@
 					fputs(group_skip,fp);
 					if (lethead_flag!=0) {
 						fputs(lethead_prefix,fp);
-						fprint_uchar(fp,&tumunja[j-1],M_NONE,1);
+						fprint_uchar(fp,&hangul_head[j-1],M_NONE,1);
 						fputs(lethead_suffix,fp);
 					}
 				}
 			}
+			else if (chset==CH_DEVANAGARI) {
+				for (j=jpoint;j<(u_strlen(devanagari_head));j++) {
+					if (initial_cmp_char(initial,devanagari_head[j])) {
+						break;
+					}
+				}
+				if ((j!=jpoint)||(j==0)) {
+					jpoint=j;
+					fputs(group_skip,fp);
+					if (lethead_flag!=0) {
+						fputs(lethead_prefix,fp);
+						fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+						fputs(lethead_suffix,fp);
+					}
+				}
+			}
+			else if (chset==CH_THAI) {
+				for (j=ipoint;j<(u_strlen(thai_head));j++) {
+					if (initial_cmp_char(initial,thai_head[j])) {
+						break;
+					}
+				}
+				if ((j!=ipoint)||(j==0)) {
+					ipoint=j;
+					fputs(group_skip,fp);
+					if (lethead_flag!=0) {
+						fputs(lethead_prefix,fp);
+						fprint_uchar(fp,&thai_head[j-1],M_NONE,1);
+						fputs(lethead_suffix,fp);
+					}
+				}
+			}
 			else {
-				if (CH_LATIN<=chset_prev&&chset_prev<=CH_HANZI){
+				if (CH_LATIN<=chset_prev&&chset_prev<=CH_THAI){
 					fputs(group_skip,fp);
 					if (lethead_flag!=0 && symbol_flag) {
 						if (strlen(symbol)) {
@@ -774,6 +852,17 @@
 		u_strcpy(ini,hz_index[lo-1].idx);
 		return;
 	}
+	else if (is_devanagari(&ch)||is_thai(&ch)) {
+		if (ch==0x929||ch==0x931||ch==0x934||(0x958<=ch&&ch<=0x95F)) {
+			src[0]=ch;  src[1]=0x00;
+			perr=U_ZERO_ERROR;
+			unorm2_normalize(unormalizer_NFD, src, 1, dest, 8, &perr);
+			if (U_SUCCESS(perr))
+				ch=dest[0];                         /* without modifier */
+		}
+		ini[0]=ch;
+		return;
+	}
 	if (ch==0x049||ch==0x069||ch==0x130||ch==0x131||ch==0x0CE||ch==0x0EE) {
 		/* check dotted/dotless İ,I,i,ı and Î,î for Turkish */
 		strX[0] = 0x131;  strX[1] = 0x5A;  strX[2] = 0x00;  /* ıZ */

Modified: trunk/Build/source/texk/upmendex/kana.h
===================================================================
--- trunk/Build/source/texk/upmendex/kana.h	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/kana.h	2021-08-08 03:26:18 UTC (rev 60191)
@@ -261,3 +261,112 @@
 	0x314E, /* ㅎ */
 	0
 };
+
+
+UChar DVNG_HEAD[]={
+	/* Independent vowels */
+	0x0904, /* ऄ */
+	0x0905, /* अ */
+	0x0906, /* आ */
+	0x0907, /* इ */
+	0x0908, /* ई */
+	0x0909, /* उ */
+	0x090A, /* ऊ */
+	0x090B, /* ऋ */
+	0x090C, /* ऌ */
+	0x090D, /* ऍ */
+	0x090E, /* ऎ */
+	0x090F, /* ए */
+	0x0910, /* ऐ */
+	0x0911, /* ऑ */
+	0x0912, /* ऒ */
+	0x0913, /* ओ */
+	0x0914, /* औ */
+	/* Consonants */
+	0x0915, /* क */
+	0x0916, /* ख */
+	0x0917, /* ग */
+	0x0918, /* घ */
+	0x0919, /* ङ */
+	0x091A, /* च */
+	0x091B, /* छ */
+	0x091C, /* ज */
+	0x091D, /* झ */
+	0x091E, /* ञ */
+	0x091F, /* ट */
+	0x0920, /* ठ */
+	0x0921, /* ड */
+	0x0922, /* ढ */
+	0x0923, /* ण */
+	0x0924, /* त */
+	0x0925, /* थ */
+	0x0926, /* द */
+	0x0927, /* ध */
+	0x0928, /* न */
+	0x092A, /* प */
+	0x092B, /* फ */
+	0x092C, /* ब */
+	0x092D, /* भ */
+	0x092E, /* म */
+	0x092F, /* य */
+	0x0930, /* र */
+	0x0932, /* ल */
+	0x0933, /* ळ */
+	0x0935, /* व */
+	0x0936, /* श */
+	0x0937, /* ष */
+	0x0938, /* स */
+	0x0939, /* ह */
+	0
+};
+
+
+UChar THAI_HEAD[]={
+	0x0E01, /* ก */
+	0x0E02, /* ข */
+	0x0E03, /* ฃ */
+	0x0E04, /* ค */
+	0x0E05, /* ฅ */
+	0x0E06, /* ฆ */
+	0x0E07, /* ง */
+	0x0E08, /* จ */
+	0x0E09, /* ฉ */
+	0x0E0A, /* ช */
+	0x0E0B, /* ซ */
+	0x0E0C, /* ฌ */
+	0x0E0D, /* ญ */
+	0x0E0E, /* ฎ */
+	0x0E0F, /* ฏ */
+	0x0E10, /* ฐ */
+	0x0E11, /* ฑ */
+	0x0E12, /* ฒ */
+	0x0E13, /* ณ */
+	0x0E14, /* ด */
+	0x0E15, /* ต */
+	0x0E16, /* ถ */
+	0x0E17, /* ท */
+	0x0E18, /* ธ */
+	0x0E19, /* น */
+	0x0E1A, /* บ */
+	0x0E1B, /* ป */
+	0x0E1C, /* ผ */
+	0x0E1D, /* ฝ */
+	0x0E1E, /* พ */
+	0x0E1F, /* ฟ */
+	0x0E20, /* ภ */
+	0x0E21, /* ม */
+	0x0E22, /* ย */
+	0x0E23, /* ร */
+	0x0E24, /* ฤ */
+	0x0E25, /* ล */
+	0x0E26, /* ฦ */
+	0x0E27, /* ว */
+	0x0E28, /* ศ */
+	0x0E29, /* ษ */
+	0x0E2A, /* ส */
+	0x0E2B, /* ห */
+	0x0E2C, /* ฬ */
+	0x0E2D, /* อ */
+	0x0E2E, /* ฮ */
+	0
+};

Modified: trunk/Build/source/texk/upmendex/main.c
===================================================================
--- trunk/Build/source/texk/upmendex/main.c	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/main.c	2021-08-08 03:26:18 UTC (rev 60191)
@@ -245,8 +245,10 @@
 
 	initkanatable();
 
-/*   init hangul tumunja table   */
-	u_strcpy(tumunja,GANADA);
+/*   init hangul,devanagari,thai *_head table   */
+	u_strcpy(hangul_head,GANADA);
+	u_strcpy(devanagari_head,DVNG_HEAD);
+	u_strcpy(thai_head,THAI_HEAD);
 
 	for (k=0;styfile[k]!=NULL;k++) {
 		styread(styfile[k]);

Modified: trunk/Build/source/texk/upmendex/mendex.h
===================================================================
--- trunk/Build/source/texk/upmendex/mendex.h	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/mendex.h	2021-08-08 03:26:18 UTC (rev 60191)
@@ -56,6 +56,8 @@
 int is_zhuyin(UChar *c);
 int is_cyrillic(UChar *c);
 int is_greek(UChar *c);
+int is_devanagari(UChar *c);
+int is_thai(UChar *c);
 int is_comb_diacritical_mark(UChar *c);
 int chkcontinue(struct page *p, int num);
 int ss_comp(UChar *s1, UChar *s2);
@@ -67,6 +69,8 @@
 #define CH_KANA         4
 #define CH_HANGUL       5
 #define CH_HANZI        6
+#define CH_DEVANAGARI   7
+#define CH_THAI         8
 #define CH_SYMBOL   0x100
 #define CH_NUMERIC  0x101
 

Modified: trunk/Build/source/texk/upmendex/sort.c
===================================================================
--- trunk/Build/source/texk/upmendex/sort.c	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/sort.c	2021-08-08 03:26:18 UTC (rev 60191)
@@ -4,7 +4,7 @@
 #include "exkana.h"
 #include "exvar.h"
 
-int sym,nmbr,ltn,kana,hngl,hnz,cyr,grk;
+int sym,nmbr,ltn,kana,hngl,hnz,cyr,grk,dvng,thai;
 
 static int wcomp(const void *p, const void *q);
 static int pcomp(const void *p, const void *q);
@@ -57,6 +57,14 @@
 			grk=order++;
 			break;
 
+		case 'D':
+			dvng=order++;
+			break;
+
+		case 'T':
+			thai=order++;
+			break;
+
 		default:
 			break;
 		}
@@ -72,6 +80,8 @@
 	if (hnz==0) hnz=order++;
 	if (cyr==0) cyr=order++;
 	if (grk==0) grk=order++;
+	if (dvng==0) dvng=order++;
+	if (thai==0) thai=order++;
 
 	status = U_ZERO_ERROR;
 	if (strlen(icu_rules)>0) {
@@ -257,6 +267,9 @@
 		else if (is_hanzi(c))    return hnz;
 		else if (is_cyrillic(c)) return cyr;
 		else if (is_greek(c))    return grk;
+		else if (is_numeric(c))  return nmbr;
+		else if (is_devanagari(c)) return dvng;
+		else if (is_thai(c))     return thai;
 		else return sym;
 	}
 }
@@ -276,6 +289,9 @@
 		else if (is_hanzi(c))    return CH_HANZI;
 		else if (is_cyrillic(c)) return CH_CYRILLIC;
 		else if (is_greek(c))    return CH_GREEK;
+		else if (is_numeric(c))  return CH_NUMERIC;
+		else if (is_devanagari(c)) return CH_DEVANAGARI;
+		else if (is_thai(c))     return CH_THAI;
 		else return CH_SYMBOL;
 	}
 }
@@ -361,6 +377,8 @@
 int is_numeric(UChar *c)
 {
 	if ((*c>=L'0')&&(*c<=L'9')) return 1;
+	else if ((*c>=0x0966)&&(*c<=0x096F)) return 1; /* Devanagari Digit */
+	else if ((*c>=0x0E50)&&(*c<=0x0E59)) return 1; /* Thai Digit */
 	else return 0;
 }
 
@@ -438,6 +456,22 @@
 	else return 0;
 }
 
+int is_devanagari(UChar *c)
+{
+	if      ((*c>=0x0966)&&(*c<=0x096F)) return 0; /* Devanagari Digit */
+	else if ((*c>=0x0900)&&(*c<=0x097F)) return 1; /* Devanagari */
+	else if ((*c>=0xA8E0)&&(*c<=0xA8FF)) return 1; /* Devanagari Extended */
+	else return 0;
+}
+
+int is_thai(UChar *c)
+{
+	if      ((*c==0x0E3F))               return 0; /* Thai Currency Symbol Baht */
+	else if ((*c>=0x0E50)&&(*c<=0x0E59)) return 0; /* Thai Digit */
+	else if ((*c>=0x0E00)&&(*c<=0x0E7F)) return 1; /* Thai */
+	else return 0;
+}
+
 int is_comb_diacritical_mark(UChar *c)
 {
 	if      ((*c>=0x02B0)&&(*c<=0x02FF)) return 1; /* Spacing Modifier Letters */

Modified: trunk/Build/source/texk/upmendex/styfile.c
===================================================================
--- trunk/Build/source/texk/upmendex/styfile.c	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/styfile.c	2021-08-08 03:26:18 UTC (rev 60191)
@@ -136,8 +136,8 @@
 			multibyte_to_widechar(kana_head,STYBUFSIZE,tmp);
 			continue;
 		}
-		if (getparam(buff,"tumunja",tmp)) {
-			multibyte_to_widechar(tumunja,STYBUFSIZE,tmp);
+		if (getparam(buff,"hangul_head",tmp) || getparam(buff,"tumunja",tmp)) {
+			multibyte_to_widechar(hangul_head,STYBUFSIZE,tmp);
 			continue;
 		}
 		if (getparam(buff,"hanzi_head",tmp)) {
@@ -144,6 +144,14 @@
 			multibyte_to_widechar(hanzi_head,STYBUFSIZE,tmp);
 			continue;
 		}
+		if (getparam(buff,"thai_head",tmp)) {
+			multibyte_to_widechar(thai_head,STYBUFSIZE,tmp);
+			continue;
+		}
+		if (getparam(buff,"devanagari_head",tmp)) {
+			multibyte_to_widechar(devanagari_head,STYBUFSIZE,tmp);
+			continue;
+		}
 		if (getparam(buff,"page_compositor",page_compositor)) continue;
 		if (getparam(buff,"page_precedence",page_precedence)) continue;
 		if (getparam(buff,"character_order",character_order)) continue;
@@ -352,4 +360,10 @@
 		else if (strstr(pos,"off"))           icu_attributes[attr]=UCOL_OFF;
 		else	verb_printf(efp,"\nWarning: Illegal input for icu_attributes (normalization-mode).");
 	}
+	if ((pos=strstr(tmp,"numeric-ordering:"))>0) {
+		pos+=17;  attr=UCOL_NUMERIC_COLLATION;
+		if      (strstr(pos,"on"))            icu_attributes[attr]=UCOL_ON;
+		else if (strstr(pos,"off"))           icu_attributes[attr]=UCOL_OFF;
+		else	verb_printf(efp,"\nWarning: Illegal input for icu_attributes (numeric-ordering).");
+	}
 }

Modified: trunk/Build/source/texk/upmendex/upmendex.ja.txt
===================================================================
--- trunk/Build/source/texk/upmendex/upmendex.ja.txt	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/upmendex.ja.txt	2021-08-08 03:26:18 UTC (rev 60191)
@@ -173,10 +173,14 @@
       デフォルトでは、letter_head とコマンドラインオプション '-g'
       の設定に従う。(upmendex専用)
 
-   tumunja  <文字列>
+   hangul_head  <文字列>
       "ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎ"
       ハングル見出し文字。見出し文字を文字列で指定する。(upmendex専用)
 
+   tumunja  <文字列>
+      "ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎ"
+      hangul_headと同じ。(非推奨, upmendex専用)
+
    hanzi_head  <文字列>
       ""
       漢字見出し文字列。見出し文字列を";"で区切って指定することにより
@@ -309,10 +313,13 @@
       ((up)mendex専用)
 
    character_order  <文字列>
-      "SNLGCJKH"
+      "SNLGCJKHDT"
       記号、英字、日本語の優先順位。'S'は記号、'N'は数字、'L'はラテン文字、
       'G'はギリシャ文字、'C'はキリル文字、'J'は日本語(かな)、'K'はハングル、
-      'H'は漢字を表す。(upmendex専用)
+      'H'は漢字、'D'はデーヴァナーガリー、'T'はタイ文字を表す。
+      upmendexでは索引項目の分類として「数字」は「記号」に含める仕様なので、
+      'S'と'N'は必ず隣り合わせること(数字と数字以外の記号の順序入れ替えは可能)。
+      (upmendex専用)
 
    icu_locale  <文字列>
       ""
@@ -346,7 +353,8 @@
       "french-collation:on", "french-collation:off",
       "case-first:off", "case-first:upper-first", "case-first:lower-first",
       "case-level:on", "case-level:off",
-      "normalization-mode:on", "normalization-mode:off"
+      "normalization-mode:on", "normalization-mode:off",
+      "numeric-ordering:on", "numeric-ordering:off"
       (upmendex専用)
 
 

Modified: trunk/Build/source/texk/upmendex/var.h
===================================================================
--- trunk/Build/source/texk/upmendex/var.h	2021-08-08 00:53:29 UTC (rev 60190)
+++ trunk/Build/source/texk/upmendex/var.h	2021-08-08 03:26:18 UTC (rev 60191)
@@ -32,9 +32,10 @@
 char numhead_positive[STYBUFSIZE]={"Numbers"},numhead_negative[STYBUFSIZE]={"numbers"};
 int symbol_flag=1;
 int letter_head=1;
-UChar atama[STYBUFSIZE],tumunja[STYBUFSIZE],hanzi_head[STYBUFSIZE]={L'\0'},kana_head[STYBUFSIZE]={L'\0'};
+UChar atama[STYBUFSIZE],hangul_head[STYBUFSIZE],hanzi_head[STYBUFSIZE]={L'\0'},kana_head[STYBUFSIZE]={L'\0'};
+UChar devanagari_head[STYBUFSIZE],thai_head[STYBUFSIZE];
 char page_compositor[STYBUFSIZE]={"-"},page_precedence[STYBUFSIZE]={"rnaRA"};
-char character_order[STYBUFSIZE]={"SNLGCJKH"};
+char character_order[STYBUFSIZE]={"SNLGCJKHDT"};
 char icu_locale[STYBUFSIZE]={"root"},icu_rules[STYBUFSIZE]={""};
 int icu_attributes[UCOL_ATTRIBUTE_COUNT];
 



More information about the tex-live-commits mailing list.