texlive[64341] Build/source/texk/upmendex: upmendex: version 1.03, support Kana Archaic Wu, Yi, Ye

commits+takuji at tug.org commits+takuji at tug.org
Sun Sep 11 12:15:22 CEST 2022


Revision: 64341
          http://tug.org/svn/texlive?view=revision&revision=64341
Author:   takuji
Date:     2022-09-11 12:15:22 +0200 (Sun, 11 Sep 2022)
Log Message:
-----------
upmendex: version 1.03, support Kana Archaic Wu, Yi, Ye

Modified Paths:
--------------
    trunk/Build/source/texk/upmendex/ChangeLog
    trunk/Build/source/texk/upmendex/configure
    trunk/Build/source/texk/upmendex/configure.ac
    trunk/Build/source/texk/upmendex/convert.c
    trunk/Build/source/texk/upmendex/fwrite.c
    trunk/Build/source/texk/upmendex/kana.h
    trunk/Build/source/texk/upmendex/main.c
    trunk/Build/source/texk/upmendex/sort.c

Modified: trunk/Build/source/texk/upmendex/ChangeLog
===================================================================
--- trunk/Build/source/texk/upmendex/ChangeLog	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/ChangeLog	2022-09-11 10:15:22 UTC (rev 64341)
@@ -1,3 +1,10 @@
+2022-09-11  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* version 1.03  Stable version.
+	* configure.ac: Bump version.
+	* main.c, sort.c, convert.c, fwrite.c, kana.h:
+	Support U+1B11F..1B122 Hiragana/Katakana Letter Archaic Wu, Yi, Ye.
+
 2022-09-04  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* version 1.02  Stable version.

Modified: trunk/Build/source/texk/upmendex/configure
===================================================================
--- trunk/Build/source/texk/upmendex/configure	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/configure	2022-09-11 10:15:22 UTC (rev 64341)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.02.
+# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.03.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -626,8 +626,8 @@
 # Identity of this package.
 PACKAGE_NAME='upmendex (TeX Live)'
 PACKAGE_TARNAME='upmendex--tex-live-'
-PACKAGE_VERSION='1.02'
-PACKAGE_STRING='upmendex (TeX Live) 1.02'
+PACKAGE_VERSION='1.03'
+PACKAGE_STRING='upmendex (TeX Live) 1.03'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1390,7 +1390,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures upmendex (TeX Live) 1.02 to adapt to many kinds of systems.
+\`configure' configures upmendex (TeX Live) 1.03 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1462,7 +1462,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of upmendex (TeX Live) 1.02:";;
+     short | recursive ) echo "Configuration of upmendex (TeX Live) 1.03:";;
    esac
   cat <<\_ACEOF
 
@@ -1587,7 +1587,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-upmendex (TeX Live) configure 1.02
+upmendex (TeX Live) configure 1.03
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2268,7 +2268,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by upmendex (TeX Live) $as_me 1.02, which was
+It was created by upmendex (TeX Live) $as_me 1.03, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -8806,7 +8806,7 @@
 
 # Define the identity of the package.
  PACKAGE='upmendex--tex-live-'
- VERSION='1.02'
+ VERSION='1.03'
 
 
 # Some tools Automake needs.
@@ -18942,7 +18942,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-upmendex (TeX Live) config.lt 1.02
+upmendex (TeX Live) config.lt 1.03
 configured by $0, generated by GNU Autoconf 2.71.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -21114,7 +21114,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by upmendex (TeX Live) $as_me 1.02, which was
+This file was extended by upmendex (TeX Live) $as_me 1.03, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21182,7 +21182,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-upmendex (TeX Live) config.status 1.02
+upmendex (TeX Live) config.status 1.03
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/upmendex/configure.ac
===================================================================
--- trunk/Build/source/texk/upmendex/configure.ac	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/configure.ac	2022-09-11 10:15:22 UTC (rev 64341)
@@ -8,7 +8,7 @@
 dnl   gives unlimited permission to copy and/or distribute it,
 dnl   with or without modifications, as long as this notice is preserved.
 dnl
-AC_INIT([upmendex (TeX Live)],[1.02])
+AC_INIT([upmendex (TeX Live)],[1.03])
 AC_PREREQ([2.71])
 AC_CONFIG_SRCDIR([main.c])
 AC_CONFIG_AUX_DIR([../../build-aux])

Modified: trunk/Build/source/texk/upmendex/convert.c
===================================================================
--- trunk/Build/source/texk/upmendex/convert.c	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/convert.c	2022-09-11 10:15:22 UTC (rev 64341)
@@ -43,7 +43,12 @@
 	aiueo=u_xstrdup(AIUEO);
 	for (i=0;;i++) {
 		if (aiueo[i]==0) break;
-		aiueo[i]+=KATATOP-HIRATOP; /* hiragana -> katakana */
+		if (aiueo[i]==0xd82c) {
+			i++;
+			if (aiueo[i]==0xdd1f) aiueo[i]+=3;  /* U+1B11F -> U+1B122 */
+		}
+		else
+			aiueo[i]+=KATATOP-HIRATOP; /* hiragana -> katakana */
 	}
 
 	u_strcpy(atama,akasatana);

Modified: trunk/Build/source/texk/upmendex/fwrite.c
===================================================================
--- trunk/Build/source/texk/upmendex/fwrite.c	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/fwrite.c	2022-09-11 10:15:22 UTC (rev 64341)
@@ -16,7 +16,7 @@
 static void linecheck(char *lbuff, char *tmpbuff, FILE *fp, int force);
 static void crcheck(char *lbuff, FILE *fp);
 static void index_normalize(UChar *istr, UChar *ini, int *chset);
-static int initial_cmp_char(UChar *ini, UChar ch);
+static int initial_cmp_char(UChar *ini, UChar *ch);
 static int init_hanzi_header(void);
 static const UNormalizer2 *unormalizer_NFD, *unormalizer_NFKD;
 static int turkish_i;
@@ -166,9 +166,9 @@
 /*   write ind file   */
 void indwrite(char *filename, struct index *ind, int pagenum)
 {
-	int i,j,hpoint=0,tpoint=0,ipoint=0,jpoint=0,block_open=0;
+	int i,j,k,hpoint=0,tpoint=0,ipoint=0,jpoint=0,block_open=0;
 	char lbuff[BUFFERLEN],obuff[BUFFERLEN];
-	UChar datama[256],initial[INITIALLENGTH],initial_prev[INITIALLENGTH];
+	UChar initial[INITIALLENGTH],initial_prev[INITIALLENGTH];
 	int chset,chset_prev;
 	FILE *fp=NULL;
 	UErrorCode perr;
@@ -182,7 +182,6 @@
 #endif
 	}
 
-	convert(atama,datama);
 	fputs(preamble,fp);
 
 	if (fpage>0) {
@@ -228,24 +227,28 @@
 			else if (chset==CH_KANA) {
 				if (lethead_flag!=0) {
 					fputs(lethead_prefix,fp);
-					for (j=hpoint;j<(u_strlen(datama));j++) {
-						if (initial_cmp_char(initial,datama[j])) {
-							fprint_uchar(fp,&atama[j-1],M_NONE,1);
+					for (j=hpoint;j<(u_strlen(kana_head));) {
+						if (initial_cmp_char(initial,&kana_head[j])) {
+							k=j;  U16_BACK_1(kana_head, 0, k);
+							fprint_uchar(fp,&kana_head[k],M_NONE,1);
 							hpoint=j;
 							break;
 						}
+						U16_FWD_1(kana_head, j, -1);
 					}
-					if (j==(u_strlen(datama))) {
-						fprint_uchar(fp,&atama[j-1],M_NONE,1);
+					if (j==(u_strlen(kana_head))) {
+						k=j;  U16_BACK_1(kana_head, 0, k);
+						fprint_uchar(fp,&kana_head[k],M_NONE,1);
 					}
 					fputs(lethead_suffix,fp);
 				}
 				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
 				SPRINTF(lbuff,"%s%s",item_0,obuff);
-				for (hpoint=0;hpoint<(u_strlen(datama));hpoint++) {
-					if (initial_cmp_char(initial,datama[hpoint])) {
+				for (hpoint=0;hpoint<(u_strlen(kana_head));) {
+					if (initial_cmp_char(initial,&kana_head[hpoint])) {
 						break;
 					}
+					U16_FWD_1(kana_head, hpoint, -1);
 				}
 			}
 			else if (chset==CH_HANGUL) {
@@ -252,7 +255,7 @@
 				if (lethead_flag!=0) {
 					fputs(lethead_prefix,fp);
 					for (j=tpoint;j<(u_strlen(hangul_head));j++) {
-						if (initial_cmp_char(initial,hangul_head[j])) {
+						if (initial_cmp_char(initial,&hangul_head[j])) {
 							fprint_uchar(fp,&hangul_head[j-1],M_NONE,1);
 							tpoint=j;
 							break;
@@ -266,7 +269,7 @@
 				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
 				SPRINTF(lbuff,"%s%s",item_0,obuff);
 				for (tpoint=0;tpoint<(u_strlen(hangul_head));tpoint++) {
-					if (initial_cmp_char(initial,hangul_head[tpoint])) {
+					if (initial_cmp_char(initial,&hangul_head[tpoint])) {
 						break;
 					}
 				}
@@ -274,24 +277,28 @@
 			else if (chset==CH_DEVANAGARI) {
 				if (lethead_flag!=0) {
 					fputs(lethead_prefix,fp);
-					for (j=jpoint;j<(u_strlen(devanagari_head));j++) {
-						if (initial_cmp_char(initial,devanagari_head[j])) {
-							fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+					for (j=jpoint;j<(u_strlen(devanagari_head));) {
+						if (initial_cmp_char(initial,&devanagari_head[j])) {
+							k=j;  U16_BACK_1(devanagari_head, 0, k);
+							fprint_uchar(fp,&devanagari_head[k],M_NONE,1);
 							jpoint=j;
 							break;
 						}
+						U16_FWD_1(devanagari_head, j, -1);
 					}
 					if (j==(u_strlen(devanagari_head))) {
-						fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+						k=j;  U16_BACK_1(devanagari_head, 0, k);
+						fprint_uchar(fp,&devanagari_head[k],M_NONE,1);
 					}
 					fputs(lethead_suffix,fp);
 				}
 				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
 				SPRINTF(lbuff,"%s%s",item_0,obuff);
-				for (jpoint=0;jpoint<(u_strlen(devanagari_head));jpoint++) {
-					if (initial_cmp_char(initial,devanagari_head[jpoint])) {
+				for (jpoint=0;jpoint<(u_strlen(devanagari_head));) {
+					if (initial_cmp_char(initial,&devanagari_head[jpoint])) {
 						break;
 					}
+					U16_FWD_1(devanagari_head, jpoint, -1);
 				}
 			}
 			else if (chset==CH_THAI) {
@@ -298,7 +305,7 @@
 				if (lethead_flag!=0) {
 					fputs(lethead_prefix,fp);
 					for (j=ipoint;j<(u_strlen(thai_head));j++) {
-						if (initial_cmp_char(initial,thai_head[j])) {
+						if (initial_cmp_char(initial,&thai_head[j])) {
 							fprint_uchar(fp,&thai_head[j-1],M_NONE,1);
 							ipoint=j;
 							break;
@@ -312,7 +319,7 @@
 				widechar_to_multibyte(obuff,BUFFERLEN,ind[i].idx[0]);
 				SPRINTF(lbuff,"%s%s",item_0,obuff);
 				for (ipoint=0;ipoint<(u_strlen(thai_head));ipoint++) {
-					if (initial_cmp_char(initial,thai_head[ipoint])) {
+					if (initial_cmp_char(initial,&thai_head[ipoint])) {
 						break;
 					}
 				}
@@ -379,17 +386,19 @@
 				}
 			}
 			else if (chset==CH_KANA) {
-				for (j=hpoint;j<(u_strlen(datama));j++) {
-					if (initial_cmp_char(initial,datama[j])) {
+				for (j=hpoint;j<(u_strlen(kana_head));) {
+					if (initial_cmp_char(initial,&kana_head[j])) {
 						break;
 					}
+					U16_FWD_1(kana_head, j, -1);
 				}
 				if ((j!=hpoint)||(j==0)) {
 					hpoint=j;
 					fputs(group_skip,fp);
 					if (lethead_flag!=0) {
+						k=j;  U16_BACK_1(kana_head, 0, k);
 						fputs(lethead_prefix,fp);
-						fprint_uchar(fp,&atama[j-1],M_NONE,1);
+						fprint_uchar(fp,&kana_head[k],M_NONE,1);
 						fputs(lethead_suffix,fp);
 					}
 				}
@@ -396,7 +405,7 @@
 			}
 			else if (chset==CH_HANGUL) {
 				for (j=tpoint;j<(u_strlen(hangul_head));j++) {
-					if (initial_cmp_char(initial,hangul_head[j])) {
+					if (initial_cmp_char(initial,&hangul_head[j])) {
 						break;
 					}
 				}
@@ -411,17 +420,19 @@
 				}
 			}
 			else if (chset==CH_DEVANAGARI) {
-				for (j=jpoint;j<(u_strlen(devanagari_head));j++) {
-					if (initial_cmp_char(initial,devanagari_head[j])) {
+				for (j=jpoint;j<(u_strlen(devanagari_head));) {
+					if (initial_cmp_char(initial,&devanagari_head[j])) {
 						break;
 					}
+					U16_FWD_1(devanagari_head, j, -1);
 				}
 				if ((j!=jpoint)||(j==0)) {
 					jpoint=j;
 					fputs(group_skip,fp);
 					if (lethead_flag!=0) {
+						k=j;  U16_BACK_1(devanagari_head, 0, k);
 						fputs(lethead_prefix,fp);
-						fprint_uchar(fp,&devanagari_head[j-1],M_NONE,1);
+						fprint_uchar(fp,&devanagari_head[k],M_NONE,1);
 						fputs(lethead_suffix,fp);
 					}
 				}
@@ -428,7 +439,7 @@
 			}
 			else if (chset==CH_THAI) {
 				for (j=ipoint;j<(u_strlen(thai_head));j++) {
-					if (initial_cmp_char(initial,thai_head[j])) {
+					if (initial_cmp_char(initial,&thai_head[j])) {
 						break;
 					}
 				}
@@ -812,6 +823,13 @@
 				ini[0]=0x307B; break;  /* ほ */
 			case 0x1B000:                  /* 𛀀 */
 				ini[0]=0x3048; break;  /* え */
+			case 0x1B11F: case 0x1B122:    /* Archaic WU 𛄟 𛄢 */
+				ini[0]=0xD82C; ini[1]=0xDD1F; ini[2]=L'\0';
+				break;
+			case 0x1B120:                  /* Archaic YI 𛄠 */
+				ini[0]=0xD82C; ini[1]=0xDD20; ini[2]=L'\0'; break;
+			case 0x1B121:                  /* Archaic YE 𛄡 */
+				ini[0]=0xD82C; ini[1]=0xDD21; ini[2]=L'\0'; break;
 			case 0x1B132: case 0x1B155:
 				ini[0]=0x3053; break;  /* こ */
 			case 0x1B150: case 0x1B164:
@@ -909,6 +927,10 @@
 		u_strcpy(ini,hz_index[lo-1].idx);
 		return;
 	}
+	else if (is_devanagari(istr)==2) {
+		ini[0]=istr[0]; ini[1]=istr[1]; ini[2]=L'\0';
+		return;
+	}
 	else if (is_devanagari(&ch)||is_thai(&ch)||is_arabic(&ch)||is_hebrew(&ch)) {
 		if (ch==0x626) {  /* Arabic Letter Yeh with Hamza Above for Uyghur */
 			strY[0]=0x626; strY[1]=L'\0'; /* Yeh with Hamza Above */
@@ -1091,12 +1113,14 @@
 	return;
 }
 
-static int initial_cmp_char(UChar *ini, UChar ch)
+static int initial_cmp_char(UChar *ini, UChar *ch)
 {
-	UChar initial_tmp[INITIALLENGTH],istr[2];
-	int chset;
-	istr[0]=ch;
-	istr[1]=L'\0';
+	UChar initial_tmp[INITIALLENGTH],istr[3];
+	int chset, l;
+	l = is_surrogate_pair(ch) ? 2 : 1;
+	          istr[0]=ch[0];
+	if (l==2) istr[1]=ch[1];
+	          istr[l]=L'\0';
 
 	index_normalize(istr, initial_tmp, &chset);
 	return (ss_comp(ini, initial_tmp)<0);

Modified: trunk/Build/source/texk/upmendex/kana.h
===================================================================
--- trunk/Build/source/texk/upmendex/kana.h	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/kana.h	2022-09-11 10:15:22 UTC (rev 64341)
@@ -65,7 +65,9 @@
 	0x3081, /* め */
 	0x3082, /* も */
 	0x3084, /* や */
+	0xd82c, 0xdd20, /* U+1B120 Katakana Letter Archaic YI  𛄠 */
 	0x3086, /* ゆ */
+	0xd82c, 0xdd21, /* U+1B121 Katakana Letter Archaic YE  𛄡 */
 	0x3088, /* よ */
 	0x3089, /* ら */
 	0x308a, /* り */
@@ -74,6 +76,7 @@
 	0x308d, /* ろ */
 	0x308f, /* わ */
 	0x3090, /* ゐ */
+	0xd82c, 0xdd1f, /* U+1B11F Hiragana Letter Archaic WU  𛄟 */
 	0x3091, /* ゑ */
 	0x3092, /* を */
 	0x3093, /* ん */

Modified: trunk/Build/source/texk/upmendex/main.c
===================================================================
--- trunk/Build/source/texk/upmendex/main.c	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/main.c	2022-09-11 10:15:22 UTC (rev 64341)
@@ -283,7 +283,7 @@
 	default:
 		break;
 	}
-	if (u_strlen(kana_head)>0) u_strcpy(atama,kana_head);
+	if (u_strlen(kana_head)==0) u_strcpy(kana_head,atama);
 
 /*   read idx file   */
 

Modified: trunk/Build/source/texk/upmendex/sort.c
===================================================================
--- trunk/Build/source/texk/upmendex/sort.c	2022-09-11 03:52:43 UTC (rev 64340)
+++ trunk/Build/source/texk/upmendex/sort.c	2022-09-11 10:15:22 UTC (rev 64341)
@@ -483,11 +483,13 @@
 		c32=U16_GET_SUPPLEMENTARY(*c,*(c+1));
 		if ((c32>=0x1B130) && (c32<=0x1B16F)) return 2; /* Small Kana Extensions */
 		else if ((c32==0x1B000))              return 2; /* KATAKANA LETTER ARCHAIC E */
+		else if ((c32>=0x1B11F)                         /* HIRAGANA LETTER ARCHAIC WU */
+		                   && (c32<=0x1B122)) return 2; /* KATAKANA LETTER ARCHAIC WU */
 		else if ((c32==0x1F200))              return 2; /* SQUARE HIRAGANA HOKA */
 	}
 	return 0;
-		/* ICU 65 does not seem to support
-		   "Kana Supplement" and "Kana Extended-A" yet. (2020/02/16) */
+		/* ICU 71.1 does not seem to support
+		   most of "Kana Supplement" and "Kana Extended-A" yet. (2022/09/11) */
 }
 
 int is_kor_hngl(UChar *c)



More information about the tex-live-commits mailing list.