texlive[64650] Build/source/texk/upmendex: upmendex: ver1.05, Support U+1B001 Hiragana Letter Archaic Ye

commits+takuji at tug.org commits+takuji at tug.org
Sat Oct 8 16:19:03 CEST 2022


Revision: 64650
          http://tug.org/svn/texlive?view=revision&revision=64650
Author:   takuji
Date:     2022-10-08 16:19:02 +0200 (Sat, 08 Oct 2022)
Log Message:
-----------
upmendex: ver1.05, Support U+1B001 Hiragana Letter Archaic Ye

Modified Paths:
--------------
    trunk/Build/source/texk/upmendex/ChangeLog
    trunk/Build/source/texk/upmendex/configure
    trunk/Build/source/texk/upmendex/configure.ac
    trunk/Build/source/texk/upmendex/exkana.h
    trunk/Build/source/texk/upmendex/fwrite.c
    trunk/Build/source/texk/upmendex/kana.h
    trunk/Build/source/texk/upmendex/main.c
    trunk/Build/source/texk/upmendex/mendex.h
    trunk/Build/source/texk/upmendex/sort.c

Modified: trunk/Build/source/texk/upmendex/ChangeLog
===================================================================
--- trunk/Build/source/texk/upmendex/ChangeLog	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/ChangeLog	2022-10-08 14:19:02 UTC (rev 64650)
@@ -1,3 +1,13 @@
+2022-10-08  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* version 1.05  Stable version.
+	* configure.ac: Bump version.
+	* main.c, sort.c, fwrite.c, {,ex}kana.h, mendex.h:
+	Support U+1B001 Hiragana Letter Archaic Ye.
+	* fwrite.c:
+	Strict check for dotted/dotless I/i in Turkish.
+	Strict check for Thai reordering.
+
 2022-09-17  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* fwrite.c:

Modified: trunk/Build/source/texk/upmendex/configure
===================================================================
--- trunk/Build/source/texk/upmendex/configure	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/configure	2022-10-08 14:19:02 UTC (rev 64650)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.03.
+# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.05.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -626,8 +626,8 @@
 # Identity of this package.
 PACKAGE_NAME='upmendex (TeX Live)'
 PACKAGE_TARNAME='upmendex--tex-live-'
-PACKAGE_VERSION='1.03'
-PACKAGE_STRING='upmendex (TeX Live) 1.03'
+PACKAGE_VERSION='1.05'
+PACKAGE_STRING='upmendex (TeX Live) 1.05'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1390,7 +1390,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures upmendex (TeX Live) 1.03 to adapt to many kinds of systems.
+\`configure' configures upmendex (TeX Live) 1.05 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1462,7 +1462,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of upmendex (TeX Live) 1.03:";;
+     short | recursive ) echo "Configuration of upmendex (TeX Live) 1.05:";;
    esac
   cat <<\_ACEOF
 
@@ -1587,7 +1587,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-upmendex (TeX Live) configure 1.03
+upmendex (TeX Live) configure 1.05
 generated by GNU Autoconf 2.71
 
 Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2268,7 +2268,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by upmendex (TeX Live) $as_me 1.03, which was
+It was created by upmendex (TeX Live) $as_me 1.05, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -8806,7 +8806,7 @@
 
 # Define the identity of the package.
  PACKAGE='upmendex--tex-live-'
- VERSION='1.03'
+ VERSION='1.05'
 
 
 # Some tools Automake needs.
@@ -18942,7 +18942,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-upmendex (TeX Live) config.lt 1.03
+upmendex (TeX Live) config.lt 1.05
 configured by $0, generated by GNU Autoconf 2.71.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -21114,7 +21114,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by upmendex (TeX Live) $as_me 1.03, which was
+This file was extended by upmendex (TeX Live) $as_me 1.05, which was
 generated by GNU Autoconf 2.71.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -21182,7 +21182,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-upmendex (TeX Live) config.status 1.03
+upmendex (TeX Live) config.status 1.05
 configured by $0, generated by GNU Autoconf 2.71,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/upmendex/configure.ac
===================================================================
--- trunk/Build/source/texk/upmendex/configure.ac	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/configure.ac	2022-10-08 14:19:02 UTC (rev 64650)
@@ -8,7 +8,7 @@
 dnl   gives unlimited permission to copy and/or distribute it,
 dnl   with or without modifications, as long as this notice is preserved.
 dnl
-AC_INIT([upmendex (TeX Live)],[1.03])
+AC_INIT([upmendex (TeX Live)],[1.05])
 AC_PREREQ([2.71])
 AC_CONFIG_SRCDIR([main.c])
 AC_CONFIG_AUX_DIR([../../build-aux])

Modified: trunk/Build/source/texk/upmendex/exkana.h
===================================================================
--- trunk/Build/source/texk/upmendex/exkana.h	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/exkana.h	2022-10-08 14:19:02 UTC (rev 64650)
@@ -4,6 +4,7 @@
 extern UChar *aiueo;
 extern UChar kanatable[];
 extern UChar extkanatable[];
+extern int   kana_ye_mode;
 
 #define SPACE    0x3000    /* 全角スペース */
 #define ALPHATOP 0xff01    /* ! */

Modified: trunk/Build/source/texk/upmendex/fwrite.c
===================================================================
--- trunk/Build/source/texk/upmendex/fwrite.c	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/fwrite.c	2022-10-08 14:19:02 UTC (rev 64650)
@@ -86,7 +86,7 @@
 		olen=u_strToUpper(istr,INITIALLENGTH,istr,wclen,"",&perr);
 	} else if (mode==M_TO_LOWER) {
 		perr=U_ZERO_ERROR;
-		olen=u_strToLower(istr,INITIALLENGTH,istr,wclen, istr[0]==0x130&&turkish_i?"tr":"", &perr);
+		olen=u_strToLower(istr,INITIALLENGTH,istr,wclen, istr[0]==0x130&&turkish_i==2?"tr":"", &perr);
 	} else if (mode==M_TO_TITLE) {
 		perr=U_ZERO_ERROR;
 		olen=u_strToTitle(istr,INITIALLENGTH,istr,wclen,NULL,"",&perr);
@@ -828,7 +828,7 @@
 				break;
 			case 0x1B120:                  /* Archaic YI 𛄠 */
 				ini[0]=0xD82C; ini[1]=0xDD20; ini[2]=L'\0'; break;
-			case 0x1B121:                  /* Archaic YE 𛄡 */
+			case 0x1B121: case 0x1B001:    /* Archaic YE 𛄡 𛀁 */
 				ini[0]=0xD82C; ini[1]=0xDD21; ini[2]=L'\0'; break;
 			case 0x1B132: case 0x1B155:
 				ini[0]=0x3053; break;  /* こ */
@@ -928,8 +928,9 @@
 		return;
 	}
 	else if (is_thai(&ch)) {
-		if (istr[0]>=0x0E2F && (istr[1]>0x0E00 && istr[1]<0x0E2F)) {
-			/* Vowel followed by Consonant */
+		if ((istr[0]>=0x0E40 && istr[0]<=0x0E44) && (istr[1]>=0x0E01 && istr[1]<=0x0E2E)) {
+			/* Thai reordering :: Vowel followed by Consonant */
+			/* https://unicode-org.github.io/icu/userguide/collation/concepts.html#thailao-reordering */
 			ini[0]=istr[1];
 		} else {
 			ini[0]=istr[0];
@@ -974,12 +975,17 @@
 	}
 	if (ch==0x049||ch==0x069||ch==0x130||ch==0x131||ch==0x0CE||ch==0x0EE) {
 		/* check dotted/dotless İ,I,i,ı and Î,î for Turkish */
-		strX[0] = 0x131;  strX[1] = 0x5A;  strX[2] = 0x00;  /* ıZ */
-		strZ[0] = 0x069;  strZ[1] = 0x00;                   /* i  */
-		order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
-		if (order==UCOL_GREATER) {
+		if (turkish_i==0) {
+			strgth = ucol_getStrength(icu_collator);
+			ucol_setStrength(icu_collator, UCOL_SECONDARY);
+			strX[0] = 0x131;  strX[1] = 0x069;  strX[2] = 0x00; /* ıi */
+			strZ[0] = 0x049;  strZ[1] = 0x130;  strZ[2] = 0x00; /* Iİ */
+			order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
+			turkish_i = (order==UCOL_EQUAL) ? 2 : 1;
+			ucol_setStrength(icu_collator, strgth);
+		}
+		if (turkish_i==2) {
 			ini[0] = (ch==0x049||ch==0x131) ? 0x131 : 0x130; /* ı or İ */
-			turkish_i=1;
 			return;
 		}
 	}
@@ -991,7 +997,7 @@
 			strX[0] = 0x059;  strX[1] = 0x00; /* Y */
 			strZ[0] = 0x049;  strZ[1] = 0x00; /* I */
 			order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
-			if (order==UCOL_EQUAL) i_y_mode=2; else i_y_mode=1;
+			i_y_mode = (order==UCOL_EQUAL) ? 2 : 1;
 			ucol_setStrength(icu_collator, strgth);
 		}
 		if (i_y_mode==2) {

Modified: trunk/Build/source/texk/upmendex/kana.h
===================================================================
--- trunk/Build/source/texk/upmendex/kana.h	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/kana.h	2022-10-08 14:19:02 UTC (rev 64650)
@@ -195,6 +195,7 @@
 	0x308d, /* ろ */
 	0
 };
+int kana_ye_mode=0;
 
 
 UChar GANADA[]={

Modified: trunk/Build/source/texk/upmendex/main.c
===================================================================
--- trunk/Build/source/texk/upmendex/main.c	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/main.c	2022-10-08 14:19:02 UTC (rev 64650)
@@ -284,6 +284,7 @@
 		break;
 	}
 	if (u_strlen(kana_head)==0) u_strcpy(kana_head,atama);
+	init_icu_collator();
 
 /*   read idx file   */
 

Modified: trunk/Build/source/texk/upmendex/mendex.h
===================================================================
--- trunk/Build/source/texk/upmendex/mendex.h	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/mendex.h	2022-10-08 14:19:02 UTC (rev 64650)
@@ -47,6 +47,7 @@
 int lastpage(const char *filename);
 
 /* sort.c */
+void init_icu_collator();
 void wsort(struct index *ind, int num);
 void pagesort(struct index *ind, int num);
 int is_latin(UChar *c);

Modified: trunk/Build/source/texk/upmendex/sort.c
===================================================================
--- trunk/Build/source/texk/upmendex/sort.c	2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/sort.c	2022-10-08 14:19:02 UTC (rev 64650)
@@ -28,15 +28,65 @@
 static int get_charset_juncture(UChar *str);
 static int unescape(const unsigned char *src, UChar *dist);
 
-/*   sort index   */
-void wsort(struct index *ind, int num)
+/*   init ICU collator   */
+void init_icu_collator()
 {
-	int i,order;
 	UErrorCode status;
 	UParseError parse_error;
 	UChar rules[RULEBUFSIZE] = {'\0'};
+	int i;
 	int32_t len;
 
+	status = U_ZERO_ERROR;
+	if (strlen(icu_rules)>0) {
+		if (strcmp(icu_locale,"root")!=0) {
+			icu_collator = ucol_open(icu_locale, &status);
+			if (U_FAILURE(status)) {
+				verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
+				exit(254);
+			}
+			len = ucol_getRulesEx(icu_collator, UCOL_TAILORING_ONLY, rules, RULEBUFSIZE);
+			if (u_strlen(rules)<len) {
+				verb_printf(efp, "\n[ICU] Failed to extract collation rules by locale (%s). Need buffer size %d.\n",
+					icu_locale, len);
+				exit(254);
+			}
+			ucol_close(icu_collator);
+		}
+		unescape((unsigned char *)icu_rules, rules);
+		status = U_ZERO_ERROR;
+		icu_collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, &parse_error, &status);
+	} else
+		icu_collator = ucol_open(icu_locale, &status);
+	if (U_FAILURE(status)) {
+		verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
+		exit(254);
+	}
+	if (status == U_USING_DEFAULT_WARNING) {
+		warn_printf(efp, "\nWarning: [ICU] U_USING_DEFAULT_WARNING for locale %s\n",
+			    icu_locale);
+	}
+	if (status == U_USING_FALLBACK_WARNING) {
+		warn_printf(efp, "\nWarning: [ICU] U_USING_FALLBACK_WARNING for locale %s\n",
+			    icu_locale);
+	}
+	for (i=0;i<UCOL_ATTRIBUTE_COUNT;i++) {
+		if (icu_attributes[i]!=UCOL_DEFAULT) {
+			status = U_ZERO_ERROR;
+			ucol_setAttribute(icu_collator, i, icu_attributes[i], &status);
+		}
+		if (U_FAILURE(status)) {
+			warn_printf(efp, "\nWarning: [ICU] Failed to set attribute (%d): %s\n",
+				    i, u_errorName(status));
+		}
+	}
+}
+
+/*   sort index   */
+void wsort(struct index *ind, int num)
+{
+	int i,order;
+
 	for (order=1,i=0;;i++) {
 		switch (character_order[i]) {
 		case '\0':
@@ -112,49 +162,6 @@
 	if (arab==0) arab=order++;
 	if (hbrw==0) hbrw=order++;
 
-	status = U_ZERO_ERROR;
-	if (strlen(icu_rules)>0) {
-		if (strcmp(icu_locale,"root")!=0) {
-			icu_collator = ucol_open(icu_locale, &status);
-			if (U_FAILURE(status)) {
-				verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
-				exit(254);
-			}
-			len = ucol_getRulesEx(icu_collator, UCOL_TAILORING_ONLY, rules, RULEBUFSIZE);
-			if (u_strlen(rules)<len) {
-				verb_printf(efp, "\n[ICU] Failed to extract collation rules by locale (%s). Need buffer size %d.\n",
-					icu_locale, len);
-				exit(254);
-			}
-			ucol_close(icu_collator);
-		}
-		unescape((unsigned char *)icu_rules, rules);
-		status = U_ZERO_ERROR;
-		icu_collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, &parse_error, &status);
-	} else
-		icu_collator = ucol_open(icu_locale, &status);
-	if (U_FAILURE(status)) {
-		verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
-		exit(254);
-	}
-	if (status == U_USING_DEFAULT_WARNING) {
-		warn_printf(efp, "\nWarning: [ICU] U_USING_DEFAULT_WARNING for locale %s\n",
-			    icu_locale);
-	}
-	if (status == U_USING_FALLBACK_WARNING) {
-		warn_printf(efp, "\nWarning: [ICU] U_USING_FALLBACK_WARNING for locale %s\n",
-			    icu_locale);
-	}
-	for (i=0;i<UCOL_ATTRIBUTE_COUNT;i++) {
-		if (icu_attributes[i]!=UCOL_DEFAULT) {
-			status = U_ZERO_ERROR;
-			ucol_setAttribute(icu_collator, i, icu_attributes[i], &status);
-		}
-		if (U_FAILURE(status)) {
-			warn_printf(efp, "\nWarning: [ICU] Failed to set attribute (%d): %s\n",
-				    i, u_errorName(status));
-		}
-	}
 	qsort(ind,num,sizeof(struct index),wcomp);
 }
 
@@ -486,6 +493,23 @@
 		else if ((c32>=0x1B11F)                         /* HIRAGANA LETTER ARCHAIC WU */
 		                   && (c32<=0x1B122)) return 2; /* KATAKANA LETTER ARCHAIC WU */
 		else if ((c32==0x1F200))              return 2; /* SQUARE HIRAGANA HOKA */
+		else if (c32==0x1B001) {
+		/* check whether U+1B001 is HIRAGANA LETTER ARCHAIC YE or not.
+		                  It may be HENTAIGANA LETTER E-1              */
+			if (kana_ye_mode==0) {
+				UCollationResult order;
+				UCollationStrength strgth;
+				UChar strX[4],strZ[4];
+				strgth = ucol_getStrength(icu_collator);
+				ucol_setStrength(icu_collator, UCOL_PRIMARY);
+				strX[0] = 0xD82C; strX[1] = 0xDC01; strX[2] = L'\0'; /* U+1B001 */
+				strZ[0] = 0xD82C; strZ[1] = 0xDD21; strZ[2] = L'\0'; /* U+1B121 */
+				order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
+				kana_ye_mode = (order==UCOL_EQUAL) ? 2 : 1;
+				ucol_setStrength(icu_collator, strgth);
+			}
+			if (kana_ye_mode==2) return 2;
+		}
 	}
 	return 0;
 		/* ICU 71.1 does not seem to support



More information about the tex-live-commits mailing list.