texlive[64650] Build/source/texk/upmendex: upmendex: ver1.05, Support U+1B001 Hiragana Letter Archaic Ye
commits+takuji at tug.org
commits+takuji at tug.org
Sat Oct 8 16:19:03 CEST 2022
Revision: 64650
http://tug.org/svn/texlive?view=revision&revision=64650
Author: takuji
Date: 2022-10-08 16:19:02 +0200 (Sat, 08 Oct 2022)
Log Message:
-----------
upmendex: ver1.05, Support U+1B001 Hiragana Letter Archaic Ye
Modified Paths:
--------------
trunk/Build/source/texk/upmendex/ChangeLog
trunk/Build/source/texk/upmendex/configure
trunk/Build/source/texk/upmendex/configure.ac
trunk/Build/source/texk/upmendex/exkana.h
trunk/Build/source/texk/upmendex/fwrite.c
trunk/Build/source/texk/upmendex/kana.h
trunk/Build/source/texk/upmendex/main.c
trunk/Build/source/texk/upmendex/mendex.h
trunk/Build/source/texk/upmendex/sort.c
Modified: trunk/Build/source/texk/upmendex/ChangeLog
===================================================================
--- trunk/Build/source/texk/upmendex/ChangeLog 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/ChangeLog 2022-10-08 14:19:02 UTC (rev 64650)
@@ -1,3 +1,13 @@
+2022-10-08 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
+
+ * version 1.05 Stable version.
+ * configure.ac: Bump version.
+ * main.c, sort.c, fwrite.c, {,ex}kana.h, mendex.h:
+ Support U+1B001 Hiragana Letter Archaic Ye.
+ * fwrite.c:
+ Strict check for dotted/dotless I/i in Turkish.
+ Strict check for Thai reordering.
+
2022-09-17 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
* fwrite.c:
Modified: trunk/Build/source/texk/upmendex/configure
===================================================================
--- trunk/Build/source/texk/upmendex/configure 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/configure 2022-10-08 14:19:02 UTC (rev 64650)
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.03.
+# Generated by GNU Autoconf 2.71 for upmendex (TeX Live) 1.05.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
@@ -626,8 +626,8 @@
# Identity of this package.
PACKAGE_NAME='upmendex (TeX Live)'
PACKAGE_TARNAME='upmendex--tex-live-'
-PACKAGE_VERSION='1.03'
-PACKAGE_STRING='upmendex (TeX Live) 1.03'
+PACKAGE_VERSION='1.05'
+PACKAGE_STRING='upmendex (TeX Live) 1.05'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1390,7 +1390,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures upmendex (TeX Live) 1.03 to adapt to many kinds of systems.
+\`configure' configures upmendex (TeX Live) 1.05 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1462,7 +1462,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of upmendex (TeX Live) 1.03:";;
+ short | recursive ) echo "Configuration of upmendex (TeX Live) 1.05:";;
esac
cat <<\_ACEOF
@@ -1587,7 +1587,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-upmendex (TeX Live) configure 1.03
+upmendex (TeX Live) configure 1.05
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
@@ -2268,7 +2268,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by upmendex (TeX Live) $as_me 1.03, which was
+It was created by upmendex (TeX Live) $as_me 1.05, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
@@ -8806,7 +8806,7 @@
# Define the identity of the package.
PACKAGE='upmendex--tex-live-'
- VERSION='1.03'
+ VERSION='1.05'
# Some tools Automake needs.
@@ -18942,7 +18942,7 @@
Report bugs to <bug-libtool at gnu.org>."
lt_cl_version="\
-upmendex (TeX Live) config.lt 1.03
+upmendex (TeX Live) config.lt 1.05
configured by $0, generated by GNU Autoconf 2.71.
Copyright (C) 2011 Free Software Foundation, Inc.
@@ -21114,7 +21114,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by upmendex (TeX Live) $as_me 1.03, which was
+This file was extended by upmendex (TeX Live) $as_me 1.05, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -21182,7 +21182,7 @@
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
-upmendex (TeX Live) config.status 1.03
+upmendex (TeX Live) config.status 1.05
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
Modified: trunk/Build/source/texk/upmendex/configure.ac
===================================================================
--- trunk/Build/source/texk/upmendex/configure.ac 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/configure.ac 2022-10-08 14:19:02 UTC (rev 64650)
@@ -8,7 +8,7 @@
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl
-AC_INIT([upmendex (TeX Live)],[1.03])
+AC_INIT([upmendex (TeX Live)],[1.05])
AC_PREREQ([2.71])
AC_CONFIG_SRCDIR([main.c])
AC_CONFIG_AUX_DIR([../../build-aux])
Modified: trunk/Build/source/texk/upmendex/exkana.h
===================================================================
--- trunk/Build/source/texk/upmendex/exkana.h 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/exkana.h 2022-10-08 14:19:02 UTC (rev 64650)
@@ -4,6 +4,7 @@
extern UChar *aiueo;
extern UChar kanatable[];
extern UChar extkanatable[];
+extern int kana_ye_mode;
#define SPACE 0x3000 /* 全角スペース */
#define ALPHATOP 0xff01 /* ! */
Modified: trunk/Build/source/texk/upmendex/fwrite.c
===================================================================
--- trunk/Build/source/texk/upmendex/fwrite.c 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/fwrite.c 2022-10-08 14:19:02 UTC (rev 64650)
@@ -86,7 +86,7 @@
olen=u_strToUpper(istr,INITIALLENGTH,istr,wclen,"",&perr);
} else if (mode==M_TO_LOWER) {
perr=U_ZERO_ERROR;
- olen=u_strToLower(istr,INITIALLENGTH,istr,wclen, istr[0]==0x130&&turkish_i?"tr":"", &perr);
+ olen=u_strToLower(istr,INITIALLENGTH,istr,wclen, istr[0]==0x130&&turkish_i==2?"tr":"", &perr);
} else if (mode==M_TO_TITLE) {
perr=U_ZERO_ERROR;
olen=u_strToTitle(istr,INITIALLENGTH,istr,wclen,NULL,"",&perr);
@@ -828,7 +828,7 @@
break;
case 0x1B120: /* Archaic YI 𛄠 */
ini[0]=0xD82C; ini[1]=0xDD20; ini[2]=L'\0'; break;
- case 0x1B121: /* Archaic YE 𛄡 */
+ case 0x1B121: case 0x1B001: /* Archaic YE 𛄡 𛀁 */
ini[0]=0xD82C; ini[1]=0xDD21; ini[2]=L'\0'; break;
case 0x1B132: case 0x1B155:
ini[0]=0x3053; break; /* こ */
@@ -928,8 +928,9 @@
return;
}
else if (is_thai(&ch)) {
- if (istr[0]>=0x0E2F && (istr[1]>0x0E00 && istr[1]<0x0E2F)) {
- /* Vowel followed by Consonant */
+ if ((istr[0]>=0x0E40 && istr[0]<=0x0E44) && (istr[1]>=0x0E01 && istr[1]<=0x0E2E)) {
+ /* Thai reordering :: Vowel followed by Consonant */
+ /* https://unicode-org.github.io/icu/userguide/collation/concepts.html#thailao-reordering */
ini[0]=istr[1];
} else {
ini[0]=istr[0];
@@ -974,12 +975,17 @@
}
if (ch==0x049||ch==0x069||ch==0x130||ch==0x131||ch==0x0CE||ch==0x0EE) {
/* check dotted/dotless İ,I,i,ı and Î,î for Turkish */
- strX[0] = 0x131; strX[1] = 0x5A; strX[2] = 0x00; /* ıZ */
- strZ[0] = 0x069; strZ[1] = 0x00; /* i */
- order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
- if (order==UCOL_GREATER) {
+ if (turkish_i==0) {
+ strgth = ucol_getStrength(icu_collator);
+ ucol_setStrength(icu_collator, UCOL_SECONDARY);
+ strX[0] = 0x131; strX[1] = 0x069; strX[2] = 0x00; /* ıi */
+ strZ[0] = 0x049; strZ[1] = 0x130; strZ[2] = 0x00; /* Iİ */
+ order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
+ turkish_i = (order==UCOL_EQUAL) ? 2 : 1;
+ ucol_setStrength(icu_collator, strgth);
+ }
+ if (turkish_i==2) {
ini[0] = (ch==0x049||ch==0x131) ? 0x131 : 0x130; /* ı or İ */
- turkish_i=1;
return;
}
}
@@ -991,7 +997,7 @@
strX[0] = 0x059; strX[1] = 0x00; /* Y */
strZ[0] = 0x049; strZ[1] = 0x00; /* I */
order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
- if (order==UCOL_EQUAL) i_y_mode=2; else i_y_mode=1;
+ i_y_mode = (order==UCOL_EQUAL) ? 2 : 1;
ucol_setStrength(icu_collator, strgth);
}
if (i_y_mode==2) {
Modified: trunk/Build/source/texk/upmendex/kana.h
===================================================================
--- trunk/Build/source/texk/upmendex/kana.h 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/kana.h 2022-10-08 14:19:02 UTC (rev 64650)
@@ -195,6 +195,7 @@
0x308d, /* ろ */
0
};
+int kana_ye_mode=0;
UChar GANADA[]={
Modified: trunk/Build/source/texk/upmendex/main.c
===================================================================
--- trunk/Build/source/texk/upmendex/main.c 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/main.c 2022-10-08 14:19:02 UTC (rev 64650)
@@ -284,6 +284,7 @@
break;
}
if (u_strlen(kana_head)==0) u_strcpy(kana_head,atama);
+ init_icu_collator();
/* read idx file */
Modified: trunk/Build/source/texk/upmendex/mendex.h
===================================================================
--- trunk/Build/source/texk/upmendex/mendex.h 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/mendex.h 2022-10-08 14:19:02 UTC (rev 64650)
@@ -47,6 +47,7 @@
int lastpage(const char *filename);
/* sort.c */
+void init_icu_collator();
void wsort(struct index *ind, int num);
void pagesort(struct index *ind, int num);
int is_latin(UChar *c);
Modified: trunk/Build/source/texk/upmendex/sort.c
===================================================================
--- trunk/Build/source/texk/upmendex/sort.c 2022-10-07 21:05:45 UTC (rev 64649)
+++ trunk/Build/source/texk/upmendex/sort.c 2022-10-08 14:19:02 UTC (rev 64650)
@@ -28,15 +28,65 @@
static int get_charset_juncture(UChar *str);
static int unescape(const unsigned char *src, UChar *dist);
-/* sort index */
-void wsort(struct index *ind, int num)
+/* init ICU collator */
+void init_icu_collator()
{
- int i,order;
UErrorCode status;
UParseError parse_error;
UChar rules[RULEBUFSIZE] = {'\0'};
+ int i;
int32_t len;
+ status = U_ZERO_ERROR;
+ if (strlen(icu_rules)>0) {
+ if (strcmp(icu_locale,"root")!=0) {
+ icu_collator = ucol_open(icu_locale, &status);
+ if (U_FAILURE(status)) {
+ verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
+ exit(254);
+ }
+ len = ucol_getRulesEx(icu_collator, UCOL_TAILORING_ONLY, rules, RULEBUFSIZE);
+ if (u_strlen(rules)<len) {
+ verb_printf(efp, "\n[ICU] Failed to extract collation rules by locale (%s). Need buffer size %d.\n",
+ icu_locale, len);
+ exit(254);
+ }
+ ucol_close(icu_collator);
+ }
+ unescape((unsigned char *)icu_rules, rules);
+ status = U_ZERO_ERROR;
+ icu_collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, &parse_error, &status);
+ } else
+ icu_collator = ucol_open(icu_locale, &status);
+ if (U_FAILURE(status)) {
+ verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
+ exit(254);
+ }
+ if (status == U_USING_DEFAULT_WARNING) {
+ warn_printf(efp, "\nWarning: [ICU] U_USING_DEFAULT_WARNING for locale %s\n",
+ icu_locale);
+ }
+ if (status == U_USING_FALLBACK_WARNING) {
+ warn_printf(efp, "\nWarning: [ICU] U_USING_FALLBACK_WARNING for locale %s\n",
+ icu_locale);
+ }
+ for (i=0;i<UCOL_ATTRIBUTE_COUNT;i++) {
+ if (icu_attributes[i]!=UCOL_DEFAULT) {
+ status = U_ZERO_ERROR;
+ ucol_setAttribute(icu_collator, i, icu_attributes[i], &status);
+ }
+ if (U_FAILURE(status)) {
+ warn_printf(efp, "\nWarning: [ICU] Failed to set attribute (%d): %s\n",
+ i, u_errorName(status));
+ }
+ }
+}
+
+/* sort index */
+void wsort(struct index *ind, int num)
+{
+ int i,order;
+
for (order=1,i=0;;i++) {
switch (character_order[i]) {
case '\0':
@@ -112,49 +162,6 @@
if (arab==0) arab=order++;
if (hbrw==0) hbrw=order++;
- status = U_ZERO_ERROR;
- if (strlen(icu_rules)>0) {
- if (strcmp(icu_locale,"root")!=0) {
- icu_collator = ucol_open(icu_locale, &status);
- if (U_FAILURE(status)) {
- verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
- exit(254);
- }
- len = ucol_getRulesEx(icu_collator, UCOL_TAILORING_ONLY, rules, RULEBUFSIZE);
- if (u_strlen(rules)<len) {
- verb_printf(efp, "\n[ICU] Failed to extract collation rules by locale (%s). Need buffer size %d.\n",
- icu_locale, len);
- exit(254);
- }
- ucol_close(icu_collator);
- }
- unescape((unsigned char *)icu_rules, rules);
- status = U_ZERO_ERROR;
- icu_collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, &parse_error, &status);
- } else
- icu_collator = ucol_open(icu_locale, &status);
- if (U_FAILURE(status)) {
- verb_printf(efp, "\n[ICU] Collator creation failed.: %s\n", u_errorName(status));
- exit(254);
- }
- if (status == U_USING_DEFAULT_WARNING) {
- warn_printf(efp, "\nWarning: [ICU] U_USING_DEFAULT_WARNING for locale %s\n",
- icu_locale);
- }
- if (status == U_USING_FALLBACK_WARNING) {
- warn_printf(efp, "\nWarning: [ICU] U_USING_FALLBACK_WARNING for locale %s\n",
- icu_locale);
- }
- for (i=0;i<UCOL_ATTRIBUTE_COUNT;i++) {
- if (icu_attributes[i]!=UCOL_DEFAULT) {
- status = U_ZERO_ERROR;
- ucol_setAttribute(icu_collator, i, icu_attributes[i], &status);
- }
- if (U_FAILURE(status)) {
- warn_printf(efp, "\nWarning: [ICU] Failed to set attribute (%d): %s\n",
- i, u_errorName(status));
- }
- }
qsort(ind,num,sizeof(struct index),wcomp);
}
@@ -486,6 +493,23 @@
else if ((c32>=0x1B11F) /* HIRAGANA LETTER ARCHAIC WU */
&& (c32<=0x1B122)) return 2; /* KATAKANA LETTER ARCHAIC WU */
else if ((c32==0x1F200)) return 2; /* SQUARE HIRAGANA HOKA */
+ else if (c32==0x1B001) {
+ /* check whether U+1B001 is HIRAGANA LETTER ARCHAIC YE or not.
+ It may be HENTAIGANA LETTER E-1 */
+ if (kana_ye_mode==0) {
+ UCollationResult order;
+ UCollationStrength strgth;
+ UChar strX[4],strZ[4];
+ strgth = ucol_getStrength(icu_collator);
+ ucol_setStrength(icu_collator, UCOL_PRIMARY);
+ strX[0] = 0xD82C; strX[1] = 0xDC01; strX[2] = L'\0'; /* U+1B001 */
+ strZ[0] = 0xD82C; strZ[1] = 0xDD21; strZ[2] = L'\0'; /* U+1B121 */
+ order = ucol_strcoll(icu_collator, strZ, -1, strX, -1);
+ kana_ye_mode = (order==UCOL_EQUAL) ? 2 : 1;
+ ucol_setStrength(icu_collator, strgth);
+ }
+ if (kana_ye_mode==2) return 2;
+ }
}
return 0;
/* ICU 71.1 does not seem to support
More information about the tex-live-commits
mailing list.