texlive[60549] Build/source/texk/upmendex: upmendex: modify heuristic
commits+takuji at tug.org
commits+takuji at tug.org
Sun Sep 19 10:57:18 CEST 2021
Revision: 60549
http://tug.org/svn/texlive?view=revision&revision=60549
Author: takuji
Date: 2021-09-19 10:57:18 +0200 (Sun, 19 Sep 2021)
Log Message:
-----------
upmendex: modify heuristic detection of page number types
Modified Paths:
--------------
trunk/Build/source/texk/upmendex/ChangeLog
trunk/Build/source/texk/upmendex/fread.c
trunk/Build/source/texk/upmendex/tests/range.idx
trunk/Build/source/texk/upmendex/tests/range1.ind
trunk/Build/source/texk/upmendex/tests/range2.ind
trunk/Build/source/texk/upmendex/tests/range3.ind
trunk/Build/source/texk/upmendex/tests/upmendex.test
Added Paths:
-----------
trunk/Build/source/texk/upmendex/tests/pprec3.ist
trunk/Build/source/texk/upmendex/tests/pprec4.ist
trunk/Build/source/texk/upmendex/tests/pprecB-0.ind
trunk/Build/source/texk/upmendex/tests/pprecB-3.ind
trunk/Build/source/texk/upmendex/tests/pprecB-4.ind
trunk/Build/source/texk/upmendex/tests/pprecB.idx
Modified: trunk/Build/source/texk/upmendex/ChangeLog
===================================================================
--- trunk/Build/source/texk/upmendex/ChangeLog 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/ChangeLog 2021-09-19 08:57:18 UTC (rev 60549)
@@ -1,3 +1,11 @@
+2021-09-19 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
+
+ * fread.c:
+ Modify heuristic detection of page number types.
+ * tests/upmendex.test, tests/range.idx, tests/range[123].ind,
+ tests/pprecB.idx, tests/pprec[34].ist, tests/pprecB-[034].ind:
+ Update tests.
+
2021-09-18 TANAKA Takuji <ttk at t-lab.opal.ne.jp>
* fread.c, fwrite.c, sort.c, mendex.h, {,ex}var.h:
Modified: trunk/Build/source/texk/upmendex/fread.c
===================================================================
--- trunk/Build/source/texk/upmendex/fread.c 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/fread.c 2021-09-19 08:57:18 UTC (rev 60549)
@@ -344,7 +344,7 @@
copy_multibyte_char(buff, table, &j, &k);
}
- table[k]='\0';
+ table[k]='\0';
for (k=0;k<=ind[l].num;k++) {
if (strcmp(ind[l].p[k].page,table)==0) {
@@ -356,9 +356,9 @@
ind[l].num++;
if (!((ind[l].num)%16)) ind[l].p=(struct page *)xrealloc(ind[l].p,sizeof(struct page)*((int)((ind[l].num)/16)+1)*16);
- ind[l].p[ind[l].num].page=xstrdup(table);
+ ind[l].p[ind[l].num].page=xstrdup(table);
- ind[l].p[ind[l].num].enc=xstrdup(estr);
+ ind[l].p[ind[l].num].enc=xstrdup(estr);
chkpageattr(&ind[l].p[ind[l].num]);
}
}
@@ -388,8 +388,8 @@
nest++;
if (buff[j]==arg_close) {
if (nest==0) {
- table[k]='\0';
- ind[i].p[0].page=xstrdup(table);
+ table[k]='\0';
+ ind[i].p[0].page=xstrdup(table);
break;
}
else nest--;
@@ -396,7 +396,7 @@
}
copy_multibyte_char(buff, table, &j, &k);
}
- ind[l].p[0].enc=xstrdup(estr);
+ ind[l].p[0].enc=xstrdup(estr);
chkpageattr(&ind[i].p[0]);
}
}
@@ -538,7 +538,7 @@
switch(page_precedence[pattr[cc]]) {
case 'r':
if (strchr("ivxlcdm",*page0)==NULL ||
- (strchr("lcdm",*page0) && strchr(page_precedence,'a') && strlen(buff)==1 && pcpos)) {
+ (strchr("lcdm",*page0) && strchr(page_precedence,'a') && strlen(buff)==1 && pcpos)) {
/* heuristic detection as alphabet since L=50, C=100, D=500, M=1000 are quite large */
if (pattr[cc]<pplen-1)
pattr[cc]++;
@@ -549,7 +549,7 @@
break;
case 'R':
if (strchr("IVXLCDM",*page0)==NULL ||
- (strchr("LCDM",*page0) && strchr(page_precedence,'A') && strlen(buff)==1 && pcpos)) {
+ (strchr("LCDM",*page0) && strchr(page_precedence,'A') && strlen(buff)==1 && pcpos)) {
/* heuristic detection as alphabet since L=50, C=100, D=500, M=1000 are quite large */
if (pattr[cc]<pplen-1)
pattr[cc]++;
@@ -568,7 +568,9 @@
}
break;
case 'a':
- if (*page0<'a' || *page0>'z' || strlen(buff)>1) {
+ if (*page0<'a' || *page0>'z' || strlen(buff)>1 ||
+ (strchr("ivx",*page0) && strchr(page_precedence,'r') && !pcpos)) {
+ /* heuristic detection as roman number since I=1, V=5, X=10 are quite small */
if (pattr[cc]<pplen-1)
pattr[cc]++;
else pattr[cc]=0;
@@ -577,7 +579,9 @@
}
break;
case 'A':
- if (*page0<'A' || *page0>'Z' || strlen(buff)>1) {
+ if (*page0<'A' || *page0>'Z' || strlen(buff)>1 ||
+ (strchr("IVX",*page0) && strchr(page_precedence,'R') && !pcpos)) {
+ /* heuristic detection as roman number since I=1, V=5, X=10 are quite small */
if (pattr[cc]<pplen-1)
pattr[cc]++;
else pattr[cc]=0;
Added: trunk/Build/source/texk/upmendex/tests/pprec3.ist
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprec3.ist (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprec3.ist 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,11 @@
+% -*- coding: utf-8 -*-
+
+page_precedence "naA"
+
+delim_0 "[[delim0]]"
+delim_1 "[[delim1]]"
+delim_2 "[[delim2]]"
+delim_r "[[delimr]]"
+suffix_2p "[[sfx 2p]]"
+suffix_3p "[[sfx 3p]]"
+
Added: trunk/Build/source/texk/upmendex/tests/pprec4.ist
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprec4.ist (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprec4.ist 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,11 @@
+% -*- coding: utf-8 -*-
+
+page_precedence "Ana"
+
+delim_0 "[[delim0]]"
+delim_1 "[[delim1]]"
+delim_2 "[[delim2]]"
+delim_r "[[delimr]]"
+suffix_2p "[[sfx 2p]]"
+suffix_3p "[[sfx 3p]]"
+
Added: trunk/Build/source/texk/upmendex/tests/pprecB-0.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprecB-0.ind (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprecB-0.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,5 @@
+\begin{theindex}
+
+ \item entryA[[delim0]]1[[sfx 3p]], a[[sfx 3p]], A[[sfx 3p]]
+
+\end{theindex}
Added: trunk/Build/source/texk/upmendex/tests/pprecB-3.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprecB-3.ind (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprecB-3.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,5 @@
+\begin{theindex}
+
+ \item entryA[[delim0]]1[[sfx 3p]], a[[sfx 3p]], A[[sfx 3p]]
+
+\end{theindex}
Added: trunk/Build/source/texk/upmendex/tests/pprecB-4.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprecB-4.ind (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprecB-4.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,5 @@
+\begin{theindex}
+
+ \item entryA[[delim0]]A[[sfx 3p]], 1[[sfx 3p]], a[[sfx 3p]]
+
+\end{theindex}
Added: trunk/Build/source/texk/upmendex/tests/pprecB.idx
===================================================================
--- trunk/Build/source/texk/upmendex/tests/pprecB.idx (rev 0)
+++ trunk/Build/source/texk/upmendex/tests/pprecB.idx 2021-09-19 08:57:18 UTC (rev 60549)
@@ -0,0 +1,9 @@
+\indexentry{entryA}{a}
+\indexentry{entryA}{b}
+\indexentry{entryA}{c}
+\indexentry{entryA}{1}
+\indexentry{entryA}{2}
+\indexentry{entryA}{3}
+\indexentry{entryA}{A}
+\indexentry{entryA}{B}
+\indexentry{entryA}{C}
Modified: trunk/Build/source/texk/upmendex/tests/range.idx
===================================================================
--- trunk/Build/source/texk/upmendex/tests/range.idx 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/tests/range.idx 2021-09-19 08:57:18 UTC (rev 60549)
@@ -58,13 +58,13 @@
\indexentry{entryF}{D-DIII}
\indexentry{entryF}{D-DIV}
-\indexentry{entryP}{A-a-1-I-iii}
-\indexentry{entryP}{A-a-2-I-iii}
-\indexentry{entryP}{A-a-3-I-iii}
-\indexentry{entryP}{A-a-1-II-iii}
-\indexentry{entryP}{A-a-1-III-iii}
-\indexentry{entryP}{A-a-1-I-iv}
-\indexentry{entryP}{A-a-1-I-v}
+\indexentry{entryP}{A-a-1-VII-iii}
+\indexentry{entryP}{A-a-2-VII-iii}
+\indexentry{entryP}{A-a-3-VII-iii}
+\indexentry{entryP}{A-a-1-VIII-iii}
+\indexentry{entryP}{A-a-1-IX-iii}
+\indexentry{entryP}{A-a-1-VII-iv}
+\indexentry{entryP}{A-a-1-VII-v}
\indexentry{entryQ}{1-2-3-4-5-6-7-8-9-10}
\indexentry{entryQ}{1-2-3-4-5-6-7-8-9-11}
Modified: trunk/Build/source/texk/upmendex/tests/range1.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/range1.ind 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/tests/range1.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -15,8 +15,8 @@
\item entryF, C-I[[sfx mp]], D-DI[[sfx mp]], V-LIII[[sfx mp]]
\item entryF1, C-I[[sfx mp]], \bold{D-DII}, D-DI[[sfx mp]],
V-LIII[[sfx mp]]
- \item entryP, A-a-1-I-iii[[sfx mp]], A-a-1-II-iii, A-a-1-III-iii,
- A-a-2-I-iii, A-a-3-I-iii
+ \item entryP, A-a-1-VII-iii[[sfx mp]], A-a-1-VIII-iii, A-a-1-IX-iii,
+ A-a-2-VII-iii, A-a-3-VII-iii
\item entryQ, 1-2-3-4-5-6-7-8-9-10[[sfx 2p]],
1-2-3-4-5-6-7-8-9-14[[sfx mp]],
1-2-3-4-5-6-7-8-9-22[[sfx mp]],
Modified: trunk/Build/source/texk/upmendex/tests/range2.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/range2.ind 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/tests/range2.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -16,8 +16,8 @@
V-LIII[[delim r]]V-LVI
\item entryF1, C-I[[delim r]]C-IV, \bold{D-DII}, D-DI[[delim r]]D-DIV,
V-LIII[[delim r]]V-LVI
- \item entryP, A-a-1-I-iii[[sfx 3p]], A-a-1-II-iii, A-a-1-III-iii,
- A-a-2-I-iii, A-a-3-I-iii
+ \item entryP, A-a-1-VII-iii[[sfx 3p]], A-a-1-VIII-iii, A-a-1-IX-iii,
+ A-a-2-VII-iii, A-a-3-VII-iii
\item entryQ, 1-2-3-4-5-6-7-8-9-10[[sfx 2p]],
1-2-3-4-5-6-7-8-9-14[[sfx 3p]],
1-2-3-4-5-6-7-8-9-22[[delim r]]1-2-3-4-5-6-7-8-9-25,
Modified: trunk/Build/source/texk/upmendex/tests/range3.ind
===================================================================
--- trunk/Build/source/texk/upmendex/tests/range3.ind 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/tests/range3.ind 2021-09-19 08:57:18 UTC (rev 60549)
@@ -15,8 +15,8 @@
\item entryF, C-I[[sfx mp]], D-DI[[sfx mp]], V-LIII[[sfx mp]]
\item entryF1, C-I[[sfx mp]], \bold{D-DII}, D-DI[[sfx mp]],
V-LIII[[sfx mp]]
- \item entryP, A-a-1-I-iii[[sfx 3p]], A-a-1-II-iii, A-a-1-III-iii,
- A-a-2-I-iii, A-a-3-I-iii
+ \item entryP, A-a-1-VII-iii[[sfx 3p]], A-a-1-VIII-iii, A-a-1-IX-iii,
+ A-a-2-VII-iii, A-a-3-VII-iii
\item entryQ, 1-2-3-4-5-6-7-8-9-10[[sfx 2p]],
1-2-3-4-5-6-7-8-9-14[[sfx 3p]],
1-2-3-4-5-6-7-8-9-22[[sfx mp]],
Modified: trunk/Build/source/texk/upmendex/tests/upmendex.test
===================================================================
--- trunk/Build/source/texk/upmendex/tests/upmendex.test 2021-09-19 08:50:52 UTC (rev 60548)
+++ trunk/Build/source/texk/upmendex/tests/upmendex.test 2021-09-19 08:57:18 UTC (rev 60549)
@@ -7,7 +7,7 @@
TEXMFCNF=$srcdir/../kpathsea
export TEXMFCNF
-rm -f foo.* uni.* pprec*.* range*.* locale*.* attr*.* rule*.*
+rm -f foo.* uni.* range*.* pprec*.* locale*.* attr*.* rule*.*
./upmendex $srcdir/tests/foo.idx -o foo.ind1 -t foo.ilg1 \
&& diff $srcdir/tests/foo.ind foo.ind1 || exit 1
@@ -54,6 +54,18 @@
-o pprecA-2.ind1 -t pprecA-2.ilg \
&& diff $srcdir/tests/pprecA-2.ind pprecA-2.ind1 || exit 1
+./upmendex -s $srcdir/tests/pprec0.ist $srcdir/tests/pprecB.idx \
+ -o pprecB-0.ind1 -t pprecB-0.ilg \
+ && diff $srcdir/tests/pprecB-0.ind pprecB-0.ind1 || exit 1
+
+./upmendex -s $srcdir/tests/pprec3.ist $srcdir/tests/pprecB.idx \
+ -o pprecB-3.ind1 -t pprecB-3.ilg \
+ && diff $srcdir/tests/pprecB-3.ind pprecB-3.ind1 || exit 1
+
+./upmendex -s $srcdir/tests/pprec4.ist $srcdir/tests/pprecB.idx \
+ -o pprecB-4.ind1 -t pprecB-4.ilg \
+ && diff $srcdir/tests/pprecB-4.ind pprecB-4.ind1 || exit 1
+
# test for icu_locale
./upmendex -s $srcdir/tests/uni.ist -s $srcdir/tests/locale1.ist $srcdir/tests/rule.idx \
-o locale1.ind1 -t locale1.ilg \
More information about the tex-live-commits
mailing list.