texlive[72394] Build/source/texk/web2c/uptexdir: upTeX, upbibtex: Unicode 16.0

commits+takuji at tug.org commits+takuji at tug.org
Sat Sep 28 16:21:29 CEST 2024


Revision: 72394
          https://tug.org/svn/texlive?view=revision&revision=72394
Author:   takuji
Date:     2024-09-28 16:21:29 +0200 (Sat, 28 Sep 2024)
Log Message:
-----------
upTeX, upbibtex: Unicode 16.0

Modified Paths:
--------------
    trunk/Build/source/texk/web2c/uptexdir/ChangeLog
    trunk/Build/source/texk/web2c/uptexdir/kanji.c
    trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bbl
    trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bib
    trunk/Build/source/texk/web2c/uptexdir/tests/unibib.tex
    trunk/Build/source/texk/web2c/uptexdir/upbibtex.ch
    trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch

Modified: trunk/Build/source/texk/web2c/uptexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/ChangeLog	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/ChangeLog	2024-09-28 14:21:29 UTC (rev 72394)
@@ -1,3 +1,13 @@
+2024-09-28  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* uptex-m.ch, upbibtex.ch, kanji.c:
+	Update reference of Unicode blocks to
+	"Blocks-16.0.0.txt Date: 2024-02-22".
+	Also add a new block
+	"CJK Unified Ideographs Extension J"
+	which is approved for Unicode 17.0 (2025).
+	* tests/unibib.{tex,bib,bbl}: Update a test.
+
 2024-09-22  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
 
 	* ptex-base.ch, ptex_version.h: Version p4.1.2.

Modified: trunk/Build/source/texk/web2c/uptexdir/kanji.c
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/kanji.c	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/kanji.c	2024-09-28 14:21:29 UTC (rev 72394)
@@ -91,8 +91,8 @@
 }
 
 /* Ref. http://www.unicode.org/Public/UNIDATA/Blocks.txt */
-/* # Blocks-15.1.0.txt                                   */
-/* # Date: 2023-07-28, 15:47:20 GMT                      */
+/* # Blocks-16.0.0.txt                                   */
+/* # Date: 2024-02-22                                    */
 static long ucs_range[]={
       0x0000, /* Basic Latin					     */ /* 0x00 */
       0x0080, /* Latin-1 Supplement				     */
@@ -279,6 +279,7 @@
       0x10500, /* Elbasan					     */
       0x10530, /* Caucasian Albanian				     */
       0x10570, /* Vithkuqi					     */
+      0x105C0, /* Todhri					     */
       0x10600, /* Linear A					     */
       0x10780, /* Latin Extended-F				     */
       0x10800, /* Cypriot Syllabary				     */
@@ -285,8 +286,8 @@
       0x10840, /* Imperial Aramaic				     */
       0x10860, /* Palmyrene					     */
       0x10880, /* Nabataean					     */
-      0x108E0, /* Hatran					     */
-      0x10900, /* Phoenician					     */ /* 0xc0 */
+      0x108E0, /* Hatran					     */ /* 0xc0 */
+      0x10900, /* Phoenician					     */
       0x10920, /* Lydian					     */
       0x10980, /* Meroitic Hieroglyphs				     */
       0x109A0, /* Meroitic Cursive				     */
@@ -301,8 +302,9 @@
       0x10C00, /* Old Turkic					     */
       0x10C80, /* Old Hungarian				     */
       0x10D00, /* Hanifi Rohingya				     */
+      0x10D40, /* Garay					     */ /* 0xd0 */
       0x10E60, /* Rumi Numeral Symbols				     */
-      0x10E80, /* Yezidi					     */ /* 0xd0 */
+      0x10E80, /* Yezidi					     */
       0x10EC0, /* Arabic Extended-C				     */
       0x10F00, /* Old Sogdian					     */
       0x10F30, /* Sogdian					     */
@@ -316,10 +318,11 @@
       0x11150, /* Mahajani					     */
       0x11180, /* Sharada					     */
       0x111E0, /* Sinhala Archaic Numbers			     */
-      0x11200, /* Khojki					     */
+      0x11200, /* Khojki					     */ /* 0xe0 */
       0x11280, /* Multani					     */
-      0x112B0, /* Khudawadi					     */ /* 0xe0 */
+      0x112B0, /* Khudawadi					     */
       0x11300, /* Grantha					     */
+      0x11380, /* Tulu-Tigalari					     */
       0x11400, /* Newa						     */
       0x11480, /* Tirhuta					     */
       0x11580, /* Siddham					     */
@@ -326,16 +329,18 @@
       0x11600, /* Modi						     */
       0x11660, /* Mongolian Supplement				     */
       0x11680, /* Takri					     */
+      0x116D0, /* Myanmar Extended-C				     */
       0x11700, /* Ahom						     */
       0x11800, /* Dogra					     */
       0x118A0, /* Warang Citi					     */
       0x11900, /* Dives Akuru					     */
-      0x119A0, /* Nandinagari					     */
+      0x119A0, /* Nandinagari					     */ /* 0xf0 */
       0x11A00, /* Zanabazar Square				     */
       0x11A50, /* Soyombo					     */
       0x11AB0, /* Unified Canadian Aboriginal Syllabics Extended-A   */
-      0x11AC0, /* Pau Cin Hau					     */ /* 0xf0 */
+      0x11AC0, /* Pau Cin Hau					     */
       0x11B00, /* Devanagari Extended-A			     */
+      0x11BC0, /* Sunuwar					     */
       0x11C00, /* Bhaiksuki					     */
       0x11C70, /* Marchen					     */
       0x11D00, /* Masaram Gondi				     */
@@ -345,20 +350,23 @@
       0x11FB0, /* Lisu Supplement				     */
       0x11FC0, /* Tamil Supplement				     */
       0x12000, /* Cuneiform					     */
-      0x12400, /* Cuneiform Numbers and Punctuation		     */
+      0x12400, /* Cuneiform Numbers and Punctuation		     */ /* 0x100 */
       0x12480, /* Early Dynastic Cuneiform			     */
       0x12F90, /* Cypro-Minoan					     */
       0x13000, /* Egyptian Hieroglyphs				     */
       0x13430, /* Egyptian Hieroglyph Format Controls		     */
-      0x14400, /* Anatolian Hieroglyphs			     */ /* 0x100 */
+      0x13460, /* Egyptian Hieroglyphs Extended-A		     */
+      0x14400, /* Anatolian Hieroglyphs			     */
+      0x16100, /* Gurung Khema					     */
       0x16800, /* Bamum Supplement				     */
       0x16A40, /* Mro						     */
       0x16A70, /* Tangsa					     */
       0x16AD0, /* Bassa Vah					     */
       0x16B00, /* Pahawh Hmong					     */
+      0x16D40, /* Kirat Rai					     */
       0x16E40, /* Medefaidrin					     */
       0x16F00, /* Miao						     */
-      0x16FE0, /* Ideographic Symbols and Punctuation		     */
+      0x16FE0, /* Ideographic Symbols and Punctuation		     */ /* 0x110 */
       0x17000, /* Tangut					     */
       0x18800, /* Tangut Components				     */
       0x18B00, /* Khitan Small Script				     */
@@ -366,14 +374,15 @@
       0x1AFF0, /* Kana Extended-B				     */
       0x1B000, /* Kana Supplement				     */
       0x1B100, /* Kana Extended-A				     */
-      0x1B130, /* Small Kana Extension				     */ /* 0x110 */
+      0x1B130, /* Small Kana Extension				     */
       0x1B170, /* Nushu					     */
       0x1BC00, /* Duployan					     */
       0x1BCA0, /* Shorthand Format Controls			     */
+      0x1CC00, /* Symbols for Legacy Computing Supplement	     */
       0x1CF00, /* Znamenny Musical Notation			     */
       0x1D000, /* Byzantine Musical Symbols			     */
       0x1D100, /* Musical Symbols				     */
-      0x1D200, /* Ancient Greek Musical Notation		     */
+      0x1D200, /* Ancient Greek Musical Notation		     */ /* 0x120 */
       0x1D2C0, /* Kaktovik Numerals				     */
       0x1D2E0, /* Mayan Numerals				     */
       0x1D300, /* Tai Xuan Jing Symbols			     */
@@ -382,13 +391,14 @@
       0x1D800, /* Sutton SignWriting				     */
       0x1DF00, /* Latin Extended-G				     */
       0x1E000, /* Glagolitic Supplement			     */
-      0x1E030, /* Cyrillic Extended-D				     */ /* 0x120 */
+      0x1E030, /* Cyrillic Extended-D				     */
       0x1E100, /* Nyiakeng Puachue Hmong			     */
       0x1E290, /* Toto						     */
       0x1E2C0, /* Wancho					     */
       0x1E4D0, /* Nag Mundari					     */
+      0x1E5D0, /* Ol Onal					     */
       0x1E7E0, /* Ethiopic Extended-B				     */
-      0x1E800, /* Mende Kikakui				     */
+      0x1E800, /* Mende Kikakui				     */ /* 0x130 */
       0x1E900, /* Adlam					     */
       0x1EC70, /* Indic Siyaq Numbers				     */
       0x1ED00, /* Ottoman Siyaq Numbers			     */
@@ -398,13 +408,13 @@
       0x1F0A0, /* Playing Cards				     */
       0x1F100, /* Enclosed Alphanumeric Supplement		     */
       0x1F200, /* Enclosed Ideographic Supplement		     */
-      0x1F300, /* Miscellaneous Symbols and Pictographs	     */ /* 0x130 */
+      0x1F300, /* Miscellaneous Symbols and Pictographs	     */
       0x1F600, /* Emoticons					     */
       0x1F650, /* Ornamental Dingbats				     */
       0x1F680, /* Transport and Map Symbols			     */
       0x1F700, /* Alchemical Symbols				     */
       0x1F780, /* Geometric Shapes Extended			     */
-      0x1F800, /* Supplemental Arrows-C			     */
+      0x1F800, /* Supplemental Arrows-C			     */ /* 0x140 */
       0x1F900, /* Supplemental Symbols and Pictographs		     */
       0x1FA00, /* Chess Symbols				     */
       0x1FA70, /* Symbols and Pictographs Extended-A		     */
@@ -414,12 +424,13 @@
       0x2B740, /* CJK Unified Ideographs Extension D		     */
       0x2B820, /* CJK Unified Ideographs Extension E		     */
       0x2CEB0, /* CJK Unified Ideographs Extension F		     */
-      0x2EBF0, /* CJK Unified Ideographs Extension I		     */ /* 0x140 */
+      0x2EBF0, /* CJK Unified Ideographs Extension I		     */
       0x2F800, /* CJK Compatibility Ideographs Supplement	     */
       0x30000, /* CJK Unified Ideographs Extension G		     */
       0x31350, /* CJK Unified Ideographs Extension H		     */
-      0x323B0, /* reserved					     */
-      0x40000, /* reserved					     */
+      0x323B0, /* CJK Unified Ideographs Extension J (Unicode 17.0)  */
+      0x33480, /* reserved					     */
+      0x40000, /* reserved					     */ /* 0x150 */
       0x50000, /* reserved					     */
       0x60000, /* reserved					     */
       0x70000, /* reserved					     */
@@ -430,14 +441,14 @@
       0xC0000, /* reserved					     */
       0xD0000, /* reserved					     */
       0xE0000, /* Tags						     */
-      0xE0100, /* Variation Selectors Supplement		     */ /* 0x150 */
+      0xE0100, /* Variation Selectors Supplement		     */
       0xE01F0, /* reserved					     */
       0xF0000, /* Supplementary Private Use Area-A		     */
       0x100000, /* Supplementary Private Use Area-B		     */
   /* Value over 0x10FFFF is illegal under Unicode,
-     They are for some special use.  *** experimental ***  */
+     We use them for some special applications.  */
       0x110000, /* Reserved					     */
-      0x120000, /* Reserved					     */
+      0x120000, /* Reserved					     */ /* 0x160 */
       0x130000, /* Reserved					     */
       0x140000, /* Reserved					     */
       0x150000, /* Reserved					     */
@@ -448,12 +459,12 @@
       0x1A0000, /* Reserved					     */
       0x1B0000, /* Reserved					     */
       0x1C0000, /* Reserved					     */
-      0x1D0000, /* Reserved					     */ /* 0x160 */
+      0x1D0000, /* Reserved					     */
       0x1E0000, /* Reserved					     */
       0x1F0000, /* Reserved					     */
       0x200000, /* Reserved					     */
       0x210000, /* Reserved					     */
-      0x220000, /* Kana with Voiced Sound Mark			     */
+      0x220000, /* Kana with Voiced Sound Mark			     */ /* 0x170 */
       0x240000, /* Kana with Semi-Voiced Sound Mark		     */
       0x25E6E6, /* Emoji Flag Sequence				     */
       0x260000, /* Emoji with Modifier Fitzpatrick		     */
@@ -460,8 +471,8 @@
       0x300000, /* Reserved					     */
       0x400000, /* Standardized Variation Sequence		     */
       0x800000, /* Emoji Keycap Sequence			     */
-      0x800080, /* Ideographic Variation Sequence		     */ /* 0x16C */
-      CJK_CHAR_LIMIT, /* Ideographic Variation Sequence, VS49..VS256 */
+      0x800080, /* Ideographic Variation Sequence, VS17..VS48	     */
+      CJK_CHAR_LIMIT, /* Ideographic Variation Sequence, VS49..VS256 */ /* 0x178 */
       IVS_CHAR_LIMIT
 };
 
@@ -544,11 +555,11 @@
             if (   COMBINING_ENCLOSING_KEYCAP==c  )
             return 0x1FA;
             break;
-        case 0x12e:        /* Block : Enclosed Alphanumeric Supplement */
+        case 0x138:        /* Block : Enclosed Alphanumeric Supplement */
             if (   REGIONAL_INDICATOR_SYMBOL_LETTER_A <=c && c<= REGIONAL_INDICATOR_SYMBOL_LETTER_Z  )
             return 0x1FB;
             break;
-        case 0x130:        /* Block : Miscellaneous Symbols and Pictographs */
+        case 0x13a:        /* Block : Miscellaneous Symbols and Pictographs */
             if (   EMOJI_MODIFIER_FITZPATRIC_TYPE1_2 <=c && c<= EMOJI_MODIFIER_FITZPATRIC_TYPE6  )
             return 0x1FC;
             break;

Modified: trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bbl
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bbl	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bbl	2024-09-28 14:21:29 UTC (rev 72394)
@@ -1,4 +1,4 @@
-\begin{thebibliography}{1}
+\begin{thebibliography}{10}
 
 \bibitem{björk2020}
 Tomas Björk.
@@ -22,6 +22,11 @@
 \newblock 구운몽.
 \newblock 高麗書林, 1975.
 
+\bibitem{imahashi}
+𫝆𫞎姬, 𡈽𪧦, 𠮷田真.
+\newblock CJK統合漢字拡張🄑--🄓のテスト --- その🄰.
+\newblock 冬𠘨𫝷, 2024.
+
 \bibitem{nikos}
 Νίκος Καζαντζάκης.
 \newblock {\em Συμπόσιον}.

Modified: trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bib
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bib	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/tests/unibib.bib	2024-09-28 14:21:29 UTC (rev 72394)
@@ -79,3 +79,25 @@
   publisher = "Snowman commedian Press",
   year      = 2020,
 }
+
+%% CJK統合漢字拡張B-Dのテスト
+@book{imahashi,
+  author    = "𫝆𫞎 姬 and 𡈽 𪧦 and 𠮷田 真",
+  yomi      = "Hime Imahashi",
+  title     = "CJK統合漢字拡張🄑--🄓のテスト --- その🄰",
+  publisher = "冬𠘨𫝷",
+  year      = 2024,
+}
+% 𫝆 U+2B746  CJK Unified Ideographs Extension D      AJ:13780
+% 𫞎 U+2B78E  CJK Unified Ideographs Extension D      AJ:13724
+% 姬 U+2F862  CJK Compatibility Ideographs Supplement AJ:13998
+% 𡈽 U+2123D  CJK Unified Ideographs Extension B      AJ:13953 J3-2F42
+% 𪧦 U+2A9E6  CJK Unified Ideographs Extension C      AJ:14145
+% 𠮷 U+20BB7  CJK Unified Ideographs Extension B      AJ:13706
+% 真 U+2F947  CJK Compatibility Ideographs Supplement AJ:13854
+% 冬 U+2F81A  CJK Compatibility Ideographs Supplement AJ:13954
+% 𠘨 U+20628  CJK Unified Ideographs Extension B      AJ:14105 J4-2332
+% 𫝷 U+2B777  CJK Unified Ideographs Extension D      AJ:13782
+% 🄑 U+1F111  Parenthesized Latin Capital Letter B
+% 🄓 U+1F113  Parenthesized Latin Capital Letter D
+% 🄰 U+1F130  Squared Latin Capital Letter A

Modified: trunk/Build/source/texk/web2c/uptexdir/tests/unibib.tex
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/tests/unibib.tex	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/tests/unibib.tex	2024-09-28 14:21:29 UTC (rev 72394)
@@ -82,6 +82,15 @@
   publisher = "Snowman commedian Press",
   year      = 2020,
 }
+
+%% CJK統合漢字拡張B-Dのテスト
+@book{imahashi,
+  author    = "𫝆𫞎 姬 and 𡈽 𪧦 and 𠮷田 真",
+  yomi      = "Hime Imahashi",
+  title     = "CJK統合漢字拡張🄑--🄓のテスト --- その🄰",
+  publisher = "冬𠘨𫝷",
+  year      = 2024,
+}
 \end{filecontents}
 
 \documentclass{ltjsarticle}

Modified: trunk/Build/source/texk/web2c/uptexdir/upbibtex.ch
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/upbibtex.ch	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/upbibtex.ch	2024-09-28 14:21:29 UTC (rev 72394)
@@ -968,7 +968,7 @@
 label exit;
 var k:integer;
 begin
-  { based on upTeX-1.30 kcatcode status: 16,17,19->true / 15,18->false }
+  { based on upTeX-1.35 kcatcode status: 16,17,19->true / 15,18->false }
   is_char_kanji_upbibtex := true;
   if (is_internalUPTEX) then begin { should be in sync with |kcat_code| of uptex-m.ch }
     k := kcatcodekey(c);
@@ -985,8 +985,8 @@
     else if k=@"93 then return { Hangul Syllables }
     else if k=@"94 then return { Hangul Jamo Extended-B }
     else if k=@"99 then return { CJK Compatibility Ideographs }
-    else if (k>=@"10D)and(k<=@"110) then return { Kana Extended-B .. Small Kana Extension }
-    else if (k>=@"13B)and(k<=@"143) then return { CJK Unified Ideographs Extension B .. H }
+    else if (k>=@"115)and(k<=@"118) then return { Kana Extended-B .. Small Kana Extension }
+    else if (k>=@"145)and(k<=@"14F) then return { CJK Unified Ideographs Extension B .. J }
     else if k=@"1FE then return { Fullwidth digit and latin alphabet }
     else if k=@"1FF then return; { Halfwidth katakana }
     end

Modified: trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch
===================================================================
--- trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch	2024-09-28 14:21:24 UTC (rev 72393)
+++ trunk/Build/source/texk/web2c/uptexdir/uptex-m.ch	2024-09-28 14:21:29 UTC (rev 72394)
@@ -283,12 +283,12 @@
   @t\hskip10pt@>kcat_code(@"99):=kanji; { CJK Compatibility Ideographs }
   @t\hskip10pt@>kcat_code(@"9C):=modifier; { Variation Selectors }
   { \hskip10pt|kcat_code(@"A2):=other_kchar;| Halfwidth and Fullwidth Forms }
-  @+@t\1@>for k:=@"10D to @"110 do kcat_code(k):=kana; { Kana Extended-B .. Small Kana Extension }
-  @+@t\1@>for k:=@"13B to @"143 do kcat_code(k):=kanji; { CJK Unified Ideographs Extension B .. H }
-  @t\hskip10pt@>kcat_code(@"150):=modifier; { Variation Selectors Supplement }
-  @+@t\1@>for k:=@"165 to @"166 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark }
-  @t\hskip10pt@>kcat_code(@"16A):=kanji; { Standardized Variation Sequence }
-  @+@t\1@>for k:=@"16C to @"16D do kcat_code(k):=kanji; { Ideographic Variation Sequence }
+  @+@t\1@>for k:=@"115 to @"118 do kcat_code(k):=kana; { Kana Extended-B .. Small Kana Extension }
+  @+@t\1@>for k:=@"145 to @"14F do kcat_code(k):=kanji; { CJK Unified Ideographs Extension B .. J }
+  @t\hskip10pt@>kcat_code(@"15B):=modifier; { Variation Selectors Supplement }
+  @+@t\1@>for k:=@"170 to @"171 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark }
+  @t\hskip10pt@>kcat_code(@"175):=kanji; { Standardized Variation Sequence }
+  @+@t\1@>for k:=@"177 to @"178 do kcat_code(k):=kanji; { Ideographic Variation Sequence }
  @+@t\1@>for k:=@"1F9 to @"1FC do kcat_code(k):=modifier;
     { Combining Katakana-Hiragana (Semi-)Voiced Sound Mark .. Emoji Modifier Fitzpatrick }
   @t\hskip10pt@>kcat_code(@"1FD):=not_cjk; { Latin-1 Letters }



More information about the tex-live-commits mailing list.