texlive[71091] Build/source/texk/ptexenc: ptexenc: Support upTeX new encoding for combining characters

commits+takuji at tug.org commits+takuji at tug.org
Sat Apr 27 12:52:02 CEST 2024


Revision: 71091
          https://tug.org/svn/texlive?view=revision&revision=71091
Author:   takuji
Date:     2024-04-27 12:52:02 +0200 (Sat, 27 Apr 2024)
Log Message:
-----------
ptexenc: Support upTeX new encoding for combining characters

Modified Paths:
--------------
    trunk/Build/source/texk/ptexenc/ChangeLog
    trunk/Build/source/texk/ptexenc/c-auto.in
    trunk/Build/source/texk/ptexenc/configure
    trunk/Build/source/texk/ptexenc/ptexenc/unicode.h
    trunk/Build/source/texk/ptexenc/unicode.c
    trunk/Build/source/texk/ptexenc/version.ac

Modified: trunk/Build/source/texk/ptexenc/ChangeLog
===================================================================
--- trunk/Build/source/texk/ptexenc/ChangeLog	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/ChangeLog	2024-04-27 10:52:02 UTC (rev 71091)
@@ -1,3 +1,11 @@
+2024-04-27  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* unicode.c, ptexenc/unicode.h:
+	Add new functions UVS_* to support new encoding of upTeX
+	to {,de}compose characters with multiple codepoints.
+	* version.ac: Bump to 1.5.0/dev.
+	https://github.com/texjporg/tex-jp-build/issues/46
+
 2024-03-10  Karl Berry  <karl at tug.org>
 
 	* TL'24 release.

Modified: trunk/Build/source/texk/ptexenc/c-auto.in
===================================================================
--- trunk/Build/source/texk/ptexenc/c-auto.in	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/c-auto.in	2024-04-27 10:52:02 UTC (rev 71091)
@@ -6,7 +6,7 @@
 #define PTEXENC_C_AUTO_H
 
 /* ptexenc: the version string. */
-#define PTEXENCVERSION "ptexenc version 1.4.6/dev"
+#define PTEXENCVERSION "ptexenc version 1.5.0/dev"
 
 /* Define to 1 if the 'closedir' function returns void instead of int. */
 #undef CLOSEDIR_VOID

Modified: trunk/Build/source/texk/ptexenc/configure
===================================================================
--- trunk/Build/source/texk/ptexenc/configure	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/configure	2024-04-27 10:52:02 UTC (rev 71091)
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.72 for ptexenc 1.4.6/dev.
+# Generated by GNU Autoconf 2.72 for ptexenc 1.5.0/dev.
 #
 # Report bugs to <tex-k at tug.org>.
 #
@@ -614,8 +614,8 @@
 # Identity of this package.
 PACKAGE_NAME='ptexenc'
 PACKAGE_TARNAME='ptexenc'
-PACKAGE_VERSION='1.4.6/dev'
-PACKAGE_STRING='ptexenc 1.4.6/dev'
+PACKAGE_VERSION='1.5.0/dev'
+PACKAGE_STRING='ptexenc 1.5.0/dev'
 PACKAGE_BUGREPORT='tex-k at tug.org'
 PACKAGE_URL=''
 
@@ -1363,7 +1363,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-'configure' configures ptexenc 1.4.6/dev to adapt to many kinds of systems.
+'configure' configures ptexenc 1.5.0/dev to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1434,7 +1434,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ptexenc 1.4.6/dev:";;
+     short | recursive ) echo "Configuration of ptexenc 1.5.0/dev:";;
    esac
   cat <<\_ACEOF
 
@@ -1555,7 +1555,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ptexenc configure 1.4.6/dev
+ptexenc configure 1.5.0/dev
 generated by GNU Autoconf 2.72
 
 Copyright (C) 2023 Free Software Foundation, Inc.
@@ -2097,7 +2097,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ptexenc $as_me 1.4.6/dev, which was
+It was created by ptexenc $as_me 1.5.0/dev, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -2875,10 +2875,10 @@
 
 
 
-PTEXENCVERSION=1.4.6/dev
+PTEXENCVERSION=1.5.0/dev
 
 
-PTEXENC_LT_VERSINFO=5:6:4
+PTEXENC_LT_VERSINFO=6:0:5
 
 
 am__api_version='1.16'
@@ -8664,7 +8664,7 @@
 
 # Define the identity of the package.
  PACKAGE='ptexenc'
- VERSION='1.4.6/dev'
+ VERSION='1.5.0/dev'
 
 
 printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
@@ -15005,7 +15005,7 @@
 Report bugs to <bug-libtool at gnu.org>."
 
 lt_cl_version="\
-ptexenc config.lt 1.4.6/dev
+ptexenc config.lt 1.5.0/dev
 configured by $0, generated by GNU Autoconf 2.72.
 
 Copyright (C) 2011 Free Software Foundation, Inc.
@@ -16777,7 +16777,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ptexenc $as_me 1.4.6/dev, which was
+This file was extended by ptexenc $as_me 1.5.0/dev, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -16845,7 +16845,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-ptexenc config.status 1.4.6/dev
+ptexenc config.status 1.5.0/dev
 configured by $0, generated by GNU Autoconf 2.72,
   with options \\"\$ac_cs_config\\"
 

Modified: trunk/Build/source/texk/ptexenc/ptexenc/unicode.h
===================================================================
--- trunk/Build/source/texk/ptexenc/ptexenc/unicode.h	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/ptexenc/unicode.h	2024-04-27 10:52:02 UTC (rev 71091)
@@ -21,6 +21,11 @@
 extern long UCStoUPTEX(long ucs);
 extern long UPTEXtoUCS(long uptex);
 
+extern int  UVS_get_codepoint_length(long ucv);
+extern long UVS_get_codepoint_in_sequence(long ucv, int n);
+extern long UVS_combine_code(long ucv, long uvs);
+extern long UVS_divide_code(long code, long* uvs);
+
 #define LONG(a,b,c,d) ( ((long)(a)<<24) | ((long)(b)<<16) | ((c)<<8) | (d) )
 #define BYTE1(x) (((x)>>24) & 0xff)
 #define BYTE2(x) (((x)>>16) & 0xff)

Modified: trunk/Build/source/texk/ptexenc/unicode.c
===================================================================
--- trunk/Build/source/texk/ptexenc/unicode.c	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/unicode.c	2024-04-27 10:52:02 UTC (rev 71091)
@@ -129,6 +129,214 @@
 /* using over U+10.FFFF Area */
 long UPTEXtoUCS (long uptex)
 {
-    if (uptex>=UCS_MAX) return uptex % UCS_MAX; /* for OTF package */
+    long vs[2]; /* scratch output for UVS_divide_code; two slots because its keycap branch stores two trailing codepoints */
+    if ((uptex>=0x220000 && uptex<=0x2FFFFF) || /* for 2-codepoint sequence */
+         uptex>=0x400000) {               /* for SVS, IVS */
+        if (UVS_divide_code(uptex, vs) == 0) return 0; /* UVS_divide_code returned 0, the value its Undefined path yields */
+        return uptex; /* codes that decode successfully are returned unchanged */
+    }
+    if (uptex>=UCS_MAX*2) return uptex; /* remaining codes at or above 2*UCS_MAX are passed through as-is */
+    if (uptex>=UCS_MAX)   return uptex % UCS_MAX; /* for OTF package */
     return uptex;
 }
+
+/*
+ * Report how many codepoints the internal code `ucv' represents.
+ *
+ *   1  for codes below 0x220000
+ *   2  for the two-codepoint ranges (0x220000..0x2FFFFF and
+ *      0x400000..0x43FFFFF), including well-formed flag codes
+ *   3  for a well-formed Emoji Keycap code (0x800000 + base character)
+ *   0  for malformed flag/keycap codes and for every other value
+ */
+int
+UVS_get_codepoint_length(long ucv)
+{
+    long low;
+
+    if (ucv < 0x220000)
+        return 1;
+
+    /* RGI Emoji Flag Sequence: the low byte must be 0xE6 or above */
+    if (0x25E6E6 <= ucv && ucv <= 0x25FFFF) {
+        low = ucv & 0xFF;
+        return (low >= 0xE6) ? 2 : 0;
+    }
+
+    /* Emoji Keycap Sequence: base must be '#', '*' or a digit '0'..'9' */
+    if (0x800000 <= ucv && ucv <= 0x80007F) {
+        low = ucv & 0x7F;
+        if (low == 0x23 || low == 0x2A)
+            return 3;
+        return (0x30 <= low && low <= 0x39) ? 3 : 0;
+    }
+
+    /* remaining codes of the 0x22xxxx..0x2Fxxxx area */
+    if (ucv < 0x300000)
+        return 2;
+
+    /* 0x400000..0x43FFFFF area */
+    if (0x400000 <= ucv && ucv <= 0x43FFFFF)
+        return 2;
+
+    return 0;
+}
+
+/*
+ * Decode the sequence packed into the internal code `ucv'.
+ *
+ *   n < 0      return the number of codepoints in the sequence
+ *   n = 1..3   return the n-th codepoint, or 0 when the sequence is shorter
+ *   otherwise  return 0
+ *
+ * A malformed flag code returns 0 regardless of `n'.  Codes outside every
+ * supported range (0x300000..0x3FFFFF and anything above 0x43FFFFF that is
+ * not a keycap code) yield length 0 and codepoints 0.
+ */
+long
+UVS_get_codepoint_in_sequence(long ucv, int n)
+{
+    long uvs;
+    long len = 0;               /* stays 0 when no branch recognises ucv */
+    long cp[3] = { 0, 0, 0 };   /* zeroed so unsupported codes never read garbage */
+
+    if (ucv <  0x220000) {
+        cp[0] =   ucv % 0x110000;
+        len = 1;
+    }
+    else if (ucv >= 0x25E6E6 && ucv <= 0x25FFFF) {    /* RGI Emoji Flag Sequence */
+        if ((ucv & 0xFF) < 0xE6) return 0;  /* illegal value */
+        cp[0] = ((ucv >> 8) & 0xFF) + 0x1F100;
+        cp[1] =  (ucv       & 0xFF) + 0x1F100;
+        len = 2;
+    }
+    else if (ucv >= 0x800000 && ucv <= 0x80007F) {    /* Emoji Keycap Sequence */
+        cp[0] =   ucv & 0x7F;
+        cp[1] =   0xFE0F;
+        cp[2] =   0x20E3;
+        len = 3;
+    }
+    else if (ucv <  0x300000) {
+        /* ucv >= 0x220000 here, so uvs is always within 0x22..0x2F */
+        len = 2;
+        uvs = ucv >> 16;
+        cp[0] = ucv & 0x1FFFF;
+        if (uvs <= 0x25) {                   /* (Semi-)Voiced Sound Mark */
+            cp[1] = ((uvs - 0x22) >> 1) + 0x3099;
+        } else {                             /* Emoji Modifier Fitzpatrick */
+            cp[1] = ((uvs - 0x26) >> 1) + 0x1F3FB;
+        }
+    }
+    else if (ucv >= 0x400000 && ucv <= 0x43FFFFF) {
+        /* the upper bound keeps uvs within 0x40..0x43F */
+        len = 2;
+        uvs = ucv >> 16;
+        cp[0] = ucv & 0x3FFFF;
+        if (uvs <= 0x7F) {                   /* SVS, VS1 .. VS16 */
+            cp[1] = ((uvs - 0x40) >> 2) + 0xFE00;
+        } else {                             /* IVS, VS17 .. VS256 */
+            cp[1] = ((uvs - 0x80) >> 2) + 0xE0100;
+        }
+    }
+
+    if (n<0)
+        return len;
+    if (n>=1 && n<=3) {
+        return cp[n-1];
+    }
+
+    /* Unsupported */
+    return 0;
+}
+
+/*
+ * Pack a base code `ucv' and a following codepoint `uvs' into a single
+ * internal code.  Returns 0 when the pair is not a supported combination.
+ *
+ * `ucv' is normally a plain codepoint; for the keycap case it is the
+ * already-combined "base + U+FE0F" code (0x7C00xx) and `uvs' is U+20E3.
+ */
+long
+UVS_combine_code(long ucv, long uvs)
+{
+    long ivs_limit;
+
+    /* Emoji Keycap Sequence: U+00xx U+FE0F U+20E3 */
+    if (uvs == 0x20E3) {
+        if (ucv == 0x7C0023 || ucv == 0x7C002A ||
+            (0x7C0030 <= ucv && ucv <= 0x7C0039))
+            return 0x40000 + ucv;
+    }
+
+    /* every remaining rule needs a base of at most U+3FFFF */
+    if (ucv > 0x3FFFF)
+        return 0;
+
+    /* Kana (Semi-)Voiced Sound Mark */
+    if (0x3099 <= uvs && uvs <= 0x309A) {
+        if ((0x03000 <= ucv && ucv <= 0x031FF) ||
+            (0x1AFF0 <= ucv && ucv <= 0x1B16F))
+            return ((uvs - 0x3099) << 17) + 0x220000 + ucv;
+    }
+
+    /* Emoji Modifier Fitzpatrick */
+    if (0x1F3FB <= uvs && uvs <= 0x1F3FF) {
+        if ((0x02600 <= ucv && ucv <= 0x027BF) ||
+            (0x1F300 <= ucv && ucv <= 0x1F9FF))
+            return ((uvs - 0x1F3FB) << 17) + 0x260000 + ucv;
+    }
+
+    /* RGI Emoji Flag Sequence: both halves in U+1F1E6..U+1F1FF */
+    if (0x1F1E6 <= ucv && ucv <= 0x1F1FF &&
+        0x1F1E6 <= uvs && uvs <= 0x1F1FF)
+        return ((ucv & 0xFF) << 8) + (uvs & 0xFF) + 0x250000;
+
+    /* SVS, VS1 .. VS16 */
+    if (ucv <= 0x2FFFF && 0xFE00 <= uvs && uvs <= 0xFE0F)
+        return ((uvs - 0xFE00) << 18) + 0x400000 + ucv;
+
+    /* IVS */
+    if (uvs >= 0xE0100) {
+        /* base ranges that take no IVS */
+        if (ucv <= 0x033FF)
+            return 0;
+        if (0x04DC0 <= ucv && ucv <= 0x04DFF)
+            return 0;
+        if (0x0A000 <= ucv && ucv <= 0x0F8FF)
+            return 0;
+        if (0x0FB00 <= ucv && ucv <= 0x1FFFF)
+            return 0;
+
+        /* BMP accepts VS17 .. VS256; SIP and TIP accept VS17 .. VS32 only */
+        ivs_limit = (ucv <= 0x0FFFF) ? 0xE01EF : 0xE010F;
+        if (uvs <= ivs_limit)
+            return ((uvs - 0xE0100) << 18) + 0x800000 + ucv;
+    }
+
+    /* Unsupported Combination */
+    return 0;
+}
+
+/*
+ * Split an internal sequence code into its base codepoint and the
+ * codepoint(s) that follow it.
+ *
+ *   code  packed value as produced by UVS_combine_code
+ *   uvs   output buffer, may be NULL.  When non-NULL it must have room for
+ *         two longs: the keycap branch stores U+FE0F in uvs[0] and U+20E3
+ *         in uvs[1]; every other branch stores one value in uvs[0].
+ *
+ * Returns the base codepoint.  Returns 0 (and stores 0 in uvs[0] when uvs
+ * is non-NULL) for codes that are undefined.  A flag code also returns 0
+ * when uvs is NULL.
+ */
+long
+UVS_divide_code(long code, long* uvs)
+{
+    long u, v, p;
+
+    u = code & 0x1FFFF;  /* up to U+1FFFF */
+    v = code >> 16;
+    /* for Combining Katakana-Hiragana (Semi-)Voiced Sound Mark */
+    if (((u>=0x03000 && u<=0x031FF) ||
+         (u>=0x1AFF0 && u<=0x1B16F)) &&
+          v>=0x22 && v<=0x25) {
+        p = (v - 0x22) >> 1;  /* Voiced or Semi-Voiced */
+        if (uvs) *uvs = 0x3099 + p;
+        return u;
+    }
+    /* for Emoji Modifier Fitzpatrick */
+    if (((u>=0x02600 && u<=0x027BF) ||
+         (u>=0x1F300 && u<=0x1F9FF)) &&
+          v>=0x26 && v<=0x2F) {
+        p = (v - 0x26) >> 1;  /* Emoji Modifier Fitzpatrick Type-1..6 */
+        if (uvs) *uvs = 0x1F3FB + p;
+        return u;
+    }
+    /* for RGI Emoji Flag Sequence */
+    if (  u>=0x1E6E6 && v==0x25 ) {
+        if ((u & 0xFF) < 0xE6) goto Undefined;  /* illegal value */
+        /* Take the second letter from the low byte BEFORE u is replaced by
+           the first letter; otherwise both halves come out identical. */
+        v = ( u       & 0xFF) + 0x1F100; /* second Regional Indicator Symbol Letter */
+        u = ((u >> 8) & 0xFF) + 0x1F100; /* first Regional Indicator Symbol Letter */
+        if (!uvs) goto Undefined;
+        *uvs = v;
+        return u;
+    }
+
+    if (code<0x400000 || code>=0x4400000) {
+      /* Undefined */
+        goto Undefined;
+    }
+
+    /* for Variation Selector */
+    u = code & 0x3FFFF;  /* up to U+3FFFF */
+    v = code >> 18;
+    p = u >> 16;         /* plane number of the base codepoint */
+    if (v < 0x20) {  /* SVS     VS1 .. VS16 */
+        if (p==3) goto Undefined;
+        if (uvs) *uvs = v - 0x10 + 0xFE00;
+        return u;
+    } else
+    if (v == 0x20 && u <= 0x7F ) { /* for Emoji Keycap Sequence, need space for U+20E3 */
+        if (uvs) { *uvs = 0xFE0F; *(uvs+1) = 0x20E3; }
+        return u;
+    } else           /* IVS */
+    if (v < 0x40) {          /* VS17 .. VS32 */
+        if (p==1) goto Undefined;
+        if (uvs) *uvs = v - 0x20 + 0xE0100;
+        return u;
+    } else if (v <= 0x1FF) { /* VS33 .. VS256 */
+        if (p>0) goto Undefined;
+        if (uvs) *uvs = v - 0x20 + 0xE0100;
+        return u;
+    }
+
+ Undefined:
+    /* Undefined */
+    if (uvs) *uvs = 0;
+    return 0;
+}

Modified: trunk/Build/source/texk/ptexenc/version.ac
===================================================================
--- trunk/Build/source/texk/ptexenc/version.ac	2024-04-26 23:41:52 UTC (rev 71090)
+++ trunk/Build/source/texk/ptexenc/version.ac	2024-04-27 10:52:02 UTC (rev 71091)
@@ -11,4 +11,4 @@
 dnl see kpathsea/version.ac.
 dnl
 dnl This file is m4-included from configure.ac.
-m4_define([ptexenc_version], [1.4.6/dev])
+m4_define([ptexenc_version], [1.5.0/dev])



More information about the tex-live-commits mailing list.