texlive[71288] Build/source/texk/web2c: xetex: Accept over-BMP when getting charcode from control symbols by "`"

commits+takuji at tug.org commits+takuji at tug.org
Sat May 18 13:06:21 CEST 2024


Revision: 71288
          https://tug.org/svn/texlive?view=revision&revision=71288
Author:   takuji
Date:     2024-05-18 13:06:21 +0200 (Sat, 18 May 2024)
Log Message:
-----------
xetex: Accept over-BMP when getting charcode from control symbols by "`"

Modified Paths:
--------------
    trunk/Build/source/texk/web2c/Makefile.in
    trunk/Build/source/texk/web2c/omegafonts/Makefile.in
    trunk/Build/source/texk/web2c/xetexdir/ChangeLog
    trunk/Build/source/texk/web2c/xetexdir/am/xetex.am
    trunk/Build/source/texk/web2c/xetexdir/xetex.ch
    trunk/Build/source/texk/web2c/xetexdir/xetex.web

Added Paths:
-----------
    trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.log
    trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.tex
    trunk/Build/source/texk/web2c/xetexdir/xetex-ctrlsym.test

Modified: trunk/Build/source/texk/web2c/Makefile.in
===================================================================
--- trunk/Build/source/texk/web2c/Makefile.in	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/Makefile.in	2024-05-18 11:06:21 UTC (rev 71288)
@@ -3445,7 +3445,8 @@
 	xetexdir/unicode-char-prep.pl xetexdir/xewebmac.tex \
 	$(xetex_tests) xetexdir/tests/bug73.log \
 	xetexdir/tests/bug73.tex xetexdir/tests/filedump.log \
-	xetexdir/tests/filedump.tex omegaware/README \
+	xetexdir/tests/filedump.tex xetexdir/tests/ctrlsym.log \
+	xetexdir/tests/ctrlsym.tex omegaware/README \
 	omegaware/ChangeLog $(odvicopy_sources) $(odvitype_sources) \
 	omegaware/ofm2opl.web omegaware/ofm2opl.up \
 	omegaware/ofm2opl.ch omegaware/opl2ofm.web \
@@ -3579,26 +3580,28 @@
 	$(nodist_libluatex_sources) luaimage.* luajitimage.* \
 	$(nodist_xetex_SOURCES) xetex.web xetex-final.ch xetex-web2c \
 	xetex.p xetex.pool xetex-tangle bug73.fmt bug73.log bug73.out \
-	bug73.tex filedump.log filedump.out filedump.tex xetests/fn*.* \
-	$(omegaware_programs:=.c) $(omegaware_programs:=.h) \
-	$(omegaware_programs:=.p) $(omegaware_programs:=-web2c) \
-	ofm2opl.web opl2ofm.web ovf2ovp.web ovp2ovf.web \
-	omegaware/bad*.* omegaware/tests/charwdr.* \
-	omegaware/tests/charwdv.* omegaware/tests/xcheck* \
-	omegaware/tests/xlevel1.* omegaware/tests/xlig*.* ofont*vf \
-	omegaware/tests/xpagenum.* omegaware/tests/xofont* \
-	omegaware/tests/Cherokee.tfm omegaware/tests/OCherokee.ofm \
-	omegaware/tests/OCherokee.opl omegaware/tests/OCherokee.ovf \
-	omegaware/tests/xCherokee.* omegaware/tests/xOCherokee.* \
-	ocftest.* omegaware/tests/xinbmp* omegaware/tests/xoverbmp* \
-	omegaware/tests/xrealnum.* omegaware/tests/xrepeated.* \
-	omegaware/tests/sample*.ofm omegaware/tests/sample*.ovf \
-	omegaware/tests/sample1-h.opl omegaware/tests/xsample*.out \
-	omegaware/tests/shortend.* omegaware/tests/specialhex.ofm \
-	omegaware/tests/specialhex.opl omegaware/tests/specialhex.ovf \
-	omegaware/tests/xspecialhex.* omegaware/tests/yrepeat* \
-	omegaware/tests/*yarabic* $(nodist_aleph_SOURCES) aleph.web \
-	aleph.ch aleph-web2c aleph.p aleph.pool aleph-tangle
+	bug73.tex filedump.log filedump.out filedump.tex \
+	xe-ctrlsym.fmt xe-ctrlsym.log xe-ctrlsym.out xe-ctrlsym.tex \
+	xetests/fn*.* $(omegaware_programs:=.c) \
+	$(omegaware_programs:=.h) $(omegaware_programs:=.p) \
+	$(omegaware_programs:=-web2c) ofm2opl.web opl2ofm.web \
+	ovf2ovp.web ovp2ovf.web omegaware/bad*.* \
+	omegaware/tests/charwdr.* omegaware/tests/charwdv.* \
+	omegaware/tests/xcheck* omegaware/tests/xlevel1.* \
+	omegaware/tests/xlig*.* ofont*vf omegaware/tests/xpagenum.* \
+	omegaware/tests/xofont* omegaware/tests/Cherokee.tfm \
+	omegaware/tests/OCherokee.ofm omegaware/tests/OCherokee.opl \
+	omegaware/tests/OCherokee.ovf omegaware/tests/xCherokee.* \
+	omegaware/tests/xOCherokee.* ocftest.* omegaware/tests/xinbmp* \
+	omegaware/tests/xoverbmp* omegaware/tests/xrealnum.* \
+	omegaware/tests/xrepeated.* omegaware/tests/sample*.ofm \
+	omegaware/tests/sample*.ovf omegaware/tests/sample1-h.opl \
+	omegaware/tests/xsample*.out omegaware/tests/shortend.* \
+	omegaware/tests/specialhex.ofm omegaware/tests/specialhex.opl \
+	omegaware/tests/specialhex.ovf omegaware/tests/xspecialhex.* \
+	omegaware/tests/yrepeat* omegaware/tests/*yarabic* \
+	$(nodist_aleph_SOURCES) aleph.web aleph.ch aleph-web2c aleph.p \
+	aleph.pool aleph-tangle
 CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LIBRARIES) $(EXTRA_LTLIBRARIES)
 TRIPTRAP_CLEAN = $(am__append_8) $(am__append_18) $(am__append_27) \
 	$(am__append_36) $(am__append_44) $(am__append_60) \
@@ -5370,6 +5373,7 @@
 xetex_tests = \
 	xetexdir/xetex-filedump.test \
 	xetexdir/xetex-bug73.test \
+	xetexdir/xetex-ctrlsym.test \
 	xetexdir/xetex.test \
 	xetexdir/wcfname.test
 
@@ -22202,7 +22206,8 @@
 xetex-final.ch: tie$(EXEEXT) $(xetex_ch_srcs)
 	$(tie_c) $(xetex_ch_srcs)
 $(libxetex_a_OBJECTS): $(libxetex_prereq)
-xetexdir/xetex-filedump.log xetexdir/xetex-bug73.log xetexdir/xetex.log \
+xetexdir/xetex-filedump.log xetexdir/xetex-bug73.log \
+  xetexdir/xetex-ctrlsym.log xetexdir/xetex.log \
   xetexdir/wcfname.log: xetex$(EXEEXT)
 
 # (end of xetex.am)

Modified: trunk/Build/source/texk/web2c/omegafonts/Makefile.in
===================================================================
--- trunk/Build/source/texk/web2c/omegafonts/Makefile.in	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/omegafonts/Makefile.in	2024-05-18 11:06:21 UTC (rev 71288)
@@ -108,6 +108,7 @@
 	$(top_srcdir)/../../m4/kpse-harfbuzz-flags.m4 \
 	$(top_srcdir)/../../m4/kpse-icu-flags.m4 \
 	$(top_srcdir)/../../m4/kpse-kpathsea-flags.m4 \
+	$(top_srcdir)/../../m4/kpse-largefile.m4 \
 	$(top_srcdir)/../../m4/kpse-lex.m4 \
 	$(top_srcdir)/../../m4/kpse-lib-version.m4 \
 	$(top_srcdir)/../../m4/kpse-libpng-flags.m4 \

Modified: trunk/Build/source/texk/web2c/xetexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2024-05-18 11:06:21 UTC (rev 71288)
@@ -1,3 +1,13 @@
+2024-05-18  Hironori Kitagawa  <h_kitagawa2001 at yahoo.co.jp>
+            TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* xetex.web, xetex.ch:
+	Accept characters of over-BMP when getting character codes
+	from control symbols by using backquote "`".
+	* xetex-ctrlsym.test, tests/ctrlsym.{log,tex}: Add tests.
+	* am/xetex.am: Adjusted.
+	https://github.com/texjporg/tex-jp-build/issues/167
+
 2024-03-10  Karl Berry  <karl at tug.org>
 
 	* TL'24 release.

Modified: trunk/Build/source/texk/web2c/xetexdir/am/xetex.am
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/am/xetex.am	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/xetexdir/am/xetex.am	2024-05-18 11:06:21 UTC (rev 71288)
@@ -206,9 +206,11 @@
 xetex_tests = \
 	xetexdir/xetex-filedump.test \
 	xetexdir/xetex-bug73.test \
+	xetexdir/xetex-ctrlsym.test \
 	xetexdir/xetex.test \
 	xetexdir/wcfname.test
-xetexdir/xetex-filedump.log xetexdir/xetex-bug73.log xetexdir/xetex.log \
+xetexdir/xetex-filedump.log xetexdir/xetex-bug73.log \
+  xetexdir/xetex-ctrlsym.log xetexdir/xetex.log \
   xetexdir/wcfname.log: xetex$(EXEEXT)
 
 EXTRA_DIST += $(xetex_tests)
@@ -225,6 +227,10 @@
 EXTRA_DIST += xetexdir/tests/filedump.log xetexdir/tests/filedump.tex
 DISTCLEANFILES += filedump.log filedump.out filedump.tex
 
+## xetex-ctrlsym.test
+EXTRA_DIST += xetexdir/tests/ctrlsym.log xetexdir/tests/ctrlsym.tex
+DISTCLEANFILES += xe-ctrlsym.fmt xe-ctrlsym.log xe-ctrlsym.out xe-ctrlsym.tex
+
 ## wcfname.test
 DISTCLEANFILES += xetests/fn*.*
 

Added: trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.log
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.log	                        (rev 0)
+++ trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.log	2024-05-18 11:06:21 UTC (rev 71288)
@@ -0,0 +1,20 @@
+entering extended mode
+ restricted \write18 enabled.
+ %&-line parsing enabled.
+**xe-ctrlsym
+(./xe-ctrlsym.tex 
+A: 38634
+
+\雪->snow
+B: 38634. 雪\雪 snow
+macro:->snow.
+macro:->snow.
+
+C: 127820
+
+\🍌 ->banana
+D: 127820. 🍌\🍌 banana
+macro:->banana.
+macro:->banana.
+ )
+No pages of output.

Added: trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.tex
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.tex	                        (rev 0)
+++ trunk/Build/source/texk/web2c/xetexdir/tests/ctrlsym.tex	2024-05-18 11:06:21 UTC (rev 71288)
@@ -0,0 +1,34 @@
+% Copyright 2024 Japanese TeX Development Community <issue at texjp.org>
+% You may freely use, modify and/or distribute this file.
+%
+\ifx\fmtname\undefined
+  \input basic
+  \def\fmtname{basic}
+  \expandafter\dump
+\fi
+%==================
+\begingroup % Just there to minimize tracing output
+  \tracingmacros=2
+  \tracingonline=1
+%--
+  \newlinechar=10
+  \count255=0
+  \count255=`雪 \relax
+  \message{^^JA: \the\count255^^J}
+  \def\雪{snow}
+  \count255=`\雪 \relax
+  \message{B: \the\count255. 雪\string\雪 \雪^^J}
+  \message{\expandafter\meaning\csname 雪\endcsname.^^J}
+  \message{\expandafter\meaning\csname \Uchar"96EA\endcsname.^^J}%
+  \count4095=0
+  \count4095=`🍌 \relax
+  \message{^^JC: \the\count4095^^J}
+  \def\🍌{banana}
+  \count4095=`\🍌 \relax
+  \message{D: \the\count4095. 🍌\string\🍌 \🍌^^J}
+  \message{\expandafter\meaning\csname 🍌\endcsname.^^J}
+  \message{\expandafter\meaning\csname \Uchar"1F34C\endcsname.^^J}%
+%--
+  \setbox0=\vbox{} % no dvi/xdv/pdf output
+\endgroup
+\end

Added: trunk/Build/source/texk/web2c/xetexdir/xetex-ctrlsym.test
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex-ctrlsym.test	                        (rev 0)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex-ctrlsym.test	2024-05-18 11:06:21 UTC (rev 71288)
@@ -0,0 +1,28 @@
+#! /bin/sh -vx
+# Copyright 2024 Japanese TeX Development Community <issue at texjp.org>
+# You may freely use, modify and/or distribute this file.
+
+BinDir=${BinDir:-.}
+ExeExt=${ExeExt:-}
+_xetex=$BinDir/xetex$ExeExt
+
+LC_ALL=C; export LC_ALL;  LANGUAGE=C; export LANGUAGE
+
+TEXMFCNF=$srcdir/../kpathsea;export TEXMFCNF
+TEXINPUTS=".;$srcdir/tests"; export TEXINPUTS
+TEXFORMATS=.; export TEXFORMATS
+
+# get same filename in log
+rm -f xe-ctrlsym.tex
+$LN_S $srcdir/xetexdir/tests/ctrlsym.tex ./xe-ctrlsym.tex
+
+#exit 77
+
+$_xetex -ini -etex xe-ctrlsym || exit 1
+
+$_xetex -etex -fmt=xe-ctrlsym xe-ctrlsym || exit 2
+
+sed 1d xe-ctrlsym.log >xe-ctrlsym.out
+
+diff $srcdir/xetexdir/tests/ctrlsym.log xe-ctrlsym.out || exit 3
+


Property changes on: trunk/Build/source/texk/web2c/xetexdir/xetex-ctrlsym.test
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: trunk/Build/source/texk/web2c/xetexdir/xetex.ch
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex.ch	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex.ch	2024-05-18 11:06:21 UTC (rev 71288)
@@ -111,6 +111,14 @@
 @!bound_default:integer; {temporary for setup}
 @z
 
+@x [5.??] type of trick_buf[]
+@!trick_buf:array[0..ssup_error_line] of ASCII_code; {circular buffer for
+  pseudoprinting}
+@y
+@!trick_buf:array[0..ssup_error_line] of UnicodeScalar; {circular buffer for
+  pseudoprinting}
+@z
+
 @x [5.61] l.1556 - Print rest of banner, eliminate misleading `(no format preloaded)'.
 if translate_filename then begin
   wterm(' (');

Modified: trunk/Build/source/texk/web2c/xetexdir/xetex.web
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex.web	2024-05-18 11:06:15 UTC (rev 71287)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex.web	2024-05-18 11:06:21 UTC (rev 71288)
@@ -1631,7 +1631,7 @@
 calling |print_ln| in the middle of such character.
 
 @<Basic printing...@>=
-procedure print_raw_char(@!s:ASCII_code;@!incr_offset:boolean); {prints a single character}
+procedure print_raw_char(@!s:UnicodeScalar;@!incr_offset:boolean); {prints a single character}
 label exit; {label is not used but nonetheless kept (for other changes?)}
 begin
 case selector of
@@ -5372,7 +5372,7 @@
 equivalents.
 
 In the first region we have |number_usvs| equivalents for ``active characters''
-that act as control sequences, followed by |number_usvs| equivalents for
+that act as control sequences, followed by |too_big_char| equivalents for
 single-character control sequences.
 
 Then comes region~2, which corresponds to the hash table that we will
@@ -5384,7 +5384,9 @@
 @d active_base=1 {beginning of region 1, for active character equivalents}
 @d single_base=active_base+number_usvs
    {equivalents of one-character control sequences}
-@d null_cs=single_base+number_usvs {equivalent of \.{\\csname\\endcsname}}
+   {single-character sequence whose character code is ${}>|@"FFFF|$ are
+    treated as a multiletter sequence, because UTF-16 is used in the string pool. }
+@d null_cs=single_base+too_big_char {equivalent of \.{\\csname\\endcsname}}
 @d hash_base=null_cs+1 {beginning of region 2, for the hash table}
 @d frozen_control_sequence=hash_base+hash_size {for error recovery}
 @d frozen_protection=frozen_control_sequence {inaccessible but definable}
@@ -10436,7 +10438,19 @@
   end
 else if cur_tok<cs_token_flag+single_base then
   cur_val:=cur_tok-cs_token_flag-active_base
-else cur_val:=cur_tok-cs_token_flag-single_base;
+else if cur_tok<cs_token_flag+null_cs then
+  cur_val:=cur_tok-cs_token_flag-single_base
+else { check the cs is a single-letter whose character code is ${}>|@"FFFF|$ }
+  begin m:=text(cur_tok-cs_token_flag);
+  if str_start_macro(m+1)=2+str_start_macro(m) then begin
+    m:=str_start_macro(m);
+    if (so(str_pool[m])>=@"D800) and (so(str_pool[m])<=@"DBFF)
+      and (so(str_pool[m+1])>=@"DC00) and (so(str_pool[m+1])<=@"DFFF) then
+      cur_val:=@"10000 + (so(str_pool[m])-@"D800) * @"400 + so(str_pool[m+1])-@"DC00
+    else cur_val:=too_big_usv;
+    end
+  else cur_val:=too_big_usv;
+  end;
 if cur_val>biggest_usv then
   begin print_err("Improper alphabetic constant");
 @.Improper alphabetic constant@>



More information about the tex-live-commits mailing list.