texlive[70121] Build/source/texk/web2c/xetexdir: xetex: Accept file names with characters of over-BMP

commits+takuji at tug.org commits+takuji at tug.org
Sat Feb 24 04:11:58 CET 2024


Revision: 70121
          https://tug.org/svn/texlive?view=revision&revision=70121
Author:   takuji
Date:     2024-02-24 04:11:58 +0100 (Sat, 24 Feb 2024)
Log Message:
-----------
xetex: Accept file names with characters of over-BMP

Modified Paths:
--------------
    trunk/Build/source/texk/web2c/xetexdir/ChangeLog
    trunk/Build/source/texk/web2c/xetexdir/wcfname.test
    trunk/Build/source/texk/web2c/xetexdir/xetex.ch
    trunk/Build/source/texk/web2c/xetexdir/xetex.web

Modified: trunk/Build/source/texk/web2c/xetexdir/ChangeLog
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2024-02-24 02:24:04 UTC (rev 70120)
+++ trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2024-02-24 03:11:58 UTC (rev 70121)
@@ -1,3 +1,13 @@
+2024-02-24  TANAKA Takuji  <ttk at t-lab.opal.ne.jp>
+
+	* xetex.web, xetex.ch:
+	Accept file names with characters of over-BMP.
+	* wcfname.test:
+	Add tests for file names with characters of over-BMP.
+	https://tug.org/pipermail/tex-live/2023-November/049665.html
+	https://tug.org/pipermail/tex-live/2023-November/049685.html
+	https://github.com/texjporg/tex-jp-build/issues/164
+
 2024-02-04  Karl Berry  <karl at tug.org>
 
         * NEWS,

Modified: trunk/Build/source/texk/web2c/xetexdir/wcfname.test
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/wcfname.test	2024-02-24 02:24:04 UTC (rev 70120)
+++ trunk/Build/source/texk/web2c/xetexdir/wcfname.test	2024-02-24 03:11:58 UTC (rev 70121)
@@ -61,7 +61,7 @@
 rm -f $testdir/fn*-euc.tex $testdir/fn*-sjis.tex
 
 fenc="utf8"
-for doc in fn-$fenc fn£¥µÆÇñß-$fenc fnさざ波-$fenc; do
+for doc in fn-$fenc fn£¥µÆÇñß-$fenc fnさざ波-$fenc fnΔДदダ打다𝕯🎉-$fenc; do
 
   echo '>>> Document:'$doc '  File Encoding:'$fenc
   job=$doc-xe

Modified: trunk/Build/source/texk/web2c/xetexdir/xetex.ch
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex.ch	2024-02-24 02:24:04 UTC (rev 70120)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex.ch	2024-02-24 03:11:58 UTC (rev 70121)
@@ -327,7 +327,15 @@
       quote_char:="""" + "'" - quote_char;
       print(quote_char);
     end;
-    print(str_pool[j]);
+    if (so(str_pool[j])>=@"D800) and (so(str_pool[j])<=@"DBFF)
+      and (j+1<str_start_macro(#+1))
+      and (so(str_pool[j+1])>=@"DC00) and (so(str_pool[j+1])<=@"DFFF) then
+      begin print_char(@"10000 + (so(str_pool[j])-@"D800) * @"400
+                     + so(str_pool[j+1])-@"DC00);
+      incr(j);
+      end
+    else
+      print(str_pool[j]);
   end
 @z
 
@@ -371,10 +379,25 @@
       else if (c < @"800) then begin
         name_of_file[k]:=@"C0 + c div @"40; incr(k);
         name_of_file[k]:=@"80 + c mod @"40;
-      end else begin
+      end else if (c < @"D800) then begin
 		name_of_file[k]:=@"E0 + c div @"1000; incr(k);
 		name_of_file[k]:=@"80 + (c mod @"1000) div @"40; incr(k);
-		name_of_file[k]:=@"80 + (c mod @"1000) mod @"40;
+		name_of_file[k]:=@"80 + c mod @"40;
+      end else if (c < @"DC00) and (k<file_name_size) then begin
+		name_of_file[k]:=@"F0 + (c - @"D7C0) div @"1000; incr(k);
+		name_of_file[k]:=@"80 + ((c - @"D7C0) mod @"1000) div @"4; incr(k);
+		name_of_file[k]:=@"80 + (c - @"D7C0) mod @"4 * @"10;
+      end else if (c < @"E000) and (k>0) then begin
+		name_of_file[k-1]:=name_of_file[k-1] + (c - @"DC00) div @"40;
+		name_of_file[k]:=@"80 + (c - @"DC00) mod @"40;
+      end else if (c < @"10000) then begin
+		name_of_file[k]:=@"E0 + c div @"1000; incr(k);
+		name_of_file[k]:=@"80 + (c mod @"1000) div @"40; incr(k);
+		name_of_file[k]:=@"80 + c mod @"40;
+      end else begin { replacement character U+FFFD }
+		name_of_file[k]:=@"EF; incr(k);
+		name_of_file[k]:=@"BF; incr(k);
+		name_of_file[k]:=@"BD;
       end
     end
   end

Modified: trunk/Build/source/texk/web2c/xetexdir/xetex.web
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex.web	2024-02-24 02:24:04 UTC (rev 70120)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex.web	2024-02-24 03:11:58 UTC (rev 70121)
@@ -1287,7 +1287,13 @@
 apologetic error message if there isn't enough room.
 
 @d append_char(#) == {put |ASCII_code| \# at the end of |str_pool|}
-begin str_pool[pool_ptr]:=si(#); incr(pool_ptr);
+begin
+  if (si(#)>@"FFFF) then begin
+    str_pool[pool_ptr]:=si((# - @"10000) div @"400 + @"D800); incr(pool_ptr);
+    str_pool[pool_ptr]:=si((#          ) mod @"400 + @"DC00); incr(pool_ptr);
+  end else begin
+    str_pool[pool_ptr]:=si(#); incr(pool_ptr);
+  end;
 end
 @d flush_char == decr(pool_ptr) {forget the last character in the pool}
 @d str_room(#) == {make sure that the pool hasn't overflowed}
@@ -2105,7 +2111,7 @@
 @<Error hand...@>=
 procedure error; {completes the job of error reporting}
 label continue,exit;
-var c:ASCII_code; {what the user types}
+var c:UnicodeScalar; {what the user types}
 @!s1,@!s2,@!s3,@!s4:integer;
   {used to save global variables when deleting tokens}
 begin if history<error_message_issued then history:=error_message_issued;
@@ -12134,9 +12140,12 @@
 string, instead of assigning an absolute address like |pool_ptr| to them.
 @^system dependencies@>
 
-@p function more_name(@!c:ASCII_code):boolean;
+@p function more_name(@!c:UnicodeScalar):boolean;
 begin if c=" " then more_name:=false
-else  begin str_room(1); append_char(c); {contribute |c| to the current string}
+else  begin
+  if (c>@"FFFF) then str_room(2)
+  else str_room(1);
+  append_char(c); {contribute |c| to the current string}
   if (c=">")or(c=":") then
     begin area_delimiter:=cur_length; ext_delimiter:=0;
     end
@@ -12187,7 +12196,7 @@
 
 @p procedure pack_file_name(@!n,@!a,@!e:str_number);
 var k:integer; {number of positions filled in |name_of_file|}
-@!c: ASCII_code; {character being packed}
+@!c: UnicodeScalar; {character being packed}
 @!j:pool_pointer; {index into |str_pool|}
 begin k:=0;
 for j:=str_start_macro(a) to str_start_macro(a+1)-1 do append_to_name(so(str_pool[j]));
@@ -12326,7 +12335,7 @@
 label done;
 begin name_in_progress:=true; begin_name;
 @<Get the next non-blank non-call...@>;
-loop@+begin if (cur_cmd>other_char)or(cur_chr>biggest_char) then
+loop@+begin if (cur_cmd>other_char)or(cur_chr>biggest_usv) then
     {not a character}
     begin back_input; goto done;
     end;
@@ -16576,15 +16585,9 @@
   i, len: integer;
 begin
   if font_mapping[f] <> 0 then begin
-    if c > @"FFFF then begin
-      str_room(2);
-      append_char((c - @"10000) div 1024 + @"D800);
-      append_char((c - @"10000) mod 1024 + @"DC00);
-    end
-    else begin
-      str_room(1);
-      append_char(c);
-    end;
+    if (c>@"FFFF) then str_room(2)
+    else str_room(1);
+    append_char(c);
     len:=apply_mapping(font_mapping[f], addressof(str_pool[str_start_macro(str_ptr)]), cur_length);
     pool_ptr:=str_start_macro(str_ptr); { flush the string, as we'll be using the mapped text instead }
 



More information about the tex-live-commits mailing list.