texlive[46530] Build/source/texk/web2c/xetexdir: xetex.web: sync with the upstream

Sat Feb 3 15:10:32 CET 2018

Revision: 46530
          http://tug.org/svn/texlive?view=revision&revision=46530
Author:   kakuto
Date:     2018-02-03 15:10:32 +0100 (Sat, 03 Feb 2018)
Log Message:
-----------
xetex.web: sync with the upstream

Modified Paths:
--------------
    trunk/Build/source/texk/web2c/xetexdir/ChangeLog
    trunk/Build/source/texk/web2c/xetexdir/xetex.web

Modified: trunk/Build/source/texk/web2c/xetexdir/ChangeLog
===================================================================

--- trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2018-02-03 13:53:40 UTC (rev 46529)
+++ trunk/Build/source/texk/web2c/xetexdir/ChangeLog	2018-02-03 14:10:32 UTC (rev 46530)
@@ -12,6 +12,12 @@
 	native fonts. Fixes #145.
 	* XeTeX_ext.[ch]: Patch from Hironobu Yamashita to properly
 	close input pipes. Fixes 147.
+	* xetex.web: Leave text as UTF-16 in trick_buf for better
+	error context printing. (Mostly) fixes #146.
+	It's still possible that error context could include half of
+	a supplementary-plane character (i.e. an unpaired surrogate).
+	Should be a lot less likely, and less troublesome than broken
+	UTF-8, however.
 	* XeTeXFontMgr.cpp, XeTeXFontMgr.h: Convert optical size info
 	in OpenType 'size' feature from PostScript to TeX points.
 	Note that this will change behavior for existing documents

Modified: trunk/Build/source/texk/web2c/xetexdir/xetex.web
===================================================================
--- trunk/Build/source/texk/web2c/xetexdir/xetex.web	2018-02-03 13:53:40 UTC (rev 46529)
+++ trunk/Build/source/texk/web2c/xetexdir/xetex.web	2018-02-03 14:10:32 UTC (rev 46530)
@@ -1697,6 +1697,9 @@
   begin print_visible_char("^"); print_visible_char("^");
   print_lc_hex((s mod @"100) div @"10); print_lc_hex(s mod @"10);
   end
+ else if selector=pseudo then
+  print_visible_char(s) { Don't UTF8-encode text in |trick_buf|,
+                          we'll handle that when printing error context. }
  else begin
   { |char >= 128|: encode as UTF8 }
   if s<@"800 then begin
@@ -6077,7 +6080,7 @@
 @d prim_prime=431 {about 85\pct! of |primitive_size|}
 @d prim_base=1
 @d prim_next(#) == prim[#].lh {link for coalesced lists}
-@d prim_text(#) == prim[#].rh {string number for control sequence name, plus one}
+@d prim_text(#) == prim[#].rh {string number for control sequence name}
 @d prim_is_full == (prim_used=prim_base) {test if all positions are occupied}
 @d prim_eq_level_field(#)==#.hh.b1
 @d prim_eq_type_field(#)==#.hh.b0
@@ -6188,30 +6191,27 @@
 @!k:pointer; {index in string pool}
 @!j,@!l:integer;
 begin
-if s<=biggest_char then begin
-  if s<0 then begin p:=undefined_primitive; goto found; end
-  else p:=(s mod prim_prime)+prim_base; {we start searching here}
-  end
+if s<256 then begin
+  p:=s;
+  if (p<0) or (prim_eq_level(p)<>level_one) then
+    p:=undefined_primitive;
+end
 else begin
   j:=str_start_macro(s);
   if s = str_ptr then l:=cur_length else l:=length(s);
   @<Compute the primitive code |h|@>;
-  p:=h+prim_base; {we start searching here; note that |0<=h<prim_prime|}
-  end;
-loop@+begin
-  if prim_text(p)>1+biggest_char then { |p| points a multi-letter primitive }
-    begin if length(prim_text(p)-1)=l then
-      if str_eq_str(prim_text(p)-1,s) then goto found;
-    end
-  else if prim_text(p)=1+s then goto found; { |p| points a single-letter primitive }
-  if prim_next(p)=0 then
-    begin if no_new_control_sequence then
-      p:=undefined_primitive
-    else @<Insert a new primitive after |p|, then make
-      |p| point to it@>;
-    goto found;
+  p:=h+prim_base; {we start searching here; note that |0<=h<hash_prime|}
+  loop@+begin if prim_text(p)>0 then if length(prim_text(p))=l then
+    if str_eq_str(prim_text(p),s) then goto found;
+    if prim_next(p)=0 then
+      begin if no_new_control_sequence then
+        p:=undefined_primitive
+      else @<Insert a new primitive after |p|, then make
+        |p| point to it@>;
+      goto found;
+      end;
+    p:=prim_next(p);
     end;
-  p:=prim_next(p);
   end;
 found: prim_lookup:=p;
 end;
@@ -6224,7 +6224,7 @@
   until prim_text(prim_used)=0; {search for an empty location in |prim|}
   prim_next(p):=prim_used; p:=prim_used;
   end;
-prim_text(p):=s+1;
+prim_text(p):=s;
 end
 
 @ The value of |prim_prime| should be roughly 85\pct! of
@@ -6297,7 +6297,7 @@
 @!prim_val:integer; {needed to fill |prim_eqtb|}
 begin if s<256 then begin
   cur_val:=s+single_base;
-  prim_val:=prim_lookup(s);
+  prim_val:=s;
 end
 else  begin k:=str_start_macro(s); l:=str_start_macro(s+1)-k;
     {we will move |s| into the (possibly non-empty) |buffer|}
@@ -7700,11 +7700,11 @@
 else  begin print("..."); p:=l+first_count-half_error_line+3;
   n:=half_error_line;
   end;
-for q:=p to first_count-1 do print_visible_char(trick_buf[q mod error_line]);
+for q:=p to first_count-1 do print_char(trick_buf[q mod error_line]);
 print_ln;
 for q:=1 to n do print_visible_char(" "); {print |n| spaces to begin line~2}
 if m+n<=error_line then p:=first_count+m else p:=first_count+(error_line-n-3);
-for q:=first_count to p-1 do print_visible_char(trick_buf[q mod error_line]);
+for q:=first_count to p-1 do print_char(trick_buf[q mod error_line]);
 if m+n>error_line then print("...")
 
 @ But the trick is distracting us from our current goal, which is to
@@ -8638,7 +8638,7 @@
 begin save_scanner_status:=scanner_status; scanner_status:=normal;
 get_token; scanner_status:=save_scanner_status;
 if cur_cs < hash_base then
-  cur_cs:=prim_lookup(cur_cs-single_base)
+  cur_cs:=prim_lookup(cur_cs-257)
 else
   cur_cs:=prim_lookup(text(cur_cs));
 if cur_cs<>undefined_primitive then begin
@@ -11218,7 +11218,7 @@
   get_next;
   scanner_status:=save_scanner_status;
   if cur_cs < hash_base then
-    m:=prim_lookup(cur_cs-single_base)
+    m:=prim_lookup(cur_cs-257)
   else
     m:=prim_lookup(text(cur_cs));
   b :=((cur_cmd<>undefined_cs) and
@@ -24113,7 +24113,7 @@
     get_next;
     scanner_status:=t;
     if cur_cs < hash_base then
-      cur_cs:=prim_lookup(cur_cs-single_base)
+      cur_cs:=prim_lookup(cur_cs-257)
     else
       cur_cs :=prim_lookup(text(cur_cs));
     if cur_cs<>undefined_primitive then begin