Index: lib/texmfmp.c =================================================================== --- lib/texmfmp.c (revision 1686) +++ lib/texmfmp.c (working copy) @@ -38,7 +38,9 @@ Unfortunately there's no way to get the banner into this code, so just repeat the text. */ #ifdef TeX -#if defined (eTeX) +#if defined(XeTeX) +#include +#elif defined (eTeX) #include #elif defined (pdfTeX) #include @@ -103,10 +105,63 @@ /* The main program, etc. */ +#ifdef XeTeX +#include "xetexdir/XeTeX_ext.h" + +/* For Unicode encoding form interpretation... */ +static UInt32 /* offsetsFromUTF8[n]: value the decoders subtract after accumulating a lead byte plus n trailing bytes */ +offsetsFromUTF8[6] = { + 0x00000000UL, + 0x00003080UL, + 0x000E2080UL, + 0x03C82080UL, + 0xFA082080UL, + 0x82082080UL +}; + +static UInt8 /* bytesFromUTF8[b]: number of trailing bytes the decoders consume after lead byte b */ +bytesFromUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +static UInt8 /* NOTE(review): firstByteMark is not referenced in the hunks visible here; presumably lead-byte markers for producing UTF-8 -- confirm */ +firstByteMark[7] = { + 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC +}; + +const int halfShift = 10; /* NOTE(review): none of the constants from here to byteMark is referenced in the hunks visible here; the decoders spell out 0xd800, 0xdc00 and 0x0400 literally */ +const UInt32 halfBase = 0x0010000UL; +const UInt32 halfMask = 0x3FFUL; +const UInt32 kSurrogateHighStart = 0xD800UL; +const UInt32 kSurrogateHighEnd = 0xDBFFUL; +const UInt32 kSurrogateLowStart = 0xDC00UL; +const UInt32 kSurrogateLowEnd = 0xDFFFUL; +const UInt32 byteMask = 0x000000BFUL; +const UInt32 byteMark = 0x00000080UL; +#endif + + /* What we were invoked as and with. 
*/ char **argv; int argc; +#ifdef XeTeX +/* if the user specifies a paper size or output driver program */ +static string papersize; /* stays null unless the papersize option assigns it */ +#ifdef XETEX_MAC +static string outputdriver = "xdv2pdf"; /* default for backward compatibility on Mac OS X */ +#else +static string outputdriver = "xdvipdfmx"; /* for linux version with preliminary dvipdfmx-based driver */ +#endif +#endif + /* If the user overrides argv[0] with -progname. */ static string user_progname; @@ -260,10 +315,12 @@ if (mltexp) { fprintf(stderr, "-mltex only works with -ini\n"); } +#if !defined(XeTeX) if (enctexp) { fprintf(stderr, "-enc only works with -ini\n"); } #endif +#endif #if defined(eTeX) || defined(pdfeTeX) || defined(Aleph) if (etexp) { fprintf(stderr, "-etex only works with -ini\n"); @@ -354,17 +411,56 @@ { int i; +#ifdef XeTeX + static UFILE termin_file; /* storage behind termin: set up once so that stdin is read through a UFILE in UTF8 mode */ + if (termin == 0) { + termin = &termin_file; + termin->f = stdin; + termin->savedChar = -1; + termin->skipNextLF = 0; + termin->encodingMode = UTF8; + termin->conversionData = 0; + inputfile[0] = termin; + } +#endif + buffer[first] = 0; /* In case there are no arguments. */ if (optind < argc) { /* We have command line arguments. */ int k = first; for (i = optind; i < argc; i++) { +#ifdef XeTeX + unsigned char *ptr = (unsigned char *)&(argv[i][0]); + /* need to interpret UTF8 from the command line: rval accumulates one character at a time until the terminating zero byte */ + UInt32 rval; + while (rval = *(ptr++)) { + UInt16 extraBytes = bytesFromUTF8[rval]; /* trailing bytes announced by the lead byte */ + switch (extraBytes) { // note: code falls through cases! 
+ case 5: rval <<= 6; if (*ptr) rval += *(ptr++); + case 4: rval <<= 6; if (*ptr) rval += *(ptr++); + case 3: rval <<= 6; if (*ptr) rval += *(ptr++); + case 2: rval <<= 6; if (*ptr) rval += *(ptr++); + case 1: rval <<= 6; if (*ptr) rval += *(ptr++); + case 0: ; + }; + rval -= offsetsFromUTF8[extraBytes]; + /* now rval is a USV; if it's >=64K, we need to put surrogates in the buffer */ + if (rval > 0xFFFF) { + rval -= 0x10000; + buffer[k++] = 0xd800 + rval / 0x0400; + buffer[k++] = 0xdc00 + rval % 0x0400; + } + else + buffer[k++] = rval; + } +#else char *ptr = &(argv[i][0]); /* Don't use strcat, since in Omega the buffer elements aren't single bytes. */ while (*ptr) { buffer[k++] = *(ptr++); } +#endif buffer[k++] = ' '; } argc = 0; /* Don't do this again. */ @@ -384,12 +480,90 @@ /* One more time, this time converting to TeX's internal character representation. */ -#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) +#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) && !defined(XeTeX) for (i = first; i < last; i++) buffer[i] = xord[buffer[i]]; #endif } +#ifdef XeTeX +/* Allocate a UFILE for *f and open its stream with open_input(). If that succeeds and mode is AUTO, the first bytes are sniffed: FE FF selects UTF16BE, FF FE selects UTF16LE, a zero in exactly one of the first two bytes selects the matching UTF-16 byte order (stream rewound), EF BB BF selects UTF8, and anything else is rewound and read as UTF8. The resulting mode is handed to setinputfileencoding(). Returns the result of open_input(). NOTE(review): when open_input() fails the UFILE allocated here is not released by this function. */ +boolean +u_open_in(unicodefile* f, int filefmt, const_string fopen_mode, int mode, int encodingData) +{ + boolean rval; + *f = xmalloc(sizeof(UFILE)); /* xmalloc, as used elsewhere in this file, so a failed allocation is never dereferenced below */ + (*f)->encodingMode = 0; + (*f)->conversionData = 0; + (*f)->savedChar = -1; + (*f)->skipNextLF = 0; + rval = open_input (&((*f)->f), filefmt, fopen_mode); + if (rval) { + int B1, B2; + if (mode == AUTO) { + /* sniff encoding form */ + B1 = getc((*f)->f); + B2 = getc((*f)->f); + if (B1 == 0xfe && B2 == 0xff) + mode = UTF16BE; + else if (B2 == 0xfe && B1 == 0xff) + mode = UTF16LE; + else if (B1 == 0 && B2 != 0) { + mode = UTF16BE; + fseek((*f)->f, 0, SEEK_SET); /* no byte order mark was consumed: rewind; fseek takes (stream, offset, whence) */ + } + else if (B2 == 0 && B1 != 0) { + mode = UTF16LE; + fseek((*f)->f, 0, SEEK_SET); + } + else if (B1 == 0xef && B2 == 0xbb) { + int B3 = getc((*f)->f); + if (B3 == 0xbf) + mode = UTF8; + } + if (mode == AUTO) { + fseek((*f)->f, 0, SEEK_SET); /* nothing recognised: start over and read the file as UTF8 */ + mode = UTF8; + } + } + + 
setinputfileencoding(*f, mode, encodingData); + } + return rval; +} +/* Open the output stream for the job into *fptr. When nopdfoutput is set this is a file opened through open_output(); otherwise it is a pipe, started with popen(), to the outputdriver command followed by -o and the quoted file name and, when papersize is set, -p and the paper size. Returns nonzero on success. NOTE(review): the file name is placed between double quotes without any escaping, so a name containing a double quote or other shell metacharacters alters the command line -- confirm whether such names can reach this point. */ +boolean +open_dvi_output(FILE** fptr) +{ + if (nopdfoutput) { + return open_output(fptr, FOPEN_WBIN_MODE); /* XDV is binary data: binary write mode keeps platforms that distinguish text and binary streams from translating bytes */ + } + else { + char* cmd2 = concat(outputdriver, " -o \""); + char* cmd = concat3(cmd2, (char*)nameoffile+1, "\""); + free(cmd2); + if (papersize != 0) { + cmd2 = concat3(cmd, " -p ", papersize); + free(cmd); + cmd = cmd2; + } + *fptr = popen(cmd, "w"); + free(cmd); + return (*fptr != 0); + } +} +/* Close a stream obtained from open_dvi_output(): fclose() for the plain file, pclose() for the driver pipe. Both return values are ignored, so a failing driver is not reported here. */ +void +dviclose(FILE* fptr) +{ + if (nopdfoutput) + fclose(fptr); + else + pclose(fptr); +} +#endif + + /* IPC for TeX. By Tom Rokicki for the NeXT; it makes TeX ship out the DVI file in a pipe to TeXView so that the output can be displayed incrementally. Shamim Mohamed adapted it for Web2c. */ @@ -583,7 +757,7 @@ #if defined (TeX) || defined (MF) || defined (MP) /* TCX and Omega get along like sparks and gunpowder. */ -#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) +#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) && !defined(XeTeX) /* Return the next number following START, setting POST to the following character, as in strtol. 
Issue a warning and return -1 if no number @@ -684,7 +858,7 @@ WARNING1 ("Could not open char translation file `%s'", orig_filename); } } -#endif /* !Omega && !eOmega && !Aleph */ +#endif /* !Omega && !eOmega && !Aleph && !XeTeX */ #endif /* TeX || MF || MP [character translation] */ /* Normalize quoting of filename -- that is, only quote if there is a space, @@ -772,7 +946,9 @@ #endif /* IPC */ #if !defined(Omega) && !defined(eOmega) && !defined(Aleph) { "mltex", 0, &mltexp, 1 }, +#if !defined(XeTeX) { "enc", 0, &enctexp, 1 }, +#endif /* !XeTeX */ #endif /* !Omega && !eOmega && !Aleph */ #if defined (eTeX) || defined(pdfeTeX) || defined(Aleph) { "etex", 0, &etexp, 1 }, @@ -799,6 +975,11 @@ { "translate-file", 1, 0, 0 }, { "default-translate-file", 1, 0, 0 }, { "8bit", 0, &eightbitp, 1 }, +#if defined(XeTeX) + { "no-pdf", 0, &nopdfoutput, 1 }, /* plain flag: stores 1 in nopdfoutput */ + { "output-driver", 1, 0, 0 }, /* takes an argument; handled by the ARGUMENT_IS chain */ + { "papersize", 1, 0, 0 }, /* takes an argument; handled by the ARGUMENT_IS chain */ +#endif /* XeTeX */ #endif /* TeX || MF || MP */ #if defined (TeX) || defined (MF) { "mktex", 1, 0, 0 }, @@ -834,6 +1015,13 @@ if (ARGUMENT_IS ("kpathsea-debug")) { kpathsea_debug |= atoi (optarg); +#ifdef XeTeX + } else if (ARGUMENT_IS ("papersize")) { + papersize = optarg; + } else if (ARGUMENT_IS ("output-driver")) { + outputdriver = optarg; +#endif + } else if (ARGUMENT_IS ("progname")) { user_progname = optarg; @@ -1321,7 +1509,117 @@ to eof. Otherwise, we return `true' and set last = first + length(line except trailing whitespace). */ +#ifdef XeTeX +/* Return the next code unit read from f, decoded according to f->encodingMode. A unit parked in f->savedChar by an earlier call is handed out first. */ int +get_uni_c(UFILE* f) +{ + int rval; + + if (f->savedChar != -1) { /* e.g. the low surrogate stored by the UTF8 branch on the previous call */ + rval = f->savedChar; + f->savedChar = -1; + return rval; + } + + switch (f->encodingMode) { + case UTF8: + // FIXME: we don't currently check for malformed UTF-8 + rval = getc(f->f); + if (rval != EOF) { + UInt16 extraBytes = bytesFromUTF8[rval]; + switch (extraBytes) { // note: code falls through cases! 
+ case 5: rval <<= 6; rval += getc(f->f); + case 4: rval <<= 6; rval += getc(f->f); + case 3: rval <<= 6; rval += getc(f->f); + case 2: rval <<= 6; rval += getc(f->f); + case 1: rval <<= 6; rval += getc(f->f); + case 0: ; + }; + rval -= offsetsFromUTF8[extraBytes]; + if (rval > 0xFFFF) { + rval -= 0x10000; + f->savedChar = 0xdc00 + rval % 0x0400; + rval = 0xd800 + rval / 0x0400; + } + } + break; + + case UTF16BE: + rval = getc(f->f); + rval <<= 8; + rval += getc(f->f); + break; + + case UTF16LE: + rval = getc(f->f); + rval += (getc(f->f) << 8); + break; + + case RAW: + rval = getc(f->f); + break; + + default: + /* this can't happen */ + fprintf(stderr, "! Internal error---file input mode=%d.\n", f->encodingMode); + uexit(3); + } + + return rval; +} + boolean +input_line(UFILE* f) +{ + int i; + + if (f->encodingMode == ICUMAPPING) + return input_line_icu(f); + + /* Recognize either LF or CR as a line terminator; skip initial LF if prev line ended with CR. */ + i = get_uni_c(f); + if (f->skipNextLF) { + f->skipNextLF = 0; + if (i == '\n') + i = get_uni_c(f); + } + + last = first; + if (last < bufsize && i != EOF && i != '\n' && i != '\r') + buffer[last++] = i; + if (i != EOF && i != '\n' && i != '\r') + while (last < bufsize && (i = get_uni_c(f)) != EOF && i != '\n' && i != '\r') + buffer[last++] = i; + + if (i == EOF && errno != EINTR && last == first) + return false; + + /* We didn't get the whole line because our buffer was too small. */ + if (i != EOF && i != '\n' && i != '\r') { + fprintf (stderr, "! Unable to read an entire line---bufsize=%u.\n", + (unsigned) bufsize); + fputs ("Please increase buf_size in texmf.cnf.\n", stderr); + uexit (1); + } + + buffer[last] = ' '; + if (last >= maxbufstack) + maxbufstack = last; + + /* If line ended with CR, remember to skip following LF. */ + if (i == '\r') + f->skipNextLF = 1; + + /* Trim trailing whitespace. 
*/ + while (last > first && ISBLANK (buffer[last - 1])) + --last; + + return true; +} + +#else /* !XeTeX */ + +boolean input_line P1C(FILE *, f) { int i; @@ -1366,6 +1664,7 @@ return true; } + #endif /* !XeTeX */ /* This string specifies what the `e' option does in response to an error message. */ @@ -1391,7 +1690,11 @@ /* Close any open input files, since we're going to kill the job. */ for (i = 1; i <= inopen; i++) +#ifdef XeTeX + xfclose (inputfile[i]->f, "inputfile"); /* XeTeX input files are UFILE wrappers: close the stdio stream held inside */ +#else xfclose (inputfile[i], "inputfile"); +#endif /* Replace the default with the value of the appropriate environment variable or config file value, if it's set. */ @@ -1622,19 +1925,88 @@ } #if !defined(pdfTeX) && !defined(pdfeTeX) -static int +#ifndef XeTeX /* XeTeX uses this from xetexmac.c */ +static +#endif +int maketexstring(const_string s) { size_t len; +#ifdef XeTeX + UInt32 rval; /* character being assembled from the bytes of s */ + unsigned char* cp = (unsigned char*)s; /* unsigned view of s so that bytes index bytesFromUTF8 safely */ +#endif assert (s != 0); len = strlen(s); - checkpoolpointer (poolptr, len); + checkpoolpointer (poolptr, len); /* in the XeTeX case, this may be more than enough */ +#ifdef XeTeX + while (rval = *(cp++)) { /* XeTeX: walk s as UTF-8 until the terminating zero byte */ + UInt16 extraBytes = bytesFromUTF8[rval]; + switch (extraBytes) { // note: code falls through cases! 
+ case 5: rval <<= 6; if (*cp) rval += *(cp++); + case 4: rval <<= 6; if (*cp) rval += *(cp++); + case 3: rval <<= 6; if (*cp) rval += *(cp++); + case 2: rval <<= 6; if (*cp) rval += *(cp++); + case 1: rval <<= 6; if (*cp) rval += *(cp++); + case 0: ; + }; + rval -= offsetsFromUTF8[extraBytes]; + if (rval > 0xffff) { /* beyond 16 bits: append a surrogate pair to strpool */ + rval -= 0x10000; + strpool[poolptr++] = 0xd800 + rval / 0x0400; + strpool[poolptr++] = 0xdc00 + rval % 0x0400; + } + else + strpool[poolptr++] = rval; + } +#else while (len-- > 0) strpool[poolptr++] = *s++; +#endif + return (makestring()); } #endif +#ifdef XeTeX +void /* Decode the UTF-8 bytes nameoffile[1..namelength] into 16-bit units stored in nameoffile16, enlarging that buffer first when it is not larger than namelength. */ +makeutf16name() +{ + unsigned char* s = nameoffile + 1; + UInt32 rval; + UInt16* t; + static int name16len = 0; /* current capacity of nameoffile16, counted in UInt16 units */ + if (name16len <= namelength) { + if (nameoffile16 != 0) + free(nameoffile16); + name16len = namelength + 10; + nameoffile16 = xmalloc(name16len * sizeof(UInt16)); + } + t = nameoffile16; + while (s <= nameoffile + namelength) { + rval = *(s++); + UInt16 extraBytes = bytesFromUTF8[rval]; + switch (extraBytes) { // note: code falls through cases! 
+ case 5: rval <<= 6; if (*s) rval += *(s++); + case 4: rval <<= 6; if (*s) rval += *(s++); + case 3: rval <<= 6; if (*s) rval += *(s++); + case 2: rval <<= 6; if (*s) rval += *(s++); + case 1: rval <<= 6; if (*s) rval += *(s++); + case 0: ; + }; + rval -= offsetsFromUTF8[extraBytes]; + if (rval > 0xffff) { /* beyond 16 bits: write a surrogate pair */ + rval -= 0x10000; + *(t++) = 0xd800 + rval / 0x0400; + *(t++) = 0xdc00 + rval % 0x0400; + } + else + *(t++) = rval; + } + namelength16 = t - nameoffile16; /* number of UInt16 units written */ +} +#endif /* XeTeX */ + strnumber makefullnamestring() { @@ -1671,18 +2043,22 @@ return ret; } +#ifdef XeTeX +#define strstartar strstart /* XeTeX is now excluded from the strstart branches of gettexstring, so the strstartar branches must resolve to its strstart array */ +#endif + string gettexstring P1C(strnumber, s) { poolpointer i, len; string name; -#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) +#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) && !defined(XeTeX) len = strstart[s + 1] - strstart[s]; #else len = strstartar[s + 1 - 65536L] - strstartar[s - 65536L]; #endif name = (string)xmalloc (len + 1); -#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) +#if !defined(Omega) && !defined(eOmega) && !defined(Aleph) && !defined(XeTeX) strncpy (name, (string)&strpool[strstart[s]], len); #else /* Don't use strncpy. The strpool is not made up of chars. 
*/ @@ -1692,6 +2068,10 @@ return name; } +#ifdef XeTeX +#undef strstartar /* drop the alias again once gettexstring has been compiled */ +#endif + boolean isnewsource P2C(strnumber, srcfilename, int, lineno) { Index: texmfmem.h =================================================================== --- texmfmem.h (revision 1686) +++ texmfmem.h (working copy) @@ -113,22 +113,40 @@ twohalves hhfield; #endif #ifdef WORDS_BIGENDIAN +#ifdef XeTeX + struct + { + integer CINT; /* reached through the cint macro defined further down */ + integer CINT1; /* reached through the cint1 macro defined further down */ + } x; +#else integer cint; +#endif fourquarters qqqq; #else /* not WORDS_BIGENDIAN */ +#ifdef XeTeX struct { + integer CINT1; /* note: CINT1 and CINT are declared in the opposite order to the WORDS_BIGENDIAN case */ + integer CINT; + } x; +#else + struct + { #if defined (TeX) && !defined (SMALLTeX) || defined (MF) && !defined (SMALLMF) || defined (MP) && !defined (SMALLMP) halfword junk; #endif /* big {TeX,MF,MP} */ integer CINT; } u; +#endif struct { +#ifndef XeTeX #if defined (TeX) && !defined (SMALLTeX) || defined (MF) && !defined (SMALLMF) || defined (MP) && !defined (SMALLMP) halfword junk; #endif /* big {TeX,MF,MP} */ +#endif fourquarters QQQQ; } v; #endif /* not WORDS_BIGENDIAN */ @@ -141,13 +159,28 @@ typedef union { #ifdef WORDS_BIGENDIAN +#ifdef XeTeX + struct + { + integer CINT; + } x; +#else integer cint; +#endif fourquarters qqqq; #else /* not WORDS_BIGENDIAN */ +#ifdef XeTeX struct { + halfword junk; /* NOTE(review): this union has no CINT1 member although cint1 expands to x.CINT1; presumably cint1 is never applied to it -- confirm */ integer CINT; + } x; +#else + struct + { + integer CINT; } u; +#endif struct { @@ -166,8 +199,16 @@ #define rh v.RH #define lhfield v.LH +#ifdef XeTeX +#define cint x.CINT /* for XeTeX cint always goes through the x struct, whatever the byte order */ +#define cint1 x.CINT1 +#else #ifndef WORDS_BIGENDIAN #define cint u.CINT +#endif +#endif /* XeTeX */ + +#ifndef WORDS_BIGENDIAN #define qqqq v.QQQQ #endif Index: texmfmp.h =================================================================== --- texmfmp.h (revision 1686) +++ texmfmp.h (working copy) @@ -6,6 +6,21 @@ #include /* for IS_DIR_SEP, used in the change files */ #include /* for kpse_make_tex_discard_errors */ +#ifdef XeTeX +/* added typedefs for unicodefile and voidpointer */ +#define XETEX_UNICODE_FILE_DEFINED 1 +typedef struct { + FILE* f; /* the stdio stream the characters are read from */ + long 
savedChar; /* code unit to hand out before reading more input, or -1 when none is pending */ + short skipNextLF; /* nonzero after a line that ended with CR, so that an immediately following LF is skipped */ + short encodingMode; /* selects how get_uni_c and input_line decode the stream (UTF8, UTF16BE, UTF16LE, RAW, ICUMAPPING) */ + void* conversionData; /* set to 0 when the UFILE is created; NOTE(review): presumably converter state for ICUMAPPING mode -- confirm */ +} UFILE; +typedef UFILE* unicodefile; + +typedef void* voidpointer; +#endif + /* If we have these macros, use them, as they provide a better guide to the endianess when cross-compiling. */ #if defined (BYTE_ORDER) && defined (BIG_ENDIAN) && defined (LITTLE_ENDIAN) @@ -37,6 +52,9 @@ #elif defined (eTeX) #define TEXMFPOOLNAME "etex.pool" #define TEXMFENGINENAME "etex" +#elif defined (XeTeX) +#define TEXMFPOOLNAME "xetex.pool" +#define TEXMFENGINENAME "xetex" #elif defined (Omega) #define TEXMFPOOLNAME "omega.pool" #define TEXMFENGINENAME "omega" @@ -146,7 +164,11 @@ /* Read a line of input as quickly as possible. */ #define inputln(stream, flag) input_line (stream) +#ifdef XeTeX +extern boolean input_line P1H(UFILE *); +#else extern boolean input_line P1H(FILE *); +#endif /* This routine has to return four values. */ #define dateandtime(i,j,k,l) get_date_and_time (&(i), &(j), &(k), &(l)) @@ -182,6 +204,9 @@ #define wopenin(f) open_input (&(f), DUMP_FORMAT, FOPEN_RBIN_MODE) #define wopenout bopenout #define wclose aclose +#ifdef XeTeX +#define uopenin(f,p,m,d) u_open_in(&(f), p, FOPEN_RBIN_MODE, m, d) /* always opens in binary read mode; p is the file format, m the encoding mode, d the encoding data */ +#endif /* Used in tex.ch (section 1338) to get a core dump in debugging mode. */ #ifdef unix Index: texmfmp-help.h =================================================================== --- texmfmp-help.h (revision 1686) +++ texmfmp-help.h (working copy) @@ -450,6 +450,66 @@ }; #endif /* pdfeTeX */ +#ifdef XeTeX +const_string XETEXHELP[] = { + "Usage: xetex [OPTION]... [TEXNAME[.tex]] [COMMANDS]", + " or: xetex [OPTION]... \\FIRST-LINE", + " or: xetex [OPTION]... &FMT ARGS", + " Run XeTeX on TEXNAME, usually creating TEXNAME.pdf.", + " Any remaining COMMANDS are processed as XeTeX input, after TEXNAME is read.", + " If the first line of TEXNAME is %&FMT, and FMT is an existing .fmt file,", + " use it. 
Else use `NAME.fmt', where NAME is the program invocation name,", + " most commonly `xetex'.", + "", + " Alternatively, if the first non-option argument begins with a backslash,", + " interpret all non-option arguments as a line of XeTeX input.", + "", + " Alternatively, if the first non-option argument begins with a &, the", + " next word is taken as the FMT to read, overriding all else. Any", + " remaining arguments are processed as above.", + "", + " If no arguments or options are specified, prompt for input.", + "", + "[-no]-file-line-error disable/enable file:line:error style messages", + "-fmt=FMTNAME use FMTNAME instead of program name or a %& line", + "-halt-on-error stop processing at the first error", + "-ini be xeinitex, for dumping formats; this is implicitly", + " true if the program name is `xeinitex'", + "-interaction=STRING set interaction mode (STRING=batchmode/nonstopmode/", + " scrollmode/errorstopmode)", + "-jobname=STRING set the job name to STRING", + "-kpathsea-debug=NUMBER set path searching debugging flags according to", + " the bits of NUMBER", + "[-no]-mktex=FMT disable/enable mktexFMT generation (FMT=tex/tfm)", + "-mltex enable MLTeX extensions such as \\charsubdef", + "-output-comment=STRING use STRING for XDV file comment instead of date", + "-output-directory=DIR use DIR as the directory to write files to", + "-output-driver=CMD use CMD as the XDV-to-PDF driver instead of " +#ifdef XETEX_MAC + "xdv2pdf" +#else + "xdvipdfmx" +#endif + , + "-no-pdf generate XDV (extended DVI) output rather than PDF", + "[-no]-parse-first-line disable/enable parsing of the first line of the", + " input file", + "-papersize=STRING set PDF media size to STRING", + "-progname=STRING set program (and fmt) name to STRING", + "-recorder enable filename recorder", + "[-no]-shell-escape disable/enable \\write18{SHELL COMMAND}", + "-src-specials insert source specials into the XDV file", + "-src-specials=WHERE insert source specials in certain places of", + " the 
XDV file. WHERE is a comma-separated value", + " list: cr display hbox math par parend vbox", + "-translate-file=TCXNAME (ignored)", + "-8bit make all characters printable, don't use ^^X sequences", + "-help display this help and exit", + "-version output version information and exit", + NULL +}; +#endif /* XeTeX */ + #ifdef TeX const_string TEXHELP[] = { "Usage: tex [OPTION]... [TEXNAME[.tex]] [COMMANDS]",