[pdftex] Patch ImageTypeDetection
Heiko Oberdiek
oberdiek at uni-freiburg.de
Sat Feb 8 04:51:51 CET 2003
Hello,
there is currently a thread in the newsgroup de.comp.text.tex
with the subject:
"\includegraphics und umbenannte pdf-Graphiken"
The problem is that graphics files with non-standard
file extensions are not recognized by pdfTeX:
Error: pdflatex (file g.xxx): unknown type of image
==> Fatal error occurred, the output PDF file is not finished!
I think this limitation is unnecessary, because the file type can
easily be detected from the magic headers (see the Unix "file" command).
Therefore I implemented this in the appended patch for
pdftexdir/writeimg.c
based on the sources of teTeX 2.0:
%%% cut %%% writeimg.c.diff %%% cut %%%
*** writeimg.c.org Sat Feb 8 00:42:01 2003
--- writeimg.c Sat Feb 8 03:58:47 2003
***************
*** 118,129 ****
return img_pages(img);
}
integer readimage(strnumber s, integer page_num, strnumber page_name,
integer pdfversion, integer pdfoptionalwaysusepdfpagebox)
{
- char *image_suffix;
char *dest = 0;
integer img = new_image_entry();
/* need to allocate new string as makecstring's buffer is
already used by cur_file_name */
if (page_name != 0)
--- 118,240 ----
return img_pages(img);
}
+ /*
+ Patch ImageTypeDetection 2003/02/08 by Heiko Oberdiek.
+
+ Function "readimage" performs some basic initializations.
+ Then it looks at the file extension to determine the
+ image type and calls specific code/functions.
+ The main disadvantage is that standard file extensions
+ have to be used, otherwise pdfTeX is not able to detect
+ the correct image type.
+
+ The patch now looks at the file header first regardless of
+ the file extension. This is implemented in function
+ "checktypebyheader". If this check fails, the traditional
+ test of standard file extension is tried, done in function
+ "checktypebyextension".
+
+ Magic headers:
+
+ * "PNG (Portable Network Graphics) Specification", Version 1.2
+ (http://www.libpng.org/pub/png):
+
+ | 3.1. PNG file signature
+ |
+ | The first eight bytes of a PNG file always contain the following
+ | (decimal) values:
+ |
+ | 137 80 78 71 13 10 26 10
+
+ Translation to C: "\x89PNG\r\n\x1A\n"
+
+ * "JPEG File Interchange Format", Version 1.02:
+
+ | o you can identify a JFIF file by looking for the following
+ | sequence: X'FF', SOI X'FF', APP0, <2 bytes to be skipped>,
+ | "JFIF", X'00'.
+
+ Function "checktypebyheader" only looks at the first two bytes:
+ "\xFF\xD8"
+
+ * "PDF Reference", third edition:
+ * The first line should contain "%PDF-1.0" through "%PDF-1.4"
+ (section 3.4.1 "File Header").
+ * The "implementation notes" say:
+
+ | 3.4.1, File Header
+ | 12. Acrobat viewers require only that the header appear
+ | somewhere within the first 1024 bytes of the file.
+ | 13. Acrobat viewers will also accept a header of the form
+ | %!PS-Adobe-N.n PDF-M.m
+
+ The check in function "checktypebyheader" only implements
+ the first issue. The implementation notes are not considered.
+ Therefore files with garbage at start of file must have the
+ standard extension.
+
+ Functions "checktypebyheader" and "checktypebyextension":
+ img_type(img) is set to IMAGE_TYPE_NONE by new_image_entry().
+ Both functions try to detect a type and set img_type(img).
+ Thus a value other than IMAGE_TYPE_NONE indicates that a
+ type is found.
+ */
+
+ #define HEADER_JPG "\xFF\xD8"
+ #define HEADER_PNG "\x89PNG\r\n\x1A\n"
+ #define HEADER_PDF "%PDF-1."
+ #define MAX_HEADER (sizeof(HEADER_PNG)-1) /* PNG signature is the longest magic; -1 drops the NUL */
+ static void checktypebyheader(integer img)
+ {
+ int i;
+ FILE *file = NULL;
+ char header[MAX_HEADER];
+
+ if (img_type(img) != IMAGE_TYPE_NONE) /* type already set: nothing to do */
+ return;
+
+ /* read the first MAX_HEADER bytes; reaching EOF before that is reported via pdftex_fail */
+ file = xfopen(img_name(img), FOPEN_RBIN_MODE);
+ for (i=0; i<MAX_HEADER; i++) {
+ header[i] = xgetc(file);
+ if (feof(file))
+ pdftex_fail("reading image file failed");
+ }
+ xfclose(file, img_name(img));
+
+ /* compare the leading bytes with each magic header; no match leaves img_type(img) unchanged */
+ if (strncmp(header, HEADER_JPG, sizeof(HEADER_JPG)-1) == 0)
+ img_type(img) = IMAGE_TYPE_JPG;
+ else if (strncmp(header, HEADER_PNG, sizeof(HEADER_PNG)-1) == 0)
+ img_type(img) = IMAGE_TYPE_PNG;
+ else if (strncmp(header, HEADER_PDF, sizeof(HEADER_PDF)-1) == 0)
+ img_type(img) = IMAGE_TYPE_PDF;
+ }
+
+ static void checktypebyextension(integer img)
+ {
+ char *image_suffix;
+
+ if (img_type(img) != IMAGE_TYPE_NONE) /* a type is already set: nothing to do */
+ return;
+ /* case-insensitive match on the suffix starting at the last '.' of cur_file_name */
+ if ((image_suffix = strrchr(cur_file_name, '.')) == 0)
+ img_type(img) = IMAGE_TYPE_NONE; /* no '.' in the name: type stays undetected */
+ else if (strcasecmp(image_suffix, ".pdf") == 0)
+ img_type(img) = IMAGE_TYPE_PDF;
+ else if (strcasecmp(image_suffix, ".png") == 0)
+ img_type(img) = IMAGE_TYPE_PNG;
+ else if (strcasecmp(image_suffix, ".jpg") == 0 ||
+ strcasecmp(image_suffix, ".jpeg") == 0)
+ img_type(img) = IMAGE_TYPE_JPG;
+ }
+
integer readimage(strnumber s, integer page_num, strnumber page_name,
integer pdfversion, integer pdfoptionalwaysusepdfpagebox)
{
char *dest = 0;
integer img = new_image_entry();
+
/* need to allocate new string as makecstring's buffer is
already used by cur_file_name */
if (page_name != 0)
***************
*** 132,141 ****
img_name(img) = kpse_find_file(cur_file_name, kpse_tex_format, true);
if (img_name(img) == 0)
pdftex_fail("cannot find image file");
! if ((image_suffix = strrchr(cur_file_name, '.')) == 0)
! pdftex_fail("cannot find image file name extension");
! if (strcasecmp(image_suffix, ".pdf") == 0) {
! img_type(img) = IMAGE_TYPE_PDF;
pdf_ptr(img) = xtalloc(1, pdf_image_struct);
pdf_ptr(img)->page_box = pdflastpdfboxspec;
pdf_ptr(img)->always_use_pdfpagebox = pdfoptionalwaysusepdfpagebox;
--- 243,254 ----
img_name(img) = kpse_find_file(cur_file_name, kpse_tex_format, true);
if (img_name(img) == 0)
pdftex_fail("cannot find image file");
! /* type checks */
! checktypebyheader(img);
! checktypebyextension(img);
! /* read image */
! switch (img_type(img)) {
! case IMAGE_TYPE_PDF:
pdf_ptr(img) = xtalloc(1, pdf_image_struct);
pdf_ptr(img)->page_box = pdflastpdfboxspec;
pdf_ptr(img)->always_use_pdfpagebox = pdfoptionalwaysusepdfpagebox;
***************
*** 147,167 ****
pdf_ptr(img)->orig_y = bp2int(epdf_orig_y);
pdf_ptr(img)->selected_page = page_num;
pdf_ptr(img)->doc = epdf_doc;
! }
! else if (strcasecmp(image_suffix, ".png") == 0) {
! img_type(img) = IMAGE_TYPE_PNG;
img_pages(img) = 1;
read_png_info(img);
! }
! else if (strcasecmp(image_suffix, ".jpg") == 0 ||
! strcasecmp(image_suffix, ".jpeg") == 0) {
jpg_ptr(img) = xtalloc(1, JPG_IMAGE_INFO);
- img_type(img) = IMAGE_TYPE_JPG;
img_pages(img) = 1;
read_jpg_info(img);
! }
! else
pdftex_fail("unknown type of image");
xfree(dest);
cur_file_name = 0;
return img;
--- 260,278 ----
pdf_ptr(img)->orig_y = bp2int(epdf_orig_y);
pdf_ptr(img)->selected_page = page_num;
pdf_ptr(img)->doc = epdf_doc;
! break;
! case IMAGE_TYPE_PNG:
img_pages(img) = 1;
read_png_info(img);
! break;
! case IMAGE_TYPE_JPG:
jpg_ptr(img) = xtalloc(1, JPG_IMAGE_INFO);
img_pages(img) = 1;
read_jpg_info(img);
! break;
! default:
pdftex_fail("unknown type of image");
+ }
xfree(dest);
cur_file_name = 0;
return img;
%%% cut %%% writeimg.c.diff %%% cut %%%
Yours sincerely
Heiko <oberdiek at uni-freiburg.de>
--
More information about the pdftex
mailing list