[pdftex] Patch ImageTypeDetection

Heiko Oberdiek oberdiek at uni-freiburg.de
Sat Feb 8 04:51:51 CET 2003


Hello,

there is currently a thread in the newsgroup de.comp.text.tex
with the subject:
  "\includegraphics und umbenannte pdf-Graphiken"

The problem is that pdfTeX does not recognize graphics files
with non-standard file extensions:
   Error: pdflatex (file g.xxx): unknown type of image
    ==> Fatal error occurred, the output PDF file is not finished!
I think this restriction is unnecessary, because the file type can
easily be detected from the magic headers (see the Unix "file" command).
Therefore I implemented this in the appended patch for
  pdftexdir/writeimg.c
based on the sources of teTeX 2.0:

%%% cut %%% writeimg.c.diff %%% cut %%%
*** writeimg.c.org	Sat Feb  8 00:42:01 2003
--- writeimg.c	Sat Feb  8 03:58:47 2003
***************
*** 118,129 ****
      return img_pages(img);
  }
  
  integer readimage(strnumber s, integer page_num, strnumber page_name,
                    integer pdfversion, integer pdfoptionalwaysusepdfpagebox)
  {
-     char *image_suffix;
      char *dest = 0;
      integer img = new_image_entry();
      /* need to allocate new string as makecstring's buffer is 
         already used by cur_file_name */
      if (page_name != 0)
--- 118,240 ----
      return img_pages(img);
  }
  
+ /*
+   Patch ImageTypeDetection 2003/02/08 by Heiko Oberdiek.
+ 
+   Function "readimage" performs some basic initializations.
+   Then it looks at the file extension to determine the
+   image type and calls specific code/functions.
+     The main disadvantage is that standard file extensions
+   have to be used, otherwise pdfTeX is not able to detect
+   the correct image type.
+ 
+   The patch now looks at the file header first regardless of
+   the file extension. This is implemented in function
+   "checktypebyheader". If this check fails, the traditional
+   test of standard file extension is tried, done in function
+   "checktypebyextension".
+ 
+   Magic headers:
+ 
+   * "PNG (Portable Network Graphics) Specification", Version 1.2
+     (http://www.libpng.org/pub/png):
+ 
+   |   3.1. PNG file signature
+   |
+   |      The first eight bytes of a PNG file always contain the following
+   |      (decimal) values:
+   |
+   |         137 80 78 71 13 10 26 10
+ 
+   Translation to C: "\x89PNG\r\n\x1A\n"
+ 
+   * "JPEG File Interchange Format", Version 1.02:
+ 
+   | o you can identify a JFIF file by looking for the following
+   |   sequence: X'FF', SOI X'FF', APP0, <2 bytes to be skipped>,
+   |   "JFIF", X'00'.
+ 
+   Function "checktypebyheader" only looks at the first two bytes:
+     "\xFF\xD8"
+ 
+   * "PDF Reference", third edition:
+     * The first line should contain "%PDF-1.0" until "%PDF-1.4"
+       (section 3.4.1 "File Header").
+     * The "implementation notes" say:
+ 
+     | 3.4.1,  File Header
+     |   12. Acrobat viewers require only that the header appear
+     |       somewhere within the first 1024 bytes of the file.
+     |   13. Acrobat viewers will also accept a header of the form
+     |           %!PS-Adobe-N.n PDF-M.m
+ 
+     The check in function "checktypebyheader" only implements
+     the first issue. The implementation notes are not considered.
+     Therefore files with garbage at start of file must have the
+     standard extension.
+ 
+     Functions "checktypebyheader" and "checktypebyextension":
+     img_type(img) is set to IMAGE_TYPE_NONE by new_image_entry().
+     Both functions try to detect a type and set img_type(img).
+     Thus a value other than IMAGE_TYPE_NONE indicates that a
+     type is found.
+ */
+ 
+ #define HEADER_JPG "\xFF\xD8"
+ #define HEADER_PNG "\x89PNG\r\n\x1A\n"
+ #define HEADER_PDF "%PDF-1."
+ #define MAX_HEADER (sizeof(HEADER_PNG)-1)
+ static void checktypebyheader(integer img)
+ {   /* Sets img_type(img) from the file's leading magic bytes; leaves it unchanged if none match. */
+     int i;
+     FILE *file = NULL;
+     char header[MAX_HEADER];   /* raw bytes, no NUL terminator; MAX_HEADER is the PNG magic length */
+ 
+     if (img_type(img) != IMAGE_TYPE_NONE) /* type already known: nothing to do */
+         return;
+     /* Read the first MAX_HEADER bytes of the file named by img_name(img). Reaching end
+        of file before that is reported through pdftex_fail, even though JPG needs only 2 bytes. */
+     file = xfopen(img_name(img), FOPEN_RBIN_MODE);
+     for (i=0; i<MAX_HEADER; i++) {
+         header[i] = xgetc(file);
+         if (feof(file))
+             pdftex_fail("reading image file failed");
+     }
+     xfclose(file, img_name(img));
+     /* Compare against the known magic headers, shortest prefix first. strncmp is adequate
+        for these raw bytes because none of the HEADER_* strings contains a NUL byte. */
+     if (strncmp(header, HEADER_JPG, sizeof(HEADER_JPG)-1) == 0)
+         img_type(img) = IMAGE_TYPE_JPG;
+     else if (strncmp(header, HEADER_PNG, sizeof(HEADER_PNG)-1) == 0)
+         img_type(img) = IMAGE_TYPE_PNG;
+     else if (strncmp(header, HEADER_PDF, sizeof(HEADER_PDF)-1) == 0)
+         img_type(img) = IMAGE_TYPE_PDF;
+ }
+ 
+ static void checktypebyextension(integer img)
+ {   /* Fallback: sets img_type(img) from the extension of cur_file_name, compared case-insensitively. */
+     char *image_suffix;
+     /* NOTE(review): the suffix is taken from the last '.' in cur_file_name, which may sit in a directory part. */
+     if (img_type(img) != IMAGE_TYPE_NONE) /* type already known: nothing to do */
+         return;
+     /* map the known extensions; any other extension leaves img_type(img) untouched */
+     if ((image_suffix = strrchr(cur_file_name, '.')) == 0)
+         img_type(img) = IMAGE_TYPE_NONE;   /* no '.' at all; the type is already NONE at this point */
+     else if (strcasecmp(image_suffix, ".pdf") == 0)
+         img_type(img) = IMAGE_TYPE_PDF;
+     else if (strcasecmp(image_suffix, ".png") == 0)
+         img_type(img) = IMAGE_TYPE_PNG;
+     else if (strcasecmp(image_suffix, ".jpg") == 0 ||
+              strcasecmp(image_suffix, ".jpeg") == 0)
+         img_type(img) = IMAGE_TYPE_JPG;
+ }
+ 
  integer readimage(strnumber s, integer page_num, strnumber page_name,
                    integer pdfversion, integer pdfoptionalwaysusepdfpagebox)
  {
      char *dest = 0;
      integer img = new_image_entry();
+ 
      /* need to allocate new string as makecstring's buffer is 
         already used by cur_file_name */
      if (page_name != 0)
***************
*** 132,141 ****
      img_name(img) = kpse_find_file(cur_file_name, kpse_tex_format, true);
      if (img_name(img) == 0)
          pdftex_fail("cannot find image file");
!     if ((image_suffix = strrchr(cur_file_name, '.')) == 0)
!         pdftex_fail("cannot find image file name extension");
!     if (strcasecmp(image_suffix, ".pdf") == 0) {
!         img_type(img) = IMAGE_TYPE_PDF;
          pdf_ptr(img) = xtalloc(1, pdf_image_struct);
          pdf_ptr(img)->page_box = pdflastpdfboxspec;
          pdf_ptr(img)->always_use_pdfpagebox = pdfoptionalwaysusepdfpagebox;
--- 243,254 ----
      img_name(img) = kpse_find_file(cur_file_name, kpse_tex_format, true);
      if (img_name(img) == 0)
          pdftex_fail("cannot find image file");
!     /* type checks */
!     checktypebyheader(img);
!     checktypebyextension(img);
!     /* read image */
!     switch (img_type(img)) {
!     case IMAGE_TYPE_PDF:
          pdf_ptr(img) = xtalloc(1, pdf_image_struct);
          pdf_ptr(img)->page_box = pdflastpdfboxspec;
          pdf_ptr(img)->always_use_pdfpagebox = pdfoptionalwaysusepdfpagebox;
***************
*** 147,167 ****
          pdf_ptr(img)->orig_y = bp2int(epdf_orig_y);
          pdf_ptr(img)->selected_page = page_num;
          pdf_ptr(img)->doc = epdf_doc;
!     }
!     else if (strcasecmp(image_suffix, ".png") == 0) {
!         img_type(img) = IMAGE_TYPE_PNG;
          img_pages(img) = 1;
          read_png_info(img);
!     }
!     else if (strcasecmp(image_suffix, ".jpg") == 0 ||
!              strcasecmp(image_suffix, ".jpeg") == 0) {
          jpg_ptr(img) = xtalloc(1, JPG_IMAGE_INFO);
-         img_type(img) = IMAGE_TYPE_JPG;
          img_pages(img) = 1;
          read_jpg_info(img);
!     }
!     else 
          pdftex_fail("unknown type of image");
      xfree(dest);
      cur_file_name = 0;
      return img;
--- 260,278 ----
          pdf_ptr(img)->orig_y = bp2int(epdf_orig_y);
          pdf_ptr(img)->selected_page = page_num;
          pdf_ptr(img)->doc = epdf_doc;
!         break;
!     case IMAGE_TYPE_PNG:
          img_pages(img) = 1;
          read_png_info(img);
!         break;
!     case IMAGE_TYPE_JPG:
          jpg_ptr(img) = xtalloc(1, JPG_IMAGE_INFO);
          img_pages(img) = 1;
          read_jpg_info(img);
!         break;
!     default:
          pdftex_fail("unknown type of image");
+     }
      xfree(dest);
      cur_file_name = 0;
      return img;
%%% cut %%% writeimg.c.diff %%% cut %%%

Yours sincerely
  Heiko <oberdiek at uni-freiburg.de>
-- 


More information about the pdftex mailing list