Texinfo/Info/HTML version of 0.999

Wed, 24 Jan 1996 09:31:29 +0100

>     automatic conversion script, but getting it to produce good quality
>     would probably have been more work than doing a hand-conversion once.

> But we're going to have to do the conversion more than once.
> Whatever. I'll try to come up with something, I guess.

Well, I had another try.  I did it in elisp, which seemed to be a more
suitable choice for producing Texinfo.  Besides, it might also be more
platform-independent than sed/awk/grep.  The following script should
do the conversion of everything except for the header and trailer. 
It's still a little rough, though.  It still misses some commentary 
and doc strings.  And there may be some areas that need improving.

Cheers, Ulrik.

P.S.  It should be clear that this is not a general LaTeX to Texinfo
converter, despite its name; `ltx2texi' was chosen to fit into 8+3
file systems.

;;; ltx2texi.el --- convert LaTeX to Texinfo

;; Copyright (C) 1996

;; Author: Ulrik Vieth <vieth@thphy.uni-duesseldorf.de>
;; Keywords: 
;; Version:

;;; This file is *not* part of GNU Emacs.

;;; Commentary:

;;; Code:

(defvar ltx2texi-source-file
  "tds.tex"
  "Name of the LaTeX source file to be converted.")

(defvar ltx2texi-target-file
  "tds.texi"
  "Name of the Texinfo file (and buffer) that receives the conversion.")

(defvar ltx2texi-logos-alist
  ;; Plain "TeX" is used on purpose: there is no need for "@TeX{}" in
  ;; Info or HTML output.
  '(("\\TeX{}" . "TeX")
    ("\\MF{}" . "METAFONT")
    ("\\MP{}" . "MetaPost")
    ("\\BibTeX{}" . "BibTeX")
    ("{\\TeX}" . "TeX")
    ("{\\LaTeX}" . "LaTeX")
    ("{\\LaTeXe}" . "LaTeX2e")
    ("{\\AMSTeX}" . "AMS-TeX")
    ("{\\AmS}" . "AMS")
    ("{\\iniTeX}" . "INITEX")
    ("{\\iniMF}" . "INIMF")
    ("{\\iniMP}" . "INIMP")
    ("{\\PS}" . "PostScript")
    ("{\\copyright}" . "@copyright{}"))
  "Alist mapping LaTeX logo macros to their replacement text.
Each element has the form (LATEX . REPLACEMENT), where LATEX is the
macro exactly as it appears in the source, including its braces, and
REPLACEMENT is the text substituted for it.")

;;; utility functions:

(defun ltx2texi-string-replace (x-string x-replace)
  "Replace every occurrence of X-STRING after point by X-REPLACE.
Both arguments are taken literally: X-STRING is not a regexp and
X-REPLACE is inserted verbatim, without any case conversion.
Matching is case-sensitive, because LaTeX control sequences are.
Point is left where it was."
  (save-excursion
    ;; `search-forward' honors `case-fold-search'; without this binding
    ;; a search for "\\pm" would also rewrite "\\PM".
    (let ((case-fold-search nil))
      (while (search-forward x-string nil t)
        (replace-match x-replace t t)))))       ; fixed case, literal text

(defun ltx2texi-regexp-replace (x-regexp x-replace)
  "Replace every match of X-REGEXP after point by X-REPLACE.
X-REPLACE is a replacement template as understood by `replace-match',
so `\\\\1', `\\\\2', ... refer to the text matched by the
corresponding groups of X-REGEXP.  Matching is case-sensitive, and
the replacement is inserted without case conversion.  Point is left
where it was."
  (save-excursion
    (let ((case-fold-search nil))
      (while (re-search-forward x-regexp nil t)
        ;; FIXEDCASE must be non-nil: otherwise Emacs adapts the case of
        ;; the whole template to the matched text, which can turn a
        ;; command such as "@w{...}" into "@W{...}".
        (replace-match x-replace t nil)))))

(defun ltx2texi-alist-replace (x-regexp x-alist)
  "Replace matches of X-REGEXP after point by looking them up in X-ALIST.
The text matched by the first group of X-REGEXP is used as the key.
When X-ALIST has an entry for that key, the whole match is replaced
by the entry's value; otherwise it is replaced by the key itself, so
the text stays as it was as long as the group spans the whole match.
Replacements are inserted literally, without case conversion."
  (save-excursion
    (while (re-search-forward x-regexp nil t)
      (let* ((found (match-string 1))
             (entry (assoc found x-alist))
             (substitute (if (cdr entry) (cdr entry) found)))
        (replace-match substitute t t)))))      ; fixed case, literal text

;;;

(defun ltx2texi-convert ()
  "Convert the LaTeX markup in the current buffer to Texinfo, in place.
The whole accessible portion of the buffer is rewritten by a fixed
sequence of search-and-replace passes; their order matters.  The
passes cover special characters, logos, sectioning commands, labels
and references, tag markup, and list and display environments.

Sections before the `\\appendix' marker become chapters, sections
after it become appendices.  When the buffer has no such marker,
every section becomes a chapter.

This is not a general LaTeX to Texinfo converter: it only knows the
macros that the helper calls below mention explicitly."
  (interactive)

  ;; To convert straight from the source file instead of the current
  ;; buffer, enable the following:
  ;; (set-buffer (get-buffer-create ltx2texi-target-file))
  ;; (erase-buffer)
  ;; (insert-file-contents-literally ltx2texi-source-file)

  (untabify (point-min) (point-max))
  (goto-char (point-min))

  ;; literal `@' -- must come before anything else!
  (ltx2texi-regexp-replace "\\([^\\\\]\\)@" "\\1@@")

  ;; fancy spacing -- should come early before there are too many `@'
  (ltx2texi-regexp-replace "\\\\@\\([.?!]\\)" "@\\1")
  (ltx2texi-regexp-replace "\\.\\\\\\([ \n]+\\)" ".@:\\1")
  (ltx2texi-regexp-replace "\\\\\\([ \n]+\\)" "\\1")

  (ltx2texi-regexp-replace "\\\\,\\(dpi\\|pt\\)" "@dmn{\\1}")

  ;; special TeX characters that needn't be quoted in Texinfo:
  (ltx2texi-string-replace "\\_"  "_")
  (ltx2texi-string-replace "\\&"  "&")
  (ltx2texi-string-replace "\\%"  "%")
  (ltx2texi-string-replace "\\pm" "+-")

  (ltx2texi-string-replace "$" "")      ; avoid using @math{...}
  (ltx2texi-string-replace "~" " ")     ; maybe avoid using @w{...}

  (ltx2texi-string-replace "\\slash " "/")

  ;; fancy TeX logos -- these are used in arguments of sections
  (ltx2texi-alist-replace "\\(\\\\[A-Za-z]+{}\\)" ltx2texi-logos-alist)
  (ltx2texi-alist-replace "\\({\\\\[^}]+}\\)" ltx2texi-logos-alist)

  ;; acronyms -- these are also used in arguments of sections
  ;; there's no need to use @sc markup, just upcase them.
  (save-excursion
    (while (re-search-forward "\\\\abbr{\\([^}]+\\)}" nil t)
      (replace-match (upcase (match-string 1)) nil t)))

  ;; applications -- since they are now shown in the default font
  ;; there's no need to use @r markup either.
  (ltx2texi-regexp-replace "\\\\application{\\([^}]+\\)}" "\\1")

  ;; sectioning commands:
  ;;

  ;; first do @chapter and @appendix by narrowing.  The marker is
  ;; searched for anew in each step because the first step changes the
  ;; length of the text in front of it.
  (save-excursion
    (let ((appendix-end (search-forward "\\appendix" nil t)))
      (save-restriction
        ;; without an \appendix marker all sections are chapters
        (narrow-to-region (point-min) (or appendix-end (point-max)))
        (goto-char (point-min))
        (ltx2texi-regexp-replace
         "\\\\section{\\([^}]+\\)}[ ]*" "@node \\1\n@chapter \\1\n"))))
  (save-excursion
    (let ((appendix-end (search-forward "\\appendix" nil t)))
      ;; point is now just behind the marker, i.e. at the start of the
      ;; region that holds the appendices
      (when appendix-end
        (save-restriction
          (narrow-to-region appendix-end (point-max))
          (ltx2texi-regexp-replace
           "\\\\section{\\([^}]+\\)}[ ]*" "@node \\1\n@appendix \\1\n")))))

  ;; @section and @subsection are easy now
  (ltx2texi-regexp-replace
   "\\\\subsection{\\([^}]+\\)}[ ]*"    "@node \\1\n@section \\1\n")
  (ltx2texi-regexp-replace
   "\\\\subsubsection{\\([^}]+\\)}[ ]*" "@node \\1\n@subsection \\1\n")

  ;; now we no longer need \appendix as a marker
  (ltx2texi-regexp-replace "\\\\appendix[ ]*\n" "")
  (ltx2texi-regexp-replace "%?\\\\newpage[ ]*\n" "")

  ;; \labels are redundant since we have @nodes
  (ltx2texi-regexp-replace "\\\\label{sec:\\([^}]+\\)}[ ]*\n" "")
  (ltx2texi-regexp-replace "\\\\ref{sec:\\([^}]+\\)}" "@ref{\\1}")

  ;; this might be redundant in Info as well -- not quite sure!
  ;; (ltx2texi-regexp-replace "\\(Appendix\\|Section\\)[~ ]" "")

  ;; now we can do the remaining instances of literal `~'
  ;; unfortunately this will get lost in the HTML conversion
  ;; because &nbsp; or &#160; are not yet standard HTML tags
  ;; NOTE(review): every `~' has already been turned into a space
  ;; above, so this pass cannot match anything as things stand.
  (ltx2texi-regexp-replace
   "\\([A-Za-z]+\\)~\\([A-Za-z]+\\)" "@w{\\1 \\2}")

  ;; various tag markup:
  ;;

  ;; \emphasis:
  (ltx2texi-string-replace "\\emphasis" "@emph")

  ;; \citetitle -- no need to match and replace the argument,
  ;; otherwise we might run into trouble with @w{Volume E}
  (ltx2texi-string-replace "\\citetitle" "@cite")

  ;; special tags:
  (ltx2texi-string-replace "\\texmf{}" "@file{texmf}")
  (ltx2texi-string-replace "\\CTAN:" "@file{@var{CTAN}:}")

  ;; \systemitem -- a silly tag, used exactly once!
  (ltx2texi-regexp-replace
   "\\\\systemitem{\\([^}]+\\)}{\\([^}]+\\)}" "@file{\\2}")

  ;; \path -- here we can't avoid shuffling the argument
  (ltx2texi-regexp-replace "\\\\path|\\([^|]+\\)|" "@file{\\1}")

  ;; \literal -- we simply use the same markup as \path
  ;; since the distinction isn't very clear
  (ltx2texi-string-replace "\\literal" "@file")

  ;; \replaceable -- within ttdisplay we can simply use @var
  (save-excursion
    (while (search-forward "\\begin{ttdisplay}" nil t)
      (let* ((display-start (point))
             (display-end (search-forward "\\end{ttdisplay}" nil t)))
        ;; an unterminated environment is left alone
        (when display-end
          (save-restriction
            (narrow-to-region display-start display-end)
            (goto-char (point-min))
            (ltx2texi-string-replace "\\replaceable" "@var"))))))

  ;; ... but otherwise we have to use @file{@var{...}}
  ;; to get the quotation marks consistent
  (ltx2texi-regexp-replace
   "\\\\replaceable{\\([^}]+\\)}" "@file{@var{\\1}}")

  ;; eliminate redundant quotation marks:
  (ltx2texi-regexp-replace "``\\(@file{[^}]+}\\)''" "\\1")
  (ltx2texi-regexp-replace  "`\\(@file{[^}]+}\\)'"  "\\1")

  ;; combine multiple @file{}s in one line:
  (ltx2texi-regexp-replace
   "@file{\\(.*\\)}@file{\\(.*\\)}@file{\\(.*\\)}" "@file{\\1\\2\\3}")
  (ltx2texi-regexp-replace
   "@file{\\(.*\\)}@file{\\(.*\\)}" "@file{\\1\\2}")

  ;; environments:
  ;;

  ;; ttdisplay and tdsSummary -- @example is good for both
  (ltx2texi-string-replace "\\begin{ttdisplay}"  "\n@example")
  (ltx2texi-string-replace "\\end{ttdisplay}"    "@end example\n")

  (ltx2texi-string-replace "\\begin{tdsSummary}" "\n@example")
  (ltx2texi-string-replace "\\end{tdsSummary}"   "@end example")

  ;; enumerate -- note that the name after `@end' takes no `@'
  (ltx2texi-string-replace "\\begin{enumerate}"  "@enumerate")
  (ltx2texi-string-replace "\\end{enumerate}"    "@end enumerate")

  ;; itemize -- always use @itemize @bullet, since it's difficult
  ;; to determine when to use @itemize @minus for inner levels
  (ltx2texi-regexp-replace
   "\\\\begin{\\(description\\|itemize\\)}"         "@itemize @bullet")
  (ltx2texi-regexp-replace
   "\\\\end{\\(description\\|itemize\\)}"           "@end itemize")

  ;; in case of itemize-squeeze add extra newlines
  (ltx2texi-regexp-replace
   "\\\\begin{\\(description\\|itemize\\)-squeeze}" "\n@itemize @bullet")
  (ltx2texi-regexp-replace
   "\\\\end{\\(description\\|itemize\\)-squeeze}"   "@end itemize")

  ;; description items -- add extra newlines where appropriate
  (ltx2texi-regexp-replace
   "\\\\item\\[\\([^]]+\\)\\][ ]*\n" "@item \\1\n")
  (ltx2texi-regexp-replace
   "\\\\item\\[\\([^]]+\\)\\][ ]*"   "@item \\1\n")

  ;; normal items -- nothing special
  (ltx2texi-string-replace "\\item"  "@item")

  ;; replace @file by @samp after @item and add extra newlines
  (ltx2texi-string-replace
   "@item @file" "@item @samp")
  (ltx2texi-regexp-replace
   "@item @samp\\([^,\n]*\\),[ ]*" "@item @samp\\1,\n"))