ltx2texi.el - improved version

Ulrik Vieth vieth@thphy.uni-duesseldorf.de


Hi Karl, 

here's an improved version of my ltx2texi.el.  I guess it isn't
necessary to bother everybody on twg-tds about it this time.  

BTW, it would be nice if some of the whitespace before and after
environments could be somewhat normalized in the LaTeX version.
It doesn't really matter there, but it sometimes does in the Info
file generated from it, and it's too painful to try to correct
that in the conversion process.

Cheers, Ulrik.

P.S. I'll probably leave the file as it is for some time now.
Feel free to improve it as you see fit.  BTW, this is only the
second time that I'm doing some programming in elisp besides
hacking my .emacs file and it took no more than two evenings.
Still, I think it was necessary to do a hand-conversion first
to gain some experience of what is needed in the automatic
conversion.


;;; ltx2texi.el --- convert LaTeX to Texinfo

;; Copyright (C) 1996 TeX Users Group

;; Author: Ulrik Vieth <vieth@thphy.uni-duesseldorf.de>
;; Keywords: latex, texinfo, converter
;; Version: 0.3

;;; This file is *not* part of GNU Emacs.

;;; Commentary:

;; Description:
;;
;; This file provides a limited LaTeX-to-Texinfo conversion function
;; `ltx2texi-convert' that is primarily intended to convert the LaTeX
;; source of the TDS draft document `tds.tex' which uses a couple of
;; special markup tags defined in the document class `tdsguide.cls'.
;; It is definitely *not* suitable to be used as a general-purpose
;; LaTeX-to-Texinfo converter and is not intended to be used as such.

;; Usage:
;;
;; M-x ltx2texi-convert

;; Bugs:
;;
;; * the last comma in the list of contributors should be a period
;; * the text of the copyright notice should be copied into @ifinfo
;; * whitespace before and after environments should be distributed
;;   more equally (in the LaTeX version!)

;; History:
;;
;; v 0.0 -- 1996/01/23  UV  created
;; v 0.1 -- 1996/01/24  UV  first rough version, posted to twg-tds
;; v 0.2 -- 1996/01/24  UV  added some commentary and doc strings
;; v 0.3 -- 1996/01/25  UV  modularized code, handle header and trailer,
;;                          call texinfo routines for @nodes and @menus


;;; Code:

(require 'texinfo)

;; File-name settings.  In the code of this file only `ltx2texi-filename'
;; is read by active code (in `ltx2texi-do-header'); the other two are
;; referenced solely by the commented-out buffer setup at the top of
;; `ltx2texi-convert'.
(defvar ltx2texi-source-file "tds.tex"
  "File name of the TDS LaTeX source to be converted.")

(defvar ltx2texi-target-file "tds.texi"
  "File name of the TDS Texinfo source to be created.")

(defvar ltx2texi-filename "tds.info"
  "File name of the Info file, written after `@setfilename' in the header.")


;; Keys are looked up with `assoc' by `ltx2texi-alist-replace', so each
;; key must be spelled exactly like the text matched by one of the two
;; regexps below, braces included.
(defvar ltx2texi-logos-alist
  '(("\\TeX{}"       . "TeX")		; no need to use "@TeX{}"
    ("{\\TeX}"       . "TeX")		; when only doing Info
    ("{\\LaTeX}"     . "LaTeX")
    ("{\\LaTeXe}"    . "LaTeX2e")
    ("{\\AmS}"       . "AMS")
    ("{\\AMSTeX}"    . "AMS-TeX")
    ("\\MF{}"        . "METAFONT")
    ("\\MP{}"        . "MetaPost")
    ("\\BibTeX{}"    . "BibTeX")
    ("{\\iniTeX}"    . "INITEX")
    ("{\\iniMF}"     . "INIMF")
    ("{\\iniMP}"     . "INIMP")
    ("{\\PS}"        . "PostScript")
    ("{\\copyright}" . "@copyright{}")
    )
  "Alist of TeX logos and their replacement text after conversion.
Each element is (LATEX-TEXT . REPLACEMENT), both strings.")

(defvar ltx2texi-logos-regexp-1 "\\(\\\\[A-Za-z]+{}\\)"
  "Regexp for logos written as a control word followed by `{}'.
Group 1 spans the whole match and is the key that is looked up
in `ltx2texi-logos-alist'.")

(defvar ltx2texi-logos-regexp-2 "\\({\\\\[^}]+}\\)"
  "Regexp for logos written as a control sequence wrapped in braces.
Group 1 spans the whole match, braces included, and is the key
that is looked up in `ltx2texi-logos-alist'.")


;; Only the tag name itself is rewritten; the braced argument that
;; follows it in the buffer is left where it is.
(defvar ltx2texi-tags-alist
  '(("\\emphasis"    . "@emph")
    ("\\citetitle"   . "@cite")
    ("\\literal"     . "@file")
    ("\\replaceable" . "@var")
    ("\\command"     . "@code")			; defined, but not used
    ;; ("\\application" . "@r")
    ;; ("\\abbr"        . "@sc")
    )
  "Alist of markup tags and their replacement text after conversion.
Each element is (LATEX-TAG . TEXINFO-COMMAND), both strings.")

(defvar ltx2texi-tags-regexp "\\(\\\\[a-z]+\\)"
  "Regexp for markup tags to be converted using `ltx2texi-tags-alist'.
Group 1 is the tag name including its backslash and serves as
the lookup key; tags missing from the alist are left unchanged.")


;; Keys are complete begin/end markers.  Markers that are matched by
;; `ltx2texi-env-regexp' but not listed here (the document environment,
;; for one) are left unchanged by `ltx2texi-alist-replace'.
(defvar ltx2texi-env-alist
  '(("\\begin{ttdisplay}"           . "@example")
    ("\\end{ttdisplay}"             . "@end example")
    ("\\begin{tdsSummary}"          . "@example")
    ("\\end{tdsSummary}"            . "@end example")
    ("\\begin{enumerate}"           . "@enumerate")
    ("\\end{enumerate}"             . "@end enumerate")
    ("\\begin{enumerate-squeeze}"   . "@enumerate")
    ("\\end{enumerate-squeeze}"     . "@end enumerate")
    ("\\begin{itemize}"             . "@itemize @bullet")
    ("\\end{itemize}"               . "@end itemize")
    ("\\begin{itemize-squeeze}"     . "@itemize @bullet")
    ("\\end{itemize-squeeze}"       . "@end itemize")
    ("\\begin{description}"         . "@table @samp")
    ("\\end{description}"           . "@end table")
    ("\\begin{description-squeeze}" . "@table @samp")
    ("\\end{description-squeeze}"   . "@end table")
    ("\\begin{legalnotice}"         . "@titlepage")
    ("\\end{legalnotice}"           . "@end titlepage")
    ("\\begin{tabbing}"             . "@quotation")
    ("\\end{tabbing}"               . "@end quotation")
    )
  "Alist of environments and their replacement text after conversion.
Each element is (LATEX-MARKER . TEXINFO-LINE), both strings.")

(defvar ltx2texi-env-regexp "\\(\\\\\\(begin\\|end\\){[^}]+}\\)"
  "Regexp for environments to be converted using `ltx2texi-env-alist'.
Group 1 is the complete begin or end marker, including the braced
environment name, and serves as the lookup key.")

;;;

(defun ltx2texi-string-replace (x-string x-replace)
  "Replace every occurrence of the string X-STRING after point by X-REPLACE.
X-REPLACE is inserted verbatim: its case is never adapted and a
backslash in it has no special meaning.  Point is left where it
was.  Return nil."
  (save-excursion
    (let ((hit (search-forward x-string nil t)))
      (while hit
        ;; fixed case, literal replacement text
        (replace-match x-replace t t)
        (setq hit (search-forward x-string nil t))))))

(defun ltx2texi-regexp-replace (x-regexp x-replace)
  "Replace every match of X-REGEXP after point by X-REPLACE.
X-REPLACE may refer to matched groups with a backslash followed
by a digit, as for `replace-match'; its case is never adapted.
Point is left where it was.  Return nil."
  (let ((fixed-case t)
        (literal nil))
    (save-excursion
      (let ((hit (re-search-forward x-regexp nil t)))
        (while hit
          (replace-match x-replace fixed-case literal)
          (setq hit (re-search-forward x-regexp nil t)))))))

(defun ltx2texi-alist-replace (x-regexp x-alist)
  "Replace matches of X-REGEXP after point by looking them up in X-ALIST.
For each match, the text of group 1 is the key; the associated
value in X-ALIST is inserted verbatim in place of the whole match.
If the key has no entry in X-ALIST, the text of group 1 itself is
inserted instead.  Point is left where it was.  Return nil."
  (save-excursion
    (while (re-search-forward x-regexp nil t)
      (let* ((key   (match-string 1))
             (entry (assoc key x-alist)))
        ;; fixed case, literal replacement text
        (replace-match (or (cdr entry) key) t t)))))

;;;

(defun ltx2texi-convert ()
  "Convert the LaTeX text in the current buffer to Texinfo, in place.
The whole buffer is untabified, point is moved to the beginning,
and the `ltx2texi-do-*' steps are run in a fixed order.  Finally
the Texinfo node pointers, menus and master menu are regenerated.
Good luck!"
  (interactive)
  ;; Disabled alternative: work on a copy of `ltx2texi-source-file' in a
  ;; buffer named after `ltx2texi-target-file' rather than on the
  ;; current buffer.
  ;; (set-buffer (get-buffer-create ltx2texi-target-file))
  ;; (erase-buffer)
  ;; (insert-file-contents-literally ltx2texi-source-file)

  ;; tab characters can mess up tds-summary environments,
  ;; so get rid of them as soon as possible
  (untabify (point-min) (point-max))
  (goto-char (point-min))

  ;; The order of these steps matters: each one relies on the
  ;; replacements made by the steps before it.
  (ltx2texi-do-simple-tags)
  (ltx2texi-do-fancy-logos)
  (ltx2texi-do-sectioning)
  (ltx2texi-do-markup-tags)
  (ltx2texi-do-environments)

  (ltx2texi-do-header)
  (ltx2texi-do-trailer)

  ;; let texinfo.el fill in @node pointers and @menu contents
  (texinfo-every-node-update)
  (texinfo-all-menus-update)
  (texinfo-master-menu nil))


(defun ltx2texi-do-simple-tags ()
  "First step of \\[ltx2texi-convert].  Not usable by itself.
Runs a fixed sequence of replacements from point to the end of
the buffer, covering literal `@', spacing commands, quoted special
characters, math shifts and ties."
  ;; Each entry is (HELPER PATTERN REPLACEMENT).  Entries are applied
  ;; strictly in the order listed, and that order matters.
  (dolist (step
           '(;; literal `@' -- should come before anything else,
             ;; since it's the Texinfo control character.
             (ltx2texi-regexp-replace "\\([^\\\\]\\)@" "\\1@@")

             ;; fancy spacing -- comes early, before we have many `@'

             ;; "\@" -- space factor correction before sentence end `.'
             (ltx2texi-regexp-replace "\\\\@\\." "@.")
             ;; "\ " -- control space after `.' in mid-sentence
             (ltx2texi-regexp-replace "\\.\\\\\\([ \n]+\\)" ".@:\\1")
             ;; "\ " -- control space used otherwise
             (ltx2texi-regexp-replace "\\\\\\([ \n]+\\)" "\\1")

             ;; "\," -- thin space used with dimensions ("dpi", "pt")
             (ltx2texi-regexp-replace "\\\\,\\([a-z]+\\)" "@dmn{\\1}")

             ;; special TeX characters that needn't be quoted in Texinfo
             (ltx2texi-string-replace "\\_" "_")
             (ltx2texi-string-replace "\\&" "&")
             (ltx2texi-string-replace "\\%" "%")

             ;; special TeX characters that we prefer to transliterate
             (ltx2texi-regexp-replace "\\\\slash[ \t\n]*" "/")

             ;; $...$ could become @math{...}, but transliterating
             ;; is easier: drop the math shifts, spell out plus/minus
             (ltx2texi-string-replace "$" "")
             (ltx2texi-string-replace "\\pm" "+-")

             ;; ties could become @w{word1 word2} (handled elsewhere),
             ;; but that would get lost in texi2html anyway
             (ltx2texi-string-replace "~" " ")))
    (apply (car step) (cdr step))))

(defun ltx2texi-do-fancy-logos ()
  "Second step of \\[ltx2texi-convert].  Not usable by itself.
Replaces logo macros using `ltx2texi-logos-alist', upcases the
argument of abbreviation tags and unwraps application tags."

  ;; Logos occur in the arguments of sectioning commands, so they
  ;; have to be done before the sectioning step.  Both spellings
  ;; (control word plus empty group, and braced control sequence)
  ;; share the same alist.
  (dolist (logo-regexp (list ltx2texi-logos-regexp-1
                             ltx2texi-logos-regexp-2))
    (ltx2texi-alist-replace logo-regexp ltx2texi-logos-alist))

  ;; Acronyms also occur in section arguments.  No @sc markup is
  ;; needed; the argument is simply upcased.
  (save-excursion
    (let ((abbr-regexp "\\\\abbr{\\([^}]+\\)}"))
      (while (re-search-forward abbr-regexp nil t)
        (let ((acronym (upcase (match-string 1))))
          (replace-match acronym nil t)))))

  ;; Applications are treated like acronyms, minus the upcasing.
  ;; No @r markup is needed either, since roman is the default.
  (ltx2texi-regexp-replace "\\\\application{\\([^}]+\\)}" "\\1"))

(defun ltx2texi-do-sectioning ()
  "Third step of \\[ltx2texi-convert].  Not usable by itself.
Turns sectioning commands into an `@node' line plus a structuring
line, shifting everything one level up: a \"section\" becomes
`@chapter' before the appendix marker and `@appendix' after it,
a \"subsection\" becomes `@section' and a \"subsubsection\" becomes
`@subsection'.  Afterwards the appendix marker, \"newpage\" lines
and section labels are deleted and section references are turned
into `@ref'.

A buffer without an appendix marker is treated as consisting of
main matter only."
  (let ((section-regexp "\\\\section{\\([^}]+\\)}[ ]*"))

    ;; first do @chapter and @appendix by narrowing

    ;; Main matter: from the beginning of the buffer up to \appendix.
    (save-excursion
      (save-restriction
        (narrow-to-region
         (point-min)
         ;; `search-forward' returns nil if there is no \appendix;
         ;; use the end of the buffer then instead of passing nil
         ;; on to `narrow-to-region', which would signal an error.
         (or (search-forward "\\appendix" nil t) (point-max)))
        (goto-char (point-min))
        (ltx2texi-regexp-replace
         section-regexp "@node \\1\n@chapter \\1\n")))

    ;; Appendices: from \appendix to the end of the buffer.  The
    ;; marker is searched for again because the replacements above
    ;; have shifted its position.  Nothing to do if it is absent.
    (save-excursion
      (when (search-forward "\\appendix" nil t)
        (save-restriction
          (narrow-to-region (point) (point-max))
          (ltx2texi-regexp-replace
           section-regexp "@node \\1\n@appendix \\1\n")))))

  ;; @section and @subsection are just shifted a level up
  (ltx2texi-regexp-replace
   "\\\\subsection{\\([^}]+\\)}[ ]*"    "@node \\1\n@section \\1\n")
  (ltx2texi-regexp-replace
   "\\\\subsubsection{\\([^}]+\\)}[ ]*" "@node \\1\n@subsection \\1\n")

  ;; now we no longer need \appendix as a marker
  (ltx2texi-regexp-replace "\\\\appendix[ ]*\n" "")

  ;; \newpage can go as well
  (ltx2texi-regexp-replace "%?\\\\newpage[ ]*\n" "")

  ;; \labels are redundant since we have @nodes
  (ltx2texi-regexp-replace "\\\\label{sec:\\([^}]+\\)}[ ]*\n" "")

  ;; \refs now refer to @nodes instead of \labels
  (ltx2texi-regexp-replace "\\\\ref{sec:\\([^}]+\\)}" "@ref{\\1}")

  ;; this might be redundant in Info as well -- not quite sure!
  ;; (ltx2texi-regexp-replace "\\(Appendix\\|Section\\)[~ ]" "")
  )

(defun ltx2texi-do-markup-tags ()
  "Fourth step of \\[ltx2texi-convert].  Not usable by itself.
Converts the inline markup tags: the special texmf and CTAN tags,
the tags listed in `ltx2texi-tags-alist', the systemitem and path
tags, and finally normalizes how `@file' and `@var' are combined
and quoted.

A ttdisplay environment whose end marker is missing is taken to
extend to the end of the buffer."

  ;; special tags -- for \CTAN, we must use fixed-case replace!
  (ltx2texi-string-replace "\\texmf{}" "@file{texmf}")
  (ltx2texi-string-replace "\\CTAN:"   "@file{@var{CTAN}:}")

  ;; convert simple tags without expanding their arguments:
  ;; \emphasis, \citetitle, \literal, \replaceable
  (ltx2texi-alist-replace ltx2texi-tags-regexp ltx2texi-tags-alist)

  ;; \systemitem -- a silly tag with an extra argument that
  ;; isn't printed.  It is used exactly once!
  (ltx2texi-regexp-replace
   "\\\\systemitem{\\([^}]+\\)}{\\([^}]+\\)}" "@file{\\2}")

  ;; \path -- here we can't avoid shuffling the argument
  (ltx2texi-regexp-replace "\\\\path|\\([^|]+\\)|" "@file{\\1}")

  ;; After turning \replaceable into @var above we now have to
  ;; turn @var{...} into @file{@var{...}} to get quotation marks
  ;; around file names consistent.  (Read: those extra quotation
  ;; marks inserted automatically by makeinfo in the @file tag.)

  ;; For simplicity we first do the change everywhere and then
  ;; undo it again inside ttdisplay environments, where we can
  ;; leave @var by itself as @file isn't used there anyway.

  (ltx2texi-regexp-replace "@var{\\([^}]+\\)}" "@file{@var{\\1}}")
  (save-excursion
    (while (search-forward "\\begin{ttdisplay}" nil t)
      (save-restriction
        (narrow-to-region
         (point)
         ;; `search-forward' returns nil when the environment is
         ;; never closed; fall back to the end of the buffer rather
         ;; than passing nil to `narrow-to-region'.
         (or (search-forward "\\end{ttdisplay}" nil t) (point-max)))
        (goto-char (point-min))
        (ltx2texi-regexp-replace "@file{@var{\\([^}]+\\)}}" "@var{\\1}"))))

  ;; eliminate redundant quotation marks around @file
  (ltx2texi-regexp-replace "``\\(@file{[^}]+}\\)''" "\\1")
  (ltx2texi-regexp-replace "`\\(@file{[^}]+}\\)'" "\\1")

  ;; ... and combine multiple @file{}s in one line:
  (ltx2texi-regexp-replace
   "@file{\\(.*\\)}@file{\\(.*\\)}@file{\\(.*\\)}" "@file{\\1\\2\\3}")
  (ltx2texi-regexp-replace
   "@file{\\(.*\\)}@file{\\(.*\\)}" "@file{\\1\\2}")

  ;; literal `~' -- if it hasn't been converted to space earlier,
  ;; we can now do the conversion to @w{word1 word2} without
  ;; running the risk of confusing the regexp matcher somewhere.
  ;; (As `ltx2texi-do-simple-tags' currently turns every `~' into a
  ;; space, this only takes effect if that replacement is disabled.)
  ;; Unfortunately @w will get lost again in the HTML conversion
  ;; because &nbsp; or &#160; are not yet standard HTML tags.
  (ltx2texi-regexp-replace
   "\\([A-Za-z]+\\)~\\([A-Za-z]+\\)" "@w{\\1 \\2}"))

(defun ltx2texi-do-environments ()
  "Fifth step of \\[ltx2texi-convert].  Not usable by itself.
Converts environment begin/end markers using `ltx2texi-env-alist'
and rewrites list items, putting item labels on lines of their own."

  ;; \begin{...} and \end{...} markers
  (ltx2texi-alist-replace ltx2texi-env-regexp ltx2texi-env-alist)

  ;; plain items
  (ltx2texi-string-replace "\\item"  "@item")

  ;; Description items: move the bracketed label onto the @item line
  ;; and start the item text on the next line.  Labels that already
  ;; end their line are handled first so that they don't receive a
  ;; second newline from the general rule.
  (let ((labelled-item "@item\\[\\([^]]+\\)\\][ ]*")
        (label-on-own-line "@item \\1\n"))
    (ltx2texi-regexp-replace (concat labelled-item "\n") label-on-own-line)
    (ltx2texi-regexp-replace labelled-item label-on-own-line))

  ;; break the line after the comma that follows "@item @file{...}"
  (ltx2texi-regexp-replace
   "@item @file\\([^,\n]*\\),[ ]*" "@item @file\\1,\n"))

(defun ltx2texi--header-field (x-macro)
  "Return the argument of the first X-MACRO line found after point.
X-MACRO is the name of a LaTeX macro without its backslash, such
as \"title\".  The argument is the text between the opening brace
and the last closing brace on the same line.  Return the empty
string if there is no such line.  Point is not moved."
  (save-excursion
    (if (re-search-forward (concat "\\\\" x-macro "{\\(.*\\)}") nil t)
        (match-string 1)
      "")))

(defun ltx2texi-do-header ()
  "Convert LaTeX header to Texinfo.  Used in \\[ltx2texi-convert].
Collects title, author and version from the preamble, deletes the
preamble lines that have no Texinfo counterpart, turns the
documentclass line into the texinfo input line, and inserts the
Texinfo header, the title page contents and the Top node.

Fields that are missing from the buffer are inserted as empty
strings, and an insertion whose anchor line cannot be found is
skipped."
  ;; collect information -- each field is looked up independently
  ;; from point, so the order of the lines in the preamble is
  ;; irrelevant and a missing field cannot pick up stale match data
  (let ((title-string   (ltx2texi--header-field "title"))
        (author-string  (ltx2texi--header-field "author"))
        (version-string (ltx2texi--header-field "tdsVersion")))

    ;; discard information lines
    (ltx2texi-regexp-replace "\\\\title{.*}[ ]*\n"       "")
    (ltx2texi-regexp-replace "\\\\author{.*}[ ]*\n"      "")
    (ltx2texi-regexp-replace "\\\\tdsVersion{.*}[ ]*\n"  "")
    (ltx2texi-regexp-replace "\\\\maketitle[ ]*\n"       "")

    ;; discard pre-title lines
    (ltx2texi-regexp-replace "%&latex[ ]*\n"             "")
    (ltx2texi-regexp-replace "\\\\NeedsTeXFormat.*\n"    "")

    ;; convert "\documentclass" to "\input texinfo"
    (ltx2texi-regexp-replace "\\\\documentclass.*\n"  "\\\\input texinfo\n")

    ;; insert Texinfo header lines right after "\input texinfo"
    (save-excursion
      (when (search-forward "texinfo\n" nil t)
        (insert "@setfilename " ltx2texi-filename "\n")
        (insert "@settitle " title-string "\n\n")
        (insert "@set version " version-string "\n\n")))

    ;; discard "\begin{document}"
    (ltx2texi-regexp-replace "\\\\begin{document}[ ]*\n" "")
    (ltx2texi-regexp-replace "\\\\tableofcontents[ ]*\n" "")

    ;; insert stuff for title page -- note that @titlepage
    ;; used to be \begin{legalnotice}
    (save-excursion
      (when (search-forward "@titlepage\n" nil t)
        (insert "@title " title-string "\n")
        ;; the flag defined by "@set version" above is read back
        ;; with Texinfo's @value command
        (insert "@subtitle Version @value{version}\n")
        (insert "@author " author-string "\n\n")
        (insert "@page\n@vskip 0pt plus 1filll\n")))

    ;; insert stuff for @node Top and master menu
    (save-excursion
      (when (search-forward "@end titlepage\n" nil t)
        (insert "\n@ifinfo\n")
        (insert "@node Top\n@top " title-string "\n\n")
        (insert "@menu\n@end menu\n")
        (insert "@end ifinfo\n\n")))))

(defun ltx2texi-do-trailer ()
  "Convert LaTeX trailer to Texinfo.  Used in \\[ltx2texi-convert].
Flattens the tabbing environment that holds the list of
contributors into comma-separated text and replaces the end of
the document by `@contents' and `@bye'.

Only a backslash that ends a line is treated as a tabbing line
break.  Backslashes elsewhere in the buffer, such as the ones in
the texinfo input line and in the end-of-document marker, are
left alone."

  ;; list of contributors --  we have to find something simpler
  ;; in Info and HTML to replace the tabbing environment

  (ltx2texi-regexp-replace "\\\\hspace.*\n" "")	; discard
  ;; "\>" in tabbing environment
  (ltx2texi-regexp-replace "[ ]*\\\\>[ ]*" ", ")
  ;; "\\" in tabbing environment, already converted to "\"
  ;; during the earlier conversion of "\ " (including newline).
  ;; The match is anchored at the end of the line; an unanchored
  ;; pattern would hit every backslash that is left in the buffer
  ;; and so destroy "\input texinfo" as well as the "\end{document}"
  ;; marker that is searched for below.
  (ltx2texi-regexp-replace "[ ]*\\\\[ ]*$" ", ")

  ;; convert "\end{document}"
  (ltx2texi-string-replace "\\end{document}" "@contents\n@bye\n"))