;;; html2mml.l -- translate HyperText Markup Language to Maker Markup Language.
;;; $Id: html2mml.l,v 1.1 92/08/19 18:37:59 connolly Exp $
;;;
;;; USE
;;; sgmls file.html | xlisp html2mml.l >file.mml
;;;
;;; Where xlisp is Tom Almy's improved release of David Betz's XLISP 2.1,
;;; available in export.lcs.mit.edu:/contrib/winterp/xlisp/xlisp-2.1.almy.tar.Z
;;; and sgmls is built from
;;; ifi.uio.no:/pub/SGML/SGMLS/sgmls-0.8.tar
;;; aka
;;; ftp.uu.net:/pub/text-processing/sgml/sgmls-0.8.tar.Z
;;;
;;; The resulting file will have the OS Banner from XLisp at the
;;; top. For some reason, XLisp writes everything to stdout.
;;; I patched it to write diagnostic output to stderr. I'll have
;;; to get the patches incorporated soon.
;;;
;;; Anyway, just edit the banner out so the first line of the file is
;;;
;;;
;;; Then import the mml file to FrameMaker.
;;;
(setq *tracenable* t)
(setq *breakenable* t)
(princ "\n")
(setq *para-tags*
'(title h1 h2 h3 h4 h5 document ol ul dl menu dir address xmp listing))
(setq *literal-tags* '(xmp listing))
(setq *style-sheet* "
>
>
>
>
>
>
>
>
\t\">
>
>
>
>
>
>
>
>
>
>
")
(princ *style-sheet*)
;; From almy2.1
;; push and pop treat variable v as a stack
(defmacro push (v l)
`(setf ,l (cons ,v ,l)))
(defmacro pop (l)
`(prog1 (first ,l) (setf ,l (rest ,l))))
(defun start-para (stream tag)
(format stream "<~A>~%" tag)
)
(defun end-para (stream)
(format stream "~%~%")
)
(defun convert-data (literal)
(do ((c (read-char) (read-char))
d1 d2 d3
)
((eq c #\Newline) nil)
(cond ((eq c #\\)
(cond ((setf d1 (digit-char-p (setf c (read-char))))
(setf d2 (digit-char-p (read-char)))
(setf d3 (digit-char-p (read-char)))
(princ (int-char (+ d3 (* 8 (+ d2 (* 8 d1))))))
)
((eq c #\\) (princ "\\\\"))
((eq c #\n) (format t (if literal "" " ")))
((eq c #\|) ;;nothing
)
((eq c #\s) (princ " "))
) )
((member c '(#\< #\>)) (format t "\\~A" c))
((eq c #\space) (format t (if literal "" " ")))
((eql c 7) (format t ""))
(t (princ c))
) ) )
(defun html2mml ()
(do ((c (read-char) (read-char))
stack
tag
attrs
)
((null c)) ;; quit at end of file
(case c
(#\Newline ;; do nothing
)
(#\( (let ((gi (read))
)
;; open tag
(push gi stack)
(cond ((member gi *para-tags*)
(setq tag gi)
(start-para t tag)
)
((eq gi 'a)
(let ((href (second (assoc 'href attrs)))
)
;; watch out for >'s and 's
(format t " >" href)
)
)
)
(setq attrs nil)
))
(#\) (let ((gi (read))
)
(pop stack)
(cond ((member gi *para-tags*)
(setq tag nil))
((eq gi 'a) (format t ""))
((eq gi 'dd) (format t ""))
((member gi '(p dt li)) (format t ""))
)
))
(#\-
(unless tag
(end-para t)
(dolist (gi stack)
(when (member gi *para-tags*)
(setq tag gi)
(return)
) )
(start-para t tag)
)
(convert-data (member tag *literal-tags*))
)
(#\& (let ((name (read))
)
;; name
))
(#\? (let ((pi (read-line))
)
;; processing instruction
))
(#\A (let ((name (read))
(token (read))
)
(case token
(IMPLIED ;; nothing
)
(CDATA (let ((data (read-line))
)
(push (list name data) attrs)
))
(TOKEN (let ((tokens (read-line)) ;;@@ read tokens til \n
)
;; tokens
))
(NOTATION (let ((name (read))
)
;; notation
))
(ENTITY (let ((name (read))
)
;; general entity
))
(ID (let ((id (read))
)
;; id
))
(IDREF (let ((ids (read-line)) ;; @@ read ids til \n
)
;; id's
))
) ) )
(#\D (read-line) ;; do like A but for external data name
)
) )
)
(html2mml)