;; wiki.wiki -- simple scheme wiki parser
;;
;; Copyright (c) 2003 by Alex Shinn
;; BSD-style license. See the file COPYING for details.
(define-module wiki.wiki
(use srfi-13)
(export wiki-parse wiki-word-encode))
(select-module wiki.wiki)
;; handle *very* simple stemming to avoid the annoying problems with
;; inflected variations of wiki-words (which should resolve to the
;; same page as wiki-word)
(define (wiki-word-normalize str)
(let ((s (string-downcase str)))
(rxmatch-case s
(#/(.*)ies$/ (#f x) x)
(#/(.*x)e[ns]$/ (#f x) x)
(#/(.*)s$/ (#f x) x)
(else s))))
(define (wiki-word-encode str)
(define wiki-unsafe-rx
(string->regexp "[\\\/._]|[^\\x01-\\x7e]"))
(regexp-replace-all
wiki-unsafe-rx (wiki-word-normalize str)
(lambda (m)
(format "_~4,'0X" (char->integer (string-ref (m 0) 0))))))
(define (wiki-parse . keys)
(let-keywords* keys ((port (current-input-port))
(root (sys-getcwd))
(exists? (lambda (f)
(file-exists?
(string-append root "/" f))))
(langs? #t))
(define (wiki-defined-word? str)
(file-exists? (wiki-word-encode str)))
;; regular expressions
(define wiki-list-rx
(string->regexp "^\\s*([*]+)"))
(define wiki-indent-rx
(string->regexp "^\\s+"))
(define wiki-word-rx
(string->regexp "[\\[][\\[]([^\\]]+)[\\]][\\]]|(^|\\s)((\\w:)*\\w+(?:-+\\w+)+)\\b"))
(define wiki-bold-rx
(string->regexp "(^|\\s)\\*([^*]+)\\*(\\W|$)"))
(define wiki-italic-rx
(string->regexp "(^|\\s)/([^/]+)/(\\W|$)"))
(define wiki-uline-rx
(string->regexp "(^|\\s)_([^_]+)_(\\W|$)"))
(define wiki-header-rx
(string->regexp "(^|\\s)=(=+)([^=]+)=+(\\W|$)"))
(define wiki-par-rx
(string->regexp "^\\s*$"))
(define wiki-hr-rx
(string->regexp "^\\s*----+\\s*$"))
(define wiki-url-rx
(string->regexp "(((http)|(ftp)|(news)):/+[\\-+.,_/%?&~=:\\w]+[\\-+_/%?&~=:\\w])"))
(define wiki-lang-rx
(string->regexp "^=([-_\\w]+)$"))
(define wiki-entity-rx
(string->regexp "[&<>]"))
(define (wiki-word-rx-get m)
(let ((first (m 1)))
(if (and first (not (equal? first "")))
(values "" first)
(values (m 2) (m 3)))))
(define (wiki-list-level line)
(let ((r (rxmatch wiki-list-rx line)))
(if r
(- (rxmatch-end r 1) (rxmatch-start r 1))
0)))
(define (wiki-trim-list line)
(regexp-replace wiki-list-rx line ""))
(define wiki-entities
(let ((h (make-hash-table 'equal?)))
(hash-table-put! h "&" "&")
(hash-table-put! h "<" "<")
(hash-table-put! h ">" ">")
h))
(define (wiki-encode-entity m)
(let ((ent (rxmatch-substring m)))
(hash-table-get wiki-entities ent ent)))
(define (wiki-parse-line line)
(cond
;; paragraphs
((rxmatch wiki-par-rx line)
(set! line "
"))
;; horizontal rules
((rxmatch wiki-hr-rx line)
(set! line "
"))
(else
;; word emphasis
(set! line (regexp-replace-all
wiki-header-rx line
(lambda (m)
(let* ((h (m 2))
(depth (max (string-length h) 6)))
(format "~A~A~A"
(m 1) depth (m 3) depth (m 4))))))
(for-each
(lambda (x)
(set! line (regexp-replace-all (car x) line (cdr x))))
`((,wiki-bold-rx . "\\1\\2\\3")
(,wiki-italic-rx . "\\1\\2\\3")
(,wiki-uline-rx . "\\1\\2\\3")
(,wiki-url-rx . "\\1")
))))
line)
(let ((res (list))
(buf (list ""))
(pre? #f)
(list-level 0))
(let loop ((line (read-line port)))
(if (eof-object? line)
;; input done, accumulate as single string
(reverse! (cons (apply string-append
(reverse! (cons "
" buf))) res))
(begin
;; first encode any html
(set! line (regexp-replace-all wiki-entity-rx line
wiki-encode-entity))
;; then handle list levels and indentation
(let* ((new-level (wiki-list-level line))
(indent? (and (zero? new-level)
(rxmatch wiki-indent-rx line))))
(set! line (wiki-trim-list line))
(when (> new-level 0)
(set! line (string-append "" line)))
(if (< new-level list-level)
(while (< new-level list-level)
(set! line (string-append "" line))
(set! list-level (- list-level 1))))
(if (> new-level list-level)
(while (> new-level list-level)
(set! line (string-append "" line))
(set! list-level (+ list-level 1))))
(if indent?
(unless pre?
(set! pre? #t)
(set! line (string-append "
" line)))
(when pre?
(set! pre? #f)
(set! line (string-append line "
")))))
;; split into wiki words and perform in-line parsing
(let loop2 ((str line))
(let ((m (rxmatch wiki-word-rx str)))
(if m
(begin
(receive (w-pre word) (wiki-word-rx-get m)
(push! buf (wiki-parse-line (m 'before)))
(if w-pre (push! buf w-pre))
(push! res (apply string-append (reverse! buf)))
(let ((file (wiki-word-encode word)))
(push! res (list word file (exists? file))))
(set! buf (list))
(loop2 (m 'after))))
(push! buf (string-append (wiki-parse-line str) "\n")))))
;; append and loop
(loop (read-line port))))))))
(provide "wiki/wiki")