;; wiki.wiki -- simple scheme wiki parser ;; ;; Copyright (c) 2003 by Alex Shinn ;; BSD-style license. See the file COPYING for details. (define-module wiki.wiki (use srfi-13) (export wiki-parse wiki-word-encode)) (select-module wiki.wiki) ;; handle *very* simple stemming to avoid the annoying problems with ;; inflected variations of wiki-words (which should resolve to the ;; same page as wiki-word) (define (wiki-word-normalize str) (let ((s (string-downcase str))) (rxmatch-case s (#/(.*)ies$/ (#f x) x) (#/(.*x)e[ns]$/ (#f x) x) (#/(.*)s$/ (#f x) x) (else s)))) (define (wiki-word-encode str) (define wiki-unsafe-rx (string->regexp "[\\\/._]|[^\\x01-\\x7e]")) (regexp-replace-all wiki-unsafe-rx (wiki-word-normalize str) (lambda (m) (format "_~4,'0X" (char->integer (string-ref (m 0) 0)))))) (define (wiki-parse . keys) (let-keywords* keys ((port (current-input-port)) (root (sys-getcwd)) (exists? (lambda (f) (file-exists? (string-append root "/" f)))) (langs? #t)) (define (wiki-defined-word? str) (file-exists? (wiki-word-encode str))) ;; regular expressions (define wiki-list-rx (string->regexp "^\\s*([*]+)")) (define wiki-indent-rx (string->regexp "^\\s+")) (define wiki-word-rx (string->regexp "[\\[][\\[]([^\\]]+)[\\]][\\]]|(^|\\s)((\\w:)*\\w+(?:-+\\w+)+)\\b")) (define wiki-bold-rx (string->regexp "(^|\\s)\\*([^*]+)\\*(\\W|$)")) (define wiki-italic-rx (string->regexp "(^|\\s)/([^/]+)/(\\W|$)")) (define wiki-uline-rx (string->regexp "(^|\\s)_([^_]+)_(\\W|$)")) (define wiki-header-rx (string->regexp "(^|\\s)=(=+)([^=]+)=+(\\W|$)")) (define wiki-par-rx (string->regexp "^\\s*$")) (define wiki-hr-rx (string->regexp "^\\s*----+\\s*$")) (define wiki-url-rx (string->regexp "(((http)|(ftp)|(news)):/+[\\-+.,_/%?&~=:\\w]+[\\-+_/%?&~=:\\w])")) (define wiki-lang-rx (string->regexp "^=([-_\\w]+)$")) (define wiki-entity-rx (string->regexp "[&<>]")) (define (wiki-word-rx-get m) (let ((first (m 1))) (if (and first (not (equal? first ""))) (values "" first) (values (m 2) (m 3))))) (define (wiki-list-level line) (let ((r (rxmatch wiki-list-rx line))) (if r (- (rxmatch-end r 1) (rxmatch-start r 1)) 0))) (define (wiki-trim-list line) (regexp-replace wiki-list-rx line "")) (define wiki-entities (let ((h (make-hash-table 'equal?))) (hash-table-put! h "&" "&") (hash-table-put! h "<" "<") (hash-table-put! h ">" ">") h)) (define (wiki-encode-entity m) (let ((ent (rxmatch-substring m))) (hash-table-get wiki-entities ent ent))) (define (wiki-parse-line line) (cond ;; paragraphs ((rxmatch wiki-par-rx line) (set! line "

")) ;; horizontal rules ((rxmatch wiki-hr-rx line) (set! line "


")) (else ;; word emphasis (set! line (regexp-replace-all wiki-header-rx line (lambda (m) (let* ((h (m 2)) (depth (max (string-length h) 6))) (format "~A~A~A" (m 1) depth (m 3) depth (m 4)))))) (for-each (lambda (x) (set! line (regexp-replace-all (car x) line (cdr x)))) `((,wiki-bold-rx . "\\1\\2\\3") (,wiki-italic-rx . "\\1\\2\\3") (,wiki-uline-rx . "\\1\\2\\3") (,wiki-url-rx . "\\1") )))) line) (let ((res (list)) (buf (list "

")) (pre? #f) (list-level 0)) (let loop ((line (read-line port))) (if (eof-object? line) ;; input done, accumulate as single string (reverse! (cons (apply string-append (reverse! (cons "

" buf))) res)) (begin ;; first encode any html (set! line (regexp-replace-all wiki-entity-rx line wiki-encode-entity)) ;; then handle list levels and indentation (let* ((new-level (wiki-list-level line)) (indent? (and (zero? new-level) (rxmatch wiki-indent-rx line)))) (set! line (wiki-trim-list line)) (when (> new-level 0) (set! line (string-append "
  • " line))) (if (< new-level list-level) (while (< new-level list-level) (set! line (string-append "" line)) (set! list-level (- list-level 1)))) (if (> new-level list-level) (while (> new-level list-level) (set! line (string-append "