;;;; This library contains a number of functions which collect and skip characters in a text file. ;;;; These functions may, for instance, be used to parse a file.<p> ;;;; It is assumed that the variable ip references an input port. The assignment of ip must be done ;;;; exernally to this library, and after the library is loaded.<p> ;;;; The main functions can be found in the section Collection and skipping functions below.<p> ;;;; This library has been developed as part of an SGML Document Type Definition (DTD) parser. ;;;; There exists <a href="../../tools/dtd-parser/doc/html/index.html">internal documentation</a> of the DTD parser, ;;;; as such also of some aspects of the functions in this library.![]()
![]()
(define ip #f) ; ======================================================================================================================== ;;; Look ahead buffer and queue. ;;; The functions in this section manipulates a look ahead queue, which is in between the input port ip ;;; and the applications. Via this buffer it is possible to implement look ahead in the input port. ;; The length of the cyclic look ahead buffer. Predefined to 2000 characters.![]()
![]()
(define max-look-ahead 2000)![]()
![]()
(define look-ahead-vector (make-vector max-look-ahead #\space))![]()
![]()
(define next-write 0)![]()
![]()
(define next-read 0)![]()
![]()
(define look-ahead-length 0)![]()
![]()
(define end-of-file? #f) ;; Reset the look ahead buffer.![]()
![]()
(define (reset-look-ahead-buffer) (set! ip #f) (set! next-write 0) (set! next-read 0) (set! look-ahead-length 0) (set! look-ahead-vector (make-vector max-look-ahead #\space)) (set! end-of-file? #f) (set! collection-buffer (make-string buffer-length #\space)) ) ;; Peek a character from the input port, but queues it for subsequent reading ;; at "the peek end". ;; This function always reads one character via read-char.![]()
![]()
(define (peek-a-char) (let ((ch (read-char ip))) (if (eof-object? ch) (begin (set! end-of-file? #t) ch) (begin (vector-set! look-ahead-vector next-write ch) (set! next-write (+ 1 next-write)) (set! look-ahead-length (+ 1 look-ahead-length)) (if (> look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (if (>= next-write max-look-ahead) (set! next-write 0)) ch)))) ;; Peeks n charcters![]()
![]()
(define (peek-chars n) (cond ((> n 0) (begin (let ((ch (peek-a-char))) (if (not (eof-object? ch)) (peek-chars (- n 1)))))) ((< n 0) (error "peek-chars: Called with negative argument")))) ;; Read from the the look ahead buffer. Only if this buffer is empty, read from the port. ;; Reads from "the read end" of the queue.![]()
![]()
(define (read-a-char) (if (> look-ahead-length 0) (let ((ch (vector-ref look-ahead-vector next-read))) (set! next-read (+ next-read 1)) (set! look-ahead-length (- look-ahead-length 1)) (if (>= next-read max-look-ahead) (set! next-read 0)) ch) (let ((ch (read-char ip))) (if (eof-object? ch) (set! end-of-file? #t)) ch))) ;; Read and return a string of length n. ;; Should take eof into account such that a string shorter than n can be returned.![]()
![]()
(define (read-a-string n) (let ((res (make-string n #\space))) (read-a-string-1 0 n res) res))![]()
![]()
(define (read-a-string-1 i n str) (cond ((>= i n) str) (else (begin (string-set! str i (read-a-char)) (read-a-string-1 (+ i 1) n str))))) ;; Return a lgt character string from the peeked chars in the queue.![]()
![]()
(define (look-ahead-prefix lgt) (if (>= look-ahead-length lgt) (look-ahead-prefix-1 0 next-read lgt (make-string lgt #\space)) (error (string-append "look-ahead-prefix: requires the look ahead to be in the queue, " (as-string lgt) )))) ; i is the index into the formed string. ; j is the index into the look-ahead queue ; lgt is the desired length of the extracted string ; res is the (tail recursive) result.![]()
![]()
(define (look-ahead-prefix-1 i j n res) (if (>= i n) res (begin (string-set! res i (vector-ref look-ahead-vector j)) (look-ahead-prefix-1 (+ i 1) (if (= j (- max-look-ahead 1)) 0 (+ j 1)) ;n res)))) ;; Return the entire look ahead queue as a string
![]()
![]()
(define (max-look-ahead-prefix) (look-ahead-prefix look-ahead-length)) ;; Return the first character in the look ahead vector. ;; As a precondition, the look ahead queue is assumed not to be empty![]()
![]()
(define (look-ahead-char) (if (>= look-ahead-length 1) (vector-ref look-ahead-vector next-read) (error "look-ahead-char: Cannot look ahead in emtpy look ahead queue"))) ;; Return whether the queue contents match the string str. ;; The queue must contain (length str) characters in order to call this function. ;; If not, an error is issued. ;; This is a proper function (appart from the error condition).![]()
![]()
(define (match-look-ahead? str) (let* ((lgt (string-length str))) (if (>= look-ahead-length lgt) (equal? (look-ahead-prefix lgt) str) (error "match-look-ahead?: String matching requires sufficient peeked characters")))) ;; Make sure that there is at least n characters in the look ahead queue![]()
![]()
(define (ensure-look-ahead n) (if (< look-ahead-length n) (peek-chars (- n look-ahead-length)))) ; ---------------------------------------------------------------------------- ; Put back facility at the write end. Part of the look ahead queue. ; Alternatively - and more useful - put back should take place at the read end. ;; Put ch back at the rear end of the queue (where peek-a-char operates).![]()
![]()
(define (put-back-a-char-write-end ch) (vector-set! look-ahead-vector next-write ch) (set! next-write (+ 1 next-write)) (set! look-ahead-length (+ 1 look-ahead-length)) (if (> look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (if (>= next-write max-look-ahead) (set! next-write 0))) ;; Put ch back at the front end of the "queue" (where read-a-char operates).![]()
![]()
(define (put-back-a-char-read-end ch) (if (<= next-read 0) (set! next-read (- max-look-ahead 1))) (set! look-ahead-length (+ look-ahead-length 1)) (if (>= look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (set! next-read (- next-read 1)) (vector-set! look-ahead-vector next-read ch)) ;; Put str back in queue. The second parameter which-end controls whether to put back ;; in read end or write end. Possible values 'read-end and 'write-end.![]()
![]()
(define (put-back-a-string str which-end) (cond ((= 0 (string-length str)) 'nothing) ((eq? which-end 'write-end) (put-back-a-string-write-end str 0 (- (string-length str) 1))) ((eq? which-end 'read-end) (put-back-a-string-read-end str 0 (- (string-length str) 1))) (else (error "put-back-a-string: Unknown end indicator"))))![]()
![]()
(define (put-back-a-string-write-end str i max) (put-back-a-char-write-end (string-ref str i)) (if (< i max) (put-back-a-string-write-end str (+ i 1) max)))![]()
![]()
(define (put-back-a-string-read-end str min i) (put-back-a-char-read-end (string-ref str i)) (if (> i min) (put-back-a-string-read-end str min (- i 1)))) ;; Provided that there is at least n characters in the reading queue, advance ;; next-read with n positions. Hereby queued characters are skipped. Not used in dtd parsing.![]()
![]()
(define (advance-look-ahead n) (if (> n look-ahead-length) (error (string-append "Cannot advance the look ahead with " (as-string n) " positions"))) (if (> n 0) (begin (set! next-read (+ next-read 1)) (set! look-ahead-length (- look-ahead-length 1)) (if (>= next-read max-look-ahead) (set! next-read 0)) (advance-look-ahead (- n 1))))) ; End of look ahead buffer (queue) ; ; ---------------------------------------------------- ;;; Collection and skipping functions. ;;; This section contains a number of higher level collection and skipping functions. ;;; These functions use the funtions from the previous section. The functions in this ;;; section are the most important of this library.![]()
![]()
(define buffer-length 10000)![]()
![]()
(define collection-buffer (make-string buffer-length #\space)) ;; Return the string collected from the input port ip. ;; The collection stops when the predicate p holds holds on the character read. ;; The last read character (the first character on which p holds) is left as the oldest character in the queue.![]()
![]()
(define (collect-until p) (collect-until-1 p ip collection-buffer 0) )![]()
![]()
(define (collect-until-1 p ip buffer next) (cond ((>= next buffer-length) (error "collect-until-1: Collection buffer is filled. You can enlarge it via the variable buffer-length")) ((and (> look-ahead-length 0) (p (as-char (look-ahead-prefix 1)))) (substring buffer 0 next)) ((and (> look-ahead-length 0) (not (p (as-char (look-ahead-prefix 1))))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-until-1 p ip buffer (+ 1 next)))) ((and (= look-ahead-length 0)) (let ((ch (peek-a-char))) (if (p ch) (substring buffer 0 next) (begin (string-set! buffer next ch) (read-a-char) (collect-until-1 p ip buffer (+ 1 next)))))))) ;; This collection procedure returns a balanced collection given two char predicates. ;; Return the string collected from the input port ip. The collection stops when the predicate char-pred-2 holds holds on the character read. ;; However, if char-pred-1 becomes true it has to be matched by char-pred-2 without causing a termination of the collection. ;; The last read character (the first character on which char-pred-2 holds) is processed by this function. ;; As a precondition assume that if char-pred-1 holds then char-pred-2 does not hold, and vice versa.![]()
![]()
(define (collect-balanced-until char-pred-1 char-pred-2) (collect-balanced-until-1 char-pred-1 char-pred-2 ip collection-buffer 0 0))![]()
![]()
(define (collect-balanced-until-1 q p ip buffer next bal-count) (ensure-look-ahead 1) (cond ((>= next buffer-length) (parse-error "collect-until-1: Collection buffer is filled. You can enlarge it via the variable buffer-length")) ((and (p (as-char (look-ahead-prefix 1))) (= bal-count 0)) (parse-error "End delimitor matched before start delimitor")) ((and (p (as-char (look-ahead-prefix 1))) (= bal-count 1)) (string-set! buffer next (read-a-char)) (substring buffer 0 (+ next 1))) ((and (p (as-char (look-ahead-prefix 1))) (> bal-count 1)) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) (- bal-count 1)))) ((and (q (as-char (look-ahead-prefix 1)))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) (+ bal-count 1)))) ((and (not (p (as-char (look-ahead-prefix 1)))) (not (q (as-char (look-ahead-prefix 1))))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) bal-count))))) ;; Skip characters while p holds. ;; The first character on which p fails is left as the oldest character in the queue ;; The predicate does not hold if end of file![]()
![]()
(define (skip-while p) (cond ((and (not end-of-file?) (> look-ahead-length 0) (p (as-char (look-ahead-prefix 1)))) (begin (read-a-char) (skip-while p))) ((and (not end-of-file?) (= look-ahead-length 0)) (begin (peek-a-char) (if (and (not end-of-file?) (p (as-char (look-ahead-prefix 1)))) (begin (read-a-char) (skip-while p))))))) ;; Assume that str is just in front of us. Skip through it. ;; If str is not in front of us, a fatal error occurs with if-not-message as error message.![]()
![]()
(define (skip-string str if-not-message) (let ((str-1 (read-a-string (string-length str)))) (if (not (equal? str str-1)) (error if-not-message)))) ;; Skip characters until str is encountered. If inclusive, also skip str. ;; It is assumed as a precondition that the length of str is at least one.![]()
![]()
(define (skip-until-string str . inclusive) (let* ((str-lgt (string-length str)) (first-ch (string-ref str 0)) (incl (if (null? inclusive) #f (car inclusive)))) (skip-until-string-1 str str-lgt first-ch incl)))![]()
![]()
(define (skip-until-string-1 str str-lgt first-ch incl) (skip-while (negate (char-predicate first-ch))) (ensure-look-ahead str-lgt) (if (equal? (look-ahead-prefix str-lgt) str) (if incl (read-a-string str-lgt)) (begin (read-a-char) ; eat the matched first char (skip-until-string-1 str str-lgt first-ch incl)))) ;; Collect characters until str is encountered. If inclusive, also collect str. ;; It is assumed as a precondition that the length of str is at least one.![]()
![]()
(define (collect-until-string str . inclusive) (let* ((str-lgt (string-length str)) (first-ch (string-ref str 0)) (incl (if (null? inclusive) #f (car inclusive)))) (collect-until-string-1 str str-lgt first-ch incl)))![]()
![]()
(define (collect-until-string-1 str str-lgt first-ch incl) (let ((res (collect-until (char-predicate first-ch)))) (ensure-look-ahead str-lgt) (if (equal? (look-ahead-prefix str-lgt) str) (if incl (string-append res (read-a-string str-lgt)) res) (string-append res (as-string (read-a-char)) (collect-until-string-1 str str-lgt first-ch incl))))) ; --------------------------------------------- ;;; Useful predicates for skipping and collecting. ;; Is ch a white space character?![]()
![]()
(define (is-white-space? ch) (if (eof? ch) #f (let ((n (as-number ch))) (or (eq? n 32) (eq? n 9) (eq? n 10) (eq? n 12) (eq? n 13))))) ;; Is ch an end of line charcter?![]()
![]()
(define (end-of-line? ch) (if (eof? ch) #f (let ((n (as-number ch))) (or (eq? n 10) (eq? n 13))))) ;; Is ch an end of file character?![]()
![]()
(define (eof? ch) (eof-object? ch)) ;; Return a predicate functions which matches the character ch. ;; A higher order function.![]()
![]()
(define (char-predicate ch) (lambda (c) (eq? c ch)))