;;; LL test (defparameter *ll-grammar* '((e t e1) (e1 '+ t e1) (e1) (t f t1) (t1 '* f t1) (t1) (f '< e '>) (f 'id))) ;;; LR test (defparameter *lr-grammar* '((s e) (e e '+ t) (e t) (t t '* f) (t f) (f '< e '>) (f 'id))) ;;; more useful assoc for our use case (defun assoc* (item alist) (assoc item alist :test #'equal)) ;;; check if two sets are equal. uses equal (defun set-equal (set1 set2) (null (set-exclusive-or set1 set2 :test #'equal))) ;;; union with any number of arguments. uses equal (defun union-n (&rest rest) (labels ((union-equal (s1 s2) (union s1 s2 :test #'equal))) (and rest (reduce #'union-equal rest)))) ;;; like union but append at the end (defun union-end (lst1 lst2) (append lst1 (remove-if #'(lambda (x) (member x lst1 :test #'equal)) lst2))) ;;; return whether a symbol is a terminal (defun termsym-p (sym) (or (null sym) ; empty string (and (listp sym) (eq (car sym) 'quote)))) ;;; return only type of terminal symbol (defun termsym-type (sym) (if (atom sym) sym (cadr sym))) ;;; get all productions for a given non-terminal (defun get-productions (grammar sym) (remove-if-not #'(lambda (prod) (eq (car prod) sym)) grammar)) ;;; does this production have a terminal? (defun has-terminal (production) (some #'(lambda (sym) (and (not (atom sym)) (eq (car sym) 'quote))) (cdr production))) ;;; make a list of all non-terminal symbols (defun get-nonterms (grammar) (remove-duplicates (mapcar #'car grammar))) ;;; can we directly derive the empty string from this symbol? (defun has-empty-production (grammar sym) (some #'(lambda (rule) (eq (cadr rule) nil)) (get-productions grammar sym))) #| ;;; make a list of symbols that can derive the empty string (defun get-empty-syms (grammar) (let* ((rules (remove-if #'has-terminal grammar)) (empty-syms (remove-if-not #'(lambda (x) (has-empty-production grammar x)) (get-nonterms rules)))) (do (prev-empty-syms) ;; if nothing new was added we're done ((equal empty-syms prev-empty-syms) empty-syms) (setq prev-empty-syms empty-syms) ;; remove symbols that can derive the empty string from right sides (let ((null-rules (mapcar #'(lambda (prod) (cons (car prod) (remove-if #'(lambda (sym) (member sym empty-syms)) (cdr prod)))) rules))) ;; if a rule has an empty right side now, that symbol ;; can derive the empty string. So add all of these symbols. (setq empty-syms (union-n empty-syms (mapcar #'car (remove-if #'cdr null-rules)))))))) |# ;;; get FIRST set for string of symbols (defun get-first-string (first-sets str) (labels ((empty-first (sym) (member nil (cdr (assoc sym first-sets)))) (get-chain-len (chain) (cond ((null chain) 0) ((null (car chain)) 1) (t (+ 1 (get-chain-len (cdr chain)))))) (lookup-first (sym) (if (termsym-p sym) (list sym) (remove nil (cdr (assoc sym first-sets)))))) (let* (;; check which symbols can become empty (empty-syms (mapcar #'empty-first str)) ;; only interested in the leading symbols that can be empty ;; and the first after that (chain (subseq str 0 (get-chain-len empty-syms)))) (union-n ;; FIRST items from all symbols in the chain (apply #'union-n (mapcar #'lookup-first chain)) ;; whole string can become nil (and (every #'empty-first str) '(nil)))))) ;;; make FIRST sets for all non-terminals (defun make-first (grammar) (let ((first-sets (mapcar #'list (get-nonterms grammar)))) (do (done) (done first-sets) (setq done t) ;; build up FIRST sets from all productions (dolist (prod grammar) (let* ((as (assoc (car prod) first-sets)) (new-set (union-n (cdr as) (get-first-string first-sets (cdr prod))))) (when (not (set-equal (cdr as) new-set)) (setq done nil)) (rplacd as new-set)))))) ;;; make FOLLOW sets for all non-terminals (defun make-follow (grammar) (let ((first-sets (make-first grammar)) (follow-sets (mapcar #'list (get-nonterms grammar)))) ;;; starting symbol is LHS of first production by convention (rplacd (car follow-sets) '($)) (do (done) (done follow-sets) (setq done t) ;; build up FOLLOW sets from all productions and FIRST sets (dolist (prod grammar) (do ((str (cdr prod) (cdr str))) ((null str)) ;; interested in (non-term . suffix) (when (not (termsym-p (car str))) (let* ((as (assoc (car str) follow-sets)) (fst (get-first-string first-sets (cdr str))) (new-set (union-n (cdr as) ;; FIRST of suffix (remove nil fst) ;; FOLLOW of production symbol if at end (and (member nil fst) (cdr (assoc (car prod) follow-sets)))))) (when (not (set-equal (cdr as) new-set)) (setq done nil)) (rplacd as new-set)))))))) (defun make-ll1-table (grammar) (let ((first-sets (make-first grammar)) (follow-sets (make-follow grammar)) parse-table) (labels ((add-terminals (prod lst) (dolist (term (remove nil lst)) (let ((key (cons (car prod) (termsym-type term)))) (when (assoc* key parse-table) (format t "ERROR: grammar ambiguous~%")) (setq parse-table (acons key prod parse-table)))))) (dolist (prod grammar) (let ((fst (get-first-string first-sets (cdr prod))) (flw (cdr (assoc (car prod) follow-sets)))) (add-terminals prod fst) (when (member nil fst) (add-terminals prod flw)))) parse-table))) ;;; LL parsing algorithm (defun parse-ll (grammar table input) (do ((stack (list (caar grammar) '$)) (input (append input '($)))) ((eq (car stack) '$) t) ; (format t "stack: ~A~%" stack) ; (format t "input: ~A~%" input) (if (or (termsym-p (car stack)) (eq (car stack) '$)) ;; pop terminal (cond ((eq (termsym-type (car stack)) (termsym-type (car input))) (pop stack) (pop input)) (t (print 'error) (return-from parse-ll nil))) ;; apply production rule (let ((entry (assoc* (cons (car stack) (termsym-type (car input))) table))) (cond (entry (pop stack) (setq stack (append (cdr (cdr entry)) stack)) (print (cdr entry))) (t (print 'error) (return-from parse-ll nil))))))) ;;; make an initial LR(0) item (defun make-item-0 (prod) (list 1 prod)) ;;; make an initial LR(1) item (defun make-item-1 (prod next-sym) (list 1 prod next-sym)) (defun advance-item (item) (cons (1+ (car item)) (cdr item))) (defun item-prod (item) (cadr item)) (defun item-tail (item) (nthcdr (car item) (item-prod item))) (defun item-next-sym (item) (caddr item)) ;;; construct closure for set of LR(0) items (defun make-closure-0 (grammar items) (do (done) (done items) (setq done t) (dolist (item items) (let* ((sym (car (item-tail item))) (prods (remove-if-not #'(lambda (prod) (eq (car prod) sym)) grammar)) (new-items (union-end items (mapcar #'make-item-0 prods)))) (when (not (set-equal items new-items)) (setq done nil)) (setq items new-items))))) ;;; construct goto set for a set of LR(0) items (defun make-goto-0 (grammar items sym) (make-closure-0 grammar (mapcar #'advance-item (remove-if-not #'(lambda (item) (equal (car (item-tail item)) sym)) items)))) ;;; make canonical sets of items for LR(0) (defun make-sets-of-items (grammar) (let ((sets (list (make-closure-0 grammar (list (make-item-0 (car grammar)))))) (grammar-symbols (remove-duplicates (apply #'append grammar) :test #'equal :from-end t))) (do (done) (done sets) (setq done t) (dolist (items sets) (dolist (sym grammar-symbols) (let ((goto (make-goto-0 grammar items sym))) (when (and goto (not (member goto sets :test #'equal))) (nconc sets (list goto)) ; could cons to front but this is nicer (setq done nil)))))))) ;; construct action and goto tables for SLR(1) parser (defun make-slr-tables (grammar) (let ((item-sets (coerce (make-sets-of-items grammar) 'vector)) (follow-sets (make-follow grammar)) (top-sym (caar grammar)) action goto) (labels ((set-action (state terminal act) (let ((key (cons state terminal))) (when (assoc* key action) (format t "ERROR: conflict~%")) (setq action (acons key act action)))) (set-goto (state nonterm next-state) (setq goto (acons (cons state nonterm) next-state goto)))) (dotimes (i (length item-sets)) (let ((items (svref item-sets i))) ;; action table (dolist (item items) (let ((next-sym (car (item-tail item)))) (cond ;; accept at end of top production ((and (eq (car (item-prod item)) top-sym) (null next-sym)) (set-action i '$ '(accept))) ;; reduce at end of production ((null next-sym) (let ((rule (position (item-prod item) grammar :test #'equal))) (dolist (terminal (cdr (assoc (car (item-prod item)) follow-sets))) (set-action i (termsym-type terminal) (list 'reduce rule))))) ;; shift to next state after terminal ((termsym-p next-sym) (let* ((goto (make-goto-0 grammar items next-sym)) (next-state (position goto item-sets :test #'equal))) (set-action i (termsym-type next-sym) (list 'shift next-state))))))) ;; goto table (dolist (nonterm (get-nonterms grammar)) (let* ((goto (make-goto-0 grammar items nonterm)) (next-state (position goto item-sets :test #'equal))) (when next-state (set-goto i nonterm next-state)))))) (list action goto)))) ;;; LR parsing algorithm (defun parse-lr (grammar tables input) (let ((grammar-vec (coerce grammar 'vector)) (action-table (car tables)) (goto-table (cadr tables)) (stack (list 0)) (input (append input '($)))) (do nil (nil) (let* ((key (cons (car stack) (termsym-type (car input)))) (action (cdr (assoc* key action-table)))) (case (car action) (shift (push (termsym-type (car input)) stack) (push (cadr action) stack) (setq input (cdr input))) (reduce (let ((prod (svref grammar-vec (cadr action)))) (dotimes (i (length (cdr prod))) (pop stack) (pop stack)) (push (car prod) stack) (push (cdr (assoc* (cons (cadr stack) (car prod)) goto-table)) stack) (print prod))) (accept (return-from parse-lr t)) (t (print 'error) (return-from parse-lr nil))))))) (let ((table (make-ll1-table *ll-grammar*))) (print (parse-ll *ll-grammar* table '(id + id * id))) (print (parse-ll *ll-grammar* table '(id + id * id + id)))) (let ((tables (make-slr-tables *lr-grammar*))) (print (parse-lr *lr-grammar* tables '(id * id + id))) (print (parse-lr *lr-grammar* tables '(id * id + id * id))))