Pattern matching

Scheme macrology: a generalized pattern matcher

This post defines and tests the match/non-overlapping Scheme macro which is:

a simple pattern matcher with guards in the style of Dijkstra’s Guarded Commands (Dijkstra 1975). It ensures that the patterns and optional guards of different clauses do not overlap.3 This non-overlapping property ensures that the ordering of the clauses does not matter, and is required for writing correct relational programs (Byrd 2009). By ensuring the non-overlapping property holds in the Scheme version of the interpreter, we simplify the translation to miniKanren.

It is a generalization of Oleg Kiselyov’s pmatch, a simple pattern-matcher for linear patterns: from it we adapt the test test/meta-circular-interpreter about a meta-circular interpreter. This matcher is described and used in the paper about miniKanren.

Our implementation contains:

  • some refactorings of the upstream code,
  • matching of vectors and records,
  • guards are introduced after the keyword,
  • injection and negated injection of expressions in patterns (see those in action in a more complex interpreter)
  • sexp-based error reporting in case of either no matches or overlapping patterns.

An example

For the sake of clarity, consider the following definition:

(define (w x y)
      (match/non-overlapping (cons x y)
        ((,a . ,b) (and (number? a) (number? b))  (* a b))
        ((,a . ,b) (+ a b))
        ((,a ,b ,c) (and (number? a) (number? b) (number? c))  (+ a b c))))

so that evaluating the expression (list (w 3 4) (apply w '(1 (3 4)))) yields

Error: match/non-overlapping

((reason "overlapping match")
 (expr (cons x y))
 (value (3 . 4))
 (ambiguities
   (((,a unquote b) (and (number? a) (number? b))  (* a b))
    ((,a unquote b) #t  (+ a b)))))

as expected.

Implementation

(module
  (aux match)
  *
  (import
    scheme
    (chicken base)
    (chicken string)
    (chicken memory representation)
    (aux base))
  (define-syntax-rule
    (match/non-overlapping v (e ...) ...)
    (dmatch-run-a-thunk 'v v (dmatch-remexp v (e ...) ...)))
  (define-record dmatch-pkg clause thunk)
  (define-syntax
    dmatch-remexp
    (syntax-rules
      ()
      ((dmatch-remexp (rator rand ...) cls ...)
       (let1 (v (rator rand ...)) (dmatch-aux v cls ...)))
      ((dmatch-remexp v cls ...) (dmatch-aux v cls ...))))
  (define-syntax
    dmatch-aux
    (syntax-rules
      (⇒)
      ((dmatch-aux v) '())
      ((dmatch-aux v (pat g ⇒ e ...) cls ...)
       (let1 (fk (τ (dmatch-aux v cls ...)))
             (dmatch-ppat
               v
               pat
               (if g
                 (cons (make-dmatch-pkg '(pat g ⇒ e ...) (τ e ...)) (fk))
                 (fk))
               (fk))))
      ((dmatch-aux v (pat e ...) cls ...)
       (dmatch-aux v (pat #t ⇒ e ...) cls ...))))
  (define-syntax
    dmatch-ppat
    (syntax-rules
      (unquote)
      ((dmatch-ppat vv () kt kf)
       (let1 (v vv)
             (if (or (null? v) (and (vector? v) (zero? (vector-length v))))
               kt
               kf)))
      ((dmatch-ppat vv ,,,v kt kf) (if (eq? v vv) kf kt))
      ((dmatch-ppat vv ,,v kt kf)
       (let ((vv* vv) (v* v))
         (cond ((procedure? v*) (if (v* vv*) kt kf))
               (else (if (eq? v* vv*) kt kf)))))
      ((dmatch-ppat vv ,v kt kf) (let1 (v vv) kt))
      ((dmatch-ppat vv (x . y) kt kf)
       (let1 (v vv)
             (cond ((pair? v)
                    (dmatch-ppat (car v) x (dmatch-ppat (cdr v) y kt kf) kf))
                   ((and (vector? v) (> (vector-length v) 0))
                    (dmatch-ppat
                      (vector-ref v 0)
                      x
                      (dmatch-ppat (subvector v 1) y kt kf)
                      kf))
                   ((record-instance? v)
                    (let1 (r (record->vector v))
                          (dmatch-ppat
                            (vector-ref r 0)
                            x
                            (dmatch-ppat (subvector r 1) y kt kf)
                            kf)))
                   (else kf))))
      ((dmatch-ppat v lit kt kf) (if (equal? v 'lit) kt kf))))
  (define (dmatch-run-a-thunk v-expr v pkgs)
    (cond ((null? pkgs)
           (error (string-append
                    "match/non-overlapping\n\n"
                    (->string/pretty-print
                      `((reason "no match found")
                        (expr ,v-expr)
                        (value ,v))))))
          ((null? (cdr pkgs)) (let1 (t (dmatch-pkg-thunk (car pkgs))) (t)))
          (else
           (error (string-append
                    "match/non-overlapping\n\n"
                    (->string/pretty-print
                      `((reason "overlapping match")
                        (expr ,v-expr)
                        (value ,v)
                        (ambiguities ,(map dmatch-pkg-clause pkgs))))))))))

Tests

test/base-non-overlapping: pass

(define (test/base-non-overlapping _)
  (⊦= 'empty (match/non-overlapping '() (() 'empty)))
  (⊦= 'empty (match/non-overlapping #() (() 'empty)))
  (⊦= '() (match/non-overlapping '() (,r r)))
  (⊦= #() (match/non-overlapping #() (,r r)))
  (⊦= 'p (match/non-overlapping '(p) ((,r) r)))
  (⊦= #t (match/non-overlapping #(p) ((p) #t)))
  (⊦= 'p (match/non-overlapping #(p) ((,r) r)))
  (⊦= 3 (match/non-overlapping #(3 2) ((,r 2) r)))
  (⊦= '(3 #(2)) (match/non-overlapping #(3 2) ((,r unquote s) (list r s))))
  (⊦= '(3 2 #())
        (match/non-overlapping #(3 2) ((,r ,s unquote t) (list r s t))))
  (⊦⧳ ((exn)) (match/non-overlapping #(3 2) ((,r 2 ,t) r)))
  (⊦= 3 (match/non-overlapping #(3 2) ((,r ,e) r)))
  (⊦= 3
        (match/non-overlapping
          (make-record-instance 'hello 3 2)
          ((hello ,r ,e) r))))
((eta 0.004) (memory #(12582912 6256896 1048576)) (stdout "") (stderr ""))

test/h-non-overlapping: pass

(define (test/h-non-overlapping _)
  (define (h x y)
    (match/non-overlapping
      (cons x y)
      ((,a unquote b) (and (number? a) (number? b)) ⇒ (* a b))
      ((,a ,b ,c) (and (number? a) (number? b) (number? c)) ⇒ (+ a b c))))
  (⊦= '(12 8) (list (h 3 4) (apply h '(1 (3 4))))))
((eta 0.0) (memory #(12582912 6257944 1048576)) (stdout "") (stderr ""))

test/h-overlapping: pass

(define (test/h-overlapping _)
  (define (w x y)
    (match/non-overlapping
      (cons x y)
      ((,a unquote b) (and (number? a) (number? b)) ⇒ (* a b))
      ((,a unquote b) (+ a b))
      ((,a ,b ,c) (and (number? a) (number? b) (number? c)) ⇒ (+ a b c))))
  (⊦⧳ ((exn)) (list (w 3 4) (apply w '(1 (3 4))))))
((eta 0.001) (memory #(12582912 6262952 1048576)) (stdout "") (stderr ""))

test/meta-circular-interpreter: pass

(define (test/meta-circular-interpreter _)
  (define (lookup x env)
    (cond ((assq x env) => cdr) (else (error "Can't find " x))))
  (define (int code env)
    (match/non-overlapping
      code
      (',x x)
      ((let (,x ,e) ,body)
       (symbol? x)
       ⇒
       (let ((xv (int e env))) (int body (cons (cons x xv) env))))
      ((τ ,body) (τ (int body env)))
      ((λ ,argl ,body)
       (symbol? argl)
       ⇒
       (λ arglv (int body (cons (cons argl arglv) env))))
      ((λ (,x) ,body)
       (symbol? x)
       ⇒
       (λ (xv) (int body (cons (cons x xv) env))))
      ((,op unquote args)
       (not (or (eq? op 'quote) (eq? op 'let) (eq? op 'τ) (eq? op 'λ)))
       ⇒
       (let ((opv (int op env)) (argvs (map (λ (c) (int c env)) args)))
         (apply opv argvs)))
      (,x (symbol? x) ⇒ (lookup x env))
      (,x (or (number? x) (string? x)) ⇒ x)))
  (define env0 (interaction-environment/symbols '(+ - display identity)))
  (define-syntax-rule (dint body) (int 'body env0))
  (⊦ equal? 1 (dint 1))
  (⊦ equal? 'x (dint 'x))
  (⊦ void? (dint (display 'x)))
  (⊦ equal? 'x (dint (identity 'x)))
  (⊦⧳ ((exn)) (dint (display x)))
  (⊦ equal? 6 (dint (let (x (+ 1 2 3)) x)))
  (⊦ equal? 1 (let1 (t (dint (τ 1))) (t)))
  (⊦ equal? 1 ((dint (λ (x) x)) 1))
  (⊦ equal? '(1 2 3) ((dint (λ x x)) 1 2 3))
  (⊦ equal? 5 (((dint (λ (x) (λ (y) (+ x y)))) 2) 3)))
((eta 0.001) (memory #(12582912 6251648 1048576)) (stdout "x") (stderr ""))

See also