# Library DenOpSem

Encoding semantics in Coq
Require Import Eqdep.
Require Import String.
Require Import List.
Require Import Omega.
Require Import Recdef.
Set Implicit Arguments.
Unset Automatic Introduction.
Local Open Scope string_scope.

## Abstract Syntax

Definition var := string.

Inductive binop : Set := Plus_op | Minus_op | Eq_op.

Inductive exp : Set :=
| Var_e : var -> exp
| Lam_e : var -> exp -> exp
| App_e : exp -> exp -> exp
| Num_e : nat -> exp
| Binop_e : binop -> exp -> exp -> exp
| Bool_e : bool -> exp
| If_e : exp -> exp -> exp -> exp.

Inductive value : Set :=
| Lam_v : list (var * value) -> var -> exp -> value
| Num_v : nat -> value
| Bool_v : bool -> value.

| Value : value -> answer

Definition tvar := nat.
Inductive type : Set :=
| Tvar_t : tvar -> type
| Nat_t : type
| Bool_t : type
| Arrow_t : type -> type -> type.

## Environments

Definition env_t(A:Type) := list (var * A).
Fixpoint lookup A (env:env_t A) (x:var) : option A :=
match env with
| nil => None
| (y,v)::env' => if string_dec x y then Some v else lookup env' x
end.

## Expression Typing

Reserved Notation "G |-- e ; t" (at level 80).

Inductive hasType : env_t type -> exp -> type -> Prop :=
| Var_ht : forall G x t,
lookup G x = Some t ->
G |-- Var_e x ; t
| Lam_ht : forall G x e t1 t2,
((x,t1)::G) |-- e ; t2 ->
G |-- Lam_e x e ; Arrow_t t1 t2
| App_ht : forall G e1 e2 t1 t2,
G |-- e1 ; (Arrow_t t1 t2) ->
G |-- e2 ; t1 ->
G |-- App_e e1 e2 ; t2
| Num_ht : forall G n,
G |-- Num_e n ; Nat_t
| Binop_ht : forall G b e1 e2,
G |-- e1 ; Nat_t ->
G |-- e2 ; Nat_t ->
G |-- Binop_e b e1 e2 ; match b with | Eq_op => Bool_t | _ => Nat_t end
| Bool_ht : forall G b,
G |-- Bool_e b ; Bool_t
| If_ht : forall G e1 e2 e3 t,
G |-- e1 ; Bool_t ->
G |-- e2 ; t ->
G |-- e3 ; t ->
G |-- If_e e1 e2 e3 ; t
where "G |-- e ; t" := (hasType G e t) : typing_scope.
Hint Constructors hasType : type_db.

## Value, Environment, and Answer Typing

Section ValueTyping.
Local Open Scope typing_scope.

Notice that we need value and environments definitions to be mutually recursive. Also notice that by choosing inductive definitions, we rule out "circular" lambdas. If we picked CoInductive instead, we could model this sort of case...
Inductive valType : value -> type -> Prop :=
| Num_vt : forall n, valType (Num_v n) Nat_t
| Bool_vt : forall b, valType (Bool_v b) Bool_t
| Lam_vt : forall env x e G t1 t2,
envType env G ->
(x,t1)::G |-- e ; t2 ->
valType (Lam_v env x e) (Arrow_t t1 t2)
with envType : env_t value -> env_t type -> Prop :=
| Nil_et : envType nil nil
| Cons_et : forall x v t env G,
valType v t ->
envType env G ->
envType ((x,v)::env) ((x,t)::G).
End ValueTyping.
Hint Constructors valType envType : type_db.

We don't really need this induction scheme, but whenever I have mutually inductive definitions, I always generate this in case I do need it.
Scheme valType_ind_2 := Induction for valType Sort Prop
with envType_ind_2 := Induction for envType Sort Prop.
Combined Scheme valenvType_ind from valType_ind_2, envType_ind_2.

Inductive ansType : answer -> type -> Prop :=
| Val_ans : forall v t, valType v t -> ansType (Value v) t.
Hint Constructors ansType : type_db.

## Binop Evaluation

Definition eval_binop(b:binop)(v1 v2:value) : answer :=
match b, v1, v2 with
| Plus_op, Num_v n1, Num_v n2 => Value (Num_v (n1+n2))
| Minus_op, Num_v n1, Num_v n2 => Value (Num_v (n1-n2))
| Eq_op, Num_v n1, Num_v n2 =>
Value (Bool_v (if eq_nat_dec n1 n2 then true else false))
| _, _, _ => TypeError
end.

match a with | Value _ => True | _ => False end.
Definition is_lambda(v:value) : Prop :=
match v with | Lam_v _ _ _ => True | _ => False end.
Definition is_bool(v:value) : Prop :=
match v with | Bool_v _ => True | _ => False end.

# A Standard (but tedious) Big-Step Semantics

Module TEDIOUS_EVALS.

Reserved Notation "env |= e ==> a" (at level 80).

Inductive evals_t : env_t value -> exp -> answer -> Prop :=
| evals_Var : forall x env v,
lookup env x = Some v -> env |= Var_e x ==> Value v

| evals_Var_fail : forall x env,
lookup env x = None -> env |= Var_e x ==> TypeError

| evals_Lam : forall x e env, env |= Lam_e x e ==> Value (Lam_v env x e)

| evals_Num : forall n env, env |= Num_e n ==> Value (Num_v n)

| evals_Bool : forall b env, env |= Bool_e b ==> Value (Bool_v b)

| evals_App : forall e1 e2 env x e env' v2 a,
env |= e1 ==> Value (Lam_v env' x e) ->
env |= e2 ==> Value v2 ->
((x,v2)::env') |= e ==> a ->
env |= App_e e1 e2 ==> a

| evals_App_fail1 : forall e1 e2 env a,
env |= e1 ==> a ->
~ is_value a ->
env |= App_e e1 e2 ==> a

| evals_App_fail2 : forall e1 e2 env v,
env |= e1 ==> Value v ->
~ is_lambda v ->
env |= App_e e1 e2 ==> TypeError

| evals_App_fail3 : forall e1 e2 env v1 a,
env |= e1 ==> Value v1 ->
env |= e2 ==> a ->
~ is_value a ->
env |= App_e e1 e2 ==> a

| evals_Binop : forall b e1 e2 env v1 v2,
env |= e1 ==> Value v1 ->
env |= e2 ==> Value v2 ->
env |= Binop_e b e1 e2 ==> eval_binop b v1 v2

| evals_Binop_fail1 : forall b e1 e2 env a,
env |= e1 ==> a ->
~ is_value a ->
env |= Binop_e b e1 e2 ==> a

| evals_Binop_fail2 : forall b e1 e2 env v1 a,
env |= e1 ==> Value v1 ->
env |= e2 ==> a ->
~ is_value a ->
env |= Binop_e b e1 e2 ==> a

| evals_If_true : forall e1 e2 e3 env a,
env |= e1 ==> Value (Bool_v true) ->
env |= e2 ==> a ->
env |= If_e e1 e2 e3 ==> a

| evals_If_false : forall e1 e2 e3 env a,
env |= e1 ==> Value (Bool_v false) ->
env |= e3 ==> a ->
env |= If_e e1 e2 e3 ==> a

| evals_If_fail1 : forall e1 e2 e3 env a,
env |= e1 ==> a ->
~ is_value a ->
env |= If_e e1 e2 e3 ==> a

| evals_If_fail2 : forall e1 e2 e3 env v,
env |= e1 ==> Value v ->
~ is_bool v ->
env |= If_e e1 e2 e3 ==> TypeError

where "env |= e ==> a" := (evals_t env e a) : evals_scope.
Hint Constructors evals_t : evals_t_db.

## Type Preservation of the Semantics

Local Open Scope evals_scope.
Local Open Scope typing_scope.

Ltac mysimp :=
match goal with
| [ H1 : ansType ?a _, H2 : ~ is_value ?a |- _] =>
inversion H1 ; clear H1 ; subst ; simpl in H2 ; firstorder
| [ H : ansType (Value _) _ |- _ ] => inversion H ; clear H ; subst
| [ H1 : ~ _ ?v, H : valType ?v _ |- _] =>
inversion H ; clear H ; subst ; simpl in H1 ; firstorder
| [ H : valType _ Nat_t |- _] => inversion H ; clear H ; subst
| [ H1: envType ?env ?G, H : ?G |-- ?e ; ?t, IH : forall _ _, _ -> _ |-- ?e ; _ -> _ |- _] =>
generalize (IH G _ H1 H) ; clear IH ; intro
| [ H : valType _ (Arrow_t _ _) |- _ ] => inversion H ; clear H ; subst
| [ IH : forall _ _, envType ((?x,?v2)::_) _ -> _ -> _ |- _] => eapply IH ; eauto ;
eauto with type_db
| [ |- context[eval_binop ?b (Num_v _) (Num_v _)] ] => destruct b ; simpl
| [ p : prod _ _ |- _ ] => destruct p
| [ H : context[string_dec ?a ?b] |- _ ] => destruct (string_dec a b) ; subst ; try congruence
| [ H : Some _ = Some _ |- _ ] => injection H ; clear H ; intros ; subst
end ; subst.

Ltac lookup_helper :=
induction 1; intros ; simpl in * ; try congruence ; repeat mysimp ;
eauto with type_db.

Lemma LookupVal : forall env G, envType env G ->
forall x v t, lookup env x = Some v -> lookup G x = Some t -> ansType (Value v) t.
Proof.
lookup_helper.
Qed.
Hint Resolve LookupVal : type_db.

Lemma LookupError : forall env G, envType env G ->
forall x t, lookup env x = None -> lookup G x = Some t ->
ansType TypeError t.
Proof.
lookup_helper.
Qed.
Hint Resolve LookupError : type_db.

## The Preservation Lemma

Lemma Preservation : forall env e a,
env |= e ==> a ->
forall G t,
envType env G ->
G |-- e ; t ->
ansType a t.
Proof.
Ltac inv :=
match goal with
| [ H1 : _ |-- _ ; _, H2 : _ |-- _ ; _ |- _] => inversion H1 ; inversion H2 ; clear H1 H2
| [ H1 : _ |-- _ ; _ |- _] => inversion H1 ; clear H1
| _ => idtac
end ; subst.

induction 1 ; intros ; inv ; repeat mysimp ; eauto with type_db.
Qed.

End TEDIOUS_EVALS.

# A (not quite) Denotational Semantics

We'll begin by defining a monad where the results are always answers.
Definition Ret(v:value) : answer := Value v.
This will take care of propagating type errors for us.
match c with
| Value v => f v
| TypeError => TypeError
end.

Coq notation really comes in handy.
Notation "'ret' x" := (Ret x) (at level 75) : comp_scope.
Notation "x <- c ; f" := (Bind c (fun x => f))
(right associativity, at level 84, c1 at next level) : comp_scope.
Local Open Scope comp_scope.

Now we can write an evaluator, just as we might do it in Haskell...
Definition eval (eval: exp -> env_t value -> answer) (e:exp) (env:env_t value) : answer :=
match e with
| Var_e x => match lookup env x with
| None => TypeError
| Some v => ret v
end
| Lam_e x e => ret (Lam_v env x e)
| App_e e1 e2 =>
v1 <- eval e1 env ; v2 <- eval e2 env ;
match v1 with
| Lam_v env' x e' => eval e' ((x,v2)::env')
| _ => TypeError
end
| Num_e n => ret Num_v n
| Binop_e b e1 e2 =>
v1 <- eval e1 env ; v2 <- eval e2 env ; eval_binop b v1 v2
| Bool_e b => ret Bool_v b
| If_e e1 e2 e3 =>
v1 <- eval e1 env ;
match v1 with
| Bool_v b => if b then eval e2 env else eval e3 env
| _ => TypeError
end
end.

Hmmm. But how do we tie the knot in the evaluator? We'd like to have a fixed-point combinator. But alas...

Section I_WISH.
Assume we had some fixed-point combinator that satisfies an unrolling equation.
Variable ffix : forall (A B:Type), ((A -> B) -> A -> B) -> A -> B.
Axiom ffix_unroll : forall A B (f : (A -> B) -> A -> B),
ffix f = f (ffix f).

Then we can use ffix to build a "proof" of False, rendering the logic inconsistent.
Definition omega : False := ffix (fun f : unit -> False => f) tt.

Even if we limited ffix to operating over Set (instead of Type), the problem doesn't go away.
Inductive void : Set := .
Definition omega' : void := ffix (fun f : unit -> void => f) tt.
Definition oops : False := match omega' with end.

So in spite of the fact that the Haskell-like function is easier to write, maintain (and I would claim, reason about), we can't really go down this route.
End I_WISH.

# A Mixed Denotational-Operational Approach

Module ONE_SOLUTION.
We're going to use the Haskell-like approach, but instead of producing values, we'll produce computations. A computation is simply a way to reify the monad, except that we'll add an extra case for "delayed" expressions that we haven't bothered to compile yet. This will allow us to break the cycle in our semantics, at the price of needing an operational definition of the meaning of a computation.

Our denotations will be computations. Notice that we use the Coq function space to represent Bind. That will greatly simplify our treatment of substitution in our semantics, and allow us to use an arbitrary Coq function to build a continuation.
Inductive comp :=
| Ret : answer -> comp
| Bind : comp -> (value -> comp) -> comp
| Delay : exp -> list (var * value) -> comp.

Notation "'ret' x" := (Ret (Value x)) (at level 75) : comp_scope.
Notation "x <- c1 ; c2" := (Bind c1 (fun x => c2))
(right associativity, at level 84, c1 at next level) : comp_scope.
Local Open Scope comp_scope.
Definition Terr := Ret TypeError.

Translate expressions to a computation -- this is exactly the same as the Haskell approach except that we Delay the compilation of the appliation of a function -- this is the only place where Coq can't conclude that the compilation will terminate.
Fixpoint compile (e:exp) (env:env_t value) : comp :=
match e with
| Var_e x =>
match lookup env x with
| None => Terr
| Some v => ret v
end
| Lam_e x e => ret (Lam_v env x e)
| App_e e1 e2 =>
v1 <- compile e1 env ;
v2 <- compile e2 env ;
match v1 with
| Lam_v env' x e' => Delay e' ((x,v2)::env')
| _ => Terr
end
| Num_e n => ret Num_v n
| Binop_e b e1 e2 =>
v1 <- compile e1 env ;
v2 <- compile e2 env ;
Ret (eval_binop b v1 v2)
| Bool_e b => ret Bool_v b
| If_e e1 e2 e3 =>
v1 <- compile e1 env ;
match v1 with
| Bool_v b =>
if b then compile e2 env else compile e3 env
| _ => Terr
end
end.

Now we can define a variety of different kinds of operational semantics for computations, including big-step, small-step, and even a partial function (since steps are deterministic.) The point is that the compilation doesn't preclude our modeling "effects" such as non-determinism (i.e., threads) or state or exceptions. We just need to have the right monadic structure to support those things.

The important point is that there are many, many fewer rules or cases to deal with in the computation space. And this will scale a lot better as we grow the source language.

A Big-Step Operational Semantics for Computations
Inductive run : comp -> answer -> Prop :=
| run_Ret : forall a, run (Ret a) a
| run_Delay : forall env e v,
run (compile e env) v -> run (Delay e env) v
| run_Bind_value : forall c f v a,
run c (Value v) -> run (f v) a -> run (Bind c f) a
| run_Bind_typeerror : forall c f,
run c TypeError -> run (Bind c f) TypeError
| run_Bind_bind : forall c f1 f2 a,
run (Bind c (fun x => Bind (f1 x) f2)) a->
run (Bind (Bind c f1) f2) a.
Hint Constructors run : evals_db.

A Small-Step Operational Semantics
Inductive step : comp -> comp -> Prop :=
| step_Delay : forall env e, step (Delay e env) (compile e env)
| step_Bind_value : forall f v, step (Bind (Ret (Value v)) f) (f v)
| step_Bind_typeerror : forall f, step (Bind (Ret TypeError) f) (Ret TypeError)
| step_Bind : forall c1 c2 f, step c1 c2 -> step (Bind c1 f) (Bind c2 f).
Hint Constructors step : evals_db.

Alternatively, we can define a step function.
Implicit Arguments inl [A B].
Implicit Arguments inr [A B].
Fixpoint step_fn(c:comp) : comp + answer :=
match c with
| Ret a => inr a
| Delay e env => inl (compile e env)
| Bind c1 f =>
match step_fn c1 with
| inl c2 => inl (Bind c2 f)
| inr (Value v) => inl (f v)
| inr TypeError => inr TypeError
end
end.

Now we can define the 1-step evaluation relation i nterms of the step function. Alternatively, we could use the inductively-defined step relation.
Definition step1(c1 c2:comp) : Prop := step_fn c1 = inl c2.

And we can define the steps relations as the reflexive, transitive closure of the 1-step relation.
Require Import Relation_Operators.
Definition steps := clos_refl_trans comp step.
Hint Constructors clos_refl_trans : evals_db.

Notation "c1 '==>1' c2" := (step c1 c2) (at level 80) : evals_scope.
Notation "c1 '==>*' c2" := (steps c1 c2) (at level 80) : evals_scope.
Local Open Scope evals_scope.

Two different notions of evaluation -- evals1 uses the big-step semantics, and evals2 uses the small-step semantics.
Definition evals1 (env:env_t value) (e:exp) (a:answer) := run (compile e env) a.
Definition evals2 (env:env_t value) (e:exp) (a:answer) :=
(compile e env) ==>* (Ret a).

Notation "env |= e ==> a" := (evals2 env e a) (at level 80) : evals_scope.

Now let's prove that our new semantics respects the typing rules.
Local Open Scope typing_scope.

Typing for computations: Notice that the type for bind!
Inductive compType : comp -> type -> Prop :=
| Ret_ct : forall a t, ansType a t -> compType (Ret a) t
| Delay_ct : forall e env G t,
envType env G -> G |-- e ; t ->
compType (Delay e env) t
| Bind_ct :
forall c f t1 t,
compType c t1 -> (forall v, valType v t1 -> compType (f v) t) -> compType (Bind c f) t.
Hint Constructors compType : type_db.

The compiler preserves types.
Lemma CompPreserves : forall G e t, G |-- e ; t ->
forall env, envType env G -> compType (compile e env) t.
Proof.
Ltac mysimp := simpl in * ; intros ; subst ; try congruence ;
match goal with
| [ |- context[string_dec ?x ?y] ] => destruct (string_dec x y)
| [ H : Some _ = Some _ |- _ ] => injection H ; clear H
| [ IH : forall _, envType _ ?G -> compType (compile ?e _) _, H: envType ?env ?G |- _] =>
generalize (IH env H) ; clear IH ; intro IH
| [ |- forall _, _ ] => intro
| _ => eauto with type_db
end.
Ltac pv :=
match goal with
| [ H : lookup _ _ = Some _, H0:envType _ _ |- context[lookup _ _]] =>
generalize H ; induction H0
| [ H : valType _ Nat_t |- _ ] => inversion H ; clear H
| [ H : valType _ Bool_t |- _ ] => inversion H ; clear H
| [ H : valType _ (Arrow_t _ _) |- _ ] => inversion H ; clear H
| [ |- context[compType(if ?b then _ else _) _] ] => destruct b
| [ |- compType (Ret (eval_binop ?b (Num_v _) (Num_v _))) _ ] => destruct b ; repeat mysimp
| _ => econstructor ; eauto with type_db ; econstructor ; eauto with type_db
end ; repeat mysimp.

induction 1 ; simpl ; intros; repeat mysimp ; (pv || econstructor ; eauto ; mysimp) ; repeat pv.
Qed.

A small step preserves types
Lemma StepPreserves : forall c c',
c ==>1 c' -> forall t, compType c t -> compType c' t.
Proof.
Ltac s :=
match goal with
| [ |- compType (compile _ _) _ ] => eapply CompPreserves ; eauto
| [ H : compType (ret _) _ |- _ ] => inversion H ; clear H ; subst
| [ H : ansType (Value _) _ |- _ ] => inversion H ; clear H ; subst
| [ H : compType (Ret TypeError) _ |- _] => inversion H ; clear H ; subst
| [ H : ansType TypeError _ |- _ ] => inversion H
| _ => eauto with type_db
end.

induction 1 ; intros ;
match goal with | [ H : compType _ _ |- _ ] => inversion H ; subst end ; repeat s.
Qed.

The reflexive/transitive closure of the step relation preserves types.
Lemma StepsPreserves : forall c c' t, c ==>* c' -> compType c t -> compType c' t.
Proof.
induction 1 ; auto ; eapply StepPreserves ; auto.
Qed.

Putting the translation and the compuation lemmas together, we get that any expression which has type t and that evaluates to an answer, produces an answer of type t.
Lemma Preservation : forall env e a,
env |= e ==> a ->
forall G t,
envType env G ->
G |-- e ; t -> ansType a t.
Proof.
intros env e a H G t H1 H2 ;
generalize (StepsPreserves H (CompPreserves H2 H1)) ; inversion 1 ; subst ; auto.
Qed.

Good exercises:

Comment out the other modules and then try the following:

• add a new binary operation -- where do the proofs break and how do you fix them?

• prove a progress theorem for well-typed computations: If compType c t then either c is a well-typed answer, or else c ==>1 c'.

• add a new type (Pair_t : type -> type -> type), expressions (Pair_e : exp -> exp -> exp) (Fst_e : exp -> exp) and (Snd_e : exp) and a value (Pair_v : value -> value -> value). Again, where to the definitions and the proofs have to change?

• add exceptions and a "catch" to the language for catching exceptions.

• replace the step relation with step1 (defined in terms of step_fn) and see where/how the proofs change.

• prove that (evals1 c a) <-> (evals2 c a).

• Add references. Now we must thread a store through the computations...