(* Copyright (c) 2011, Greg Morrisett and Joe Tassarotti *) (** * Regular expression matcher based on derivatives, inspired by the paper of Owens, Reppy, and Turon. *) Require Import Coq.Program.Equality. Require Import Coq.Init.Logic. Require Import List. Require Import Arith. Require Import Bool. Require Import Eqdep. Unset Automatic Introduction. Set Implicit Arguments. Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2. (** ** Argument to the module that builds the parser *) Module Type PARSER_ARG. (** We parameterize the development over a set of characters *) Parameter char_p : Set. Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}. (** Now we also parameterize over names for types used in the semantic actions. *) Parameter tipe : Set. (** And we require a decidable equality for type names. *) Parameter tipe_eq : forall (t1 t2:tipe), {t1=t2} + {t1<>t2}. (** And we require an interpretation of type names as sets. *) Parameter tipe_m : tipe -> Set. End PARSER_ARG. (** ** The Parser functor *) Module PARSER(PA : PARSER_ARG). Import PA. (** An inductively generated set of names for the types of results returned by the parser. We need at least unit, char, pairs, and lists, as well as user-defined types (tipe). *) Inductive result : Set := | unit_t : result | char_t : result | pair_t : result -> result -> result | list_t : result -> result | sum_t : result -> result -> result | tipe_t : tipe -> result. (** This allows us to build an equality on results. *) Definition result_eq : forall (r1 r2:result), {r1=r2}+{r1<>r2}. decide equality. apply tipe_eq. Defined. (** Now we give an interpretation of results as Coq sets. *) Fixpoint result_m(t:result) : Set := (match t with | unit_t => unit | char_t => char_p | pair_t t1 t2 => (result_m t1) * (result_m t2) | list_t t1 => list (result_m t1) | sum_t t1 t2 => (result_m t1) + (result_m t2) | tipe_t t => tipe_m t end)%type. (** ** Constructors for regular expression parsers. *) (** The parsers are indexed by the return type as a [result], and we've added a new kind of parser [Map_p] which is used to transform the result of one parser to another. *) Inductive parser : result -> Set := | Any_p : parser char_t | Char_p : char_p -> parser char_t | Eps_p : parser unit_t | Cat_p : forall t1 t2 (r1:parser t1) (r2:parser t2), parser (pair_t t1 t2) | Zero_p : forall t, parser t | Alt_p : forall t (r1 r2:parser t), parser t | Star_p : forall t, parser t -> parser (list_t t) | Map_p : forall t1 t2, ((result_m t1) -> (result_m t2)) -> parser t1 -> parser t2. (** ** Denotational semantics for parsers *) (** The semantics relates input strings (as lists of characters) to result values. *) Inductive in_parser : forall t, parser t -> list char_p -> (result_m t) -> Prop := | Any_pi : forall c, in_parser Any_p (c::nil) c | Char_pi : forall c, in_parser (Char_p c) (c::nil) c | Eps_pi : in_parser Eps_p nil tt | Alt_left_pi : forall t (p1 p2:parser t) cs (v:result_m t), in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v | Alt_right_pi : forall t (p1 p2:parser t) cs (v:result_m t), in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v | Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2, in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 -> in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2) | Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil | Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2, cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 -> in_parser (Star_p p) (cs1 ++ cs2) (v1::v2) | Map_pi : forall t1 t2 (f:result_m t1 -> result_m t2) (p:parser t1) cs v, in_parser p cs v -> in_parser (Map_p _ f p) cs (f v). (** Note that for [Star_cat_pi] we require [cs1] to be non-empty -- this is crucial for ensuring that any regular expression matches a string and returns a finite list of associated values. Otherwise, something like [Star Eps] would take the empty string to an infinite set of possible values. *) (** ** Internal Representation of Parsers: [regexp]s *) (** Internally, we translate parsers to a representation called [regexp] where all of the functions are replaced with a function name that we can look up in an environment. This will allow us to put together a decidable equality on regular expressions, which will in turn, allow us to hash-cons them (though we don't take advantage of this yet.) *) (** We'll represent function names as positions in an environment list. *) Definition fn_name := nat. Definition fn_name_eq := eq_nat_dec. (** In addition to user-level functions, we need a few built-in functions that are used during optimization. *) Inductive fn : result -> result -> Set := | Fn_name : forall (f:fn_name) t1 t2, fn t1 t2 | Fn_const_char : forall (c:char_p), fn unit_t char_t (* \x:unit.c *) | Fn_empty_list : forall t, fn unit_t (list_t t) (* \x:unit.@nil t *) | Fn_cons : forall t, fn (pair_t t (list_t t)) (list_t t) (* \x:(t*list t).(fst x)::(snd x) *) | Fn_unit_left : forall t, fn t (pair_t unit_t t) (* \x:t.(tt,x) *) | Fn_unit_right : forall t, fn t (pair_t t unit_t) (* \x:t.(x,tt) *) . (** Finally, a [regexp] is just like a [parser] except that the [Map] constructor takes a [fn] instead of an actual function. Note that this syntax doesn't preclude us from using a function name with the wrong type -- we'll capture this constraint later on. *) Inductive regexp : result -> Set := | Any : regexp char_t | Char : char_p -> regexp char_t | Eps : regexp unit_t | Cat : forall t1 t2, regexp t1 -> regexp t2 -> regexp (pair_t t1 t2) | Alt : forall t, regexp t -> regexp t -> regexp t | Zero : forall t, regexp t | Star : forall t, regexp t -> regexp (list_t t) | Map : forall (t u:result) (f:fn t u), regexp t -> regexp u. (** A simplification tactic used through the development *) Ltac mysimp := simpl in * ; intros ; repeat match goal with | [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto | [ |- _ /\ _ ] => split | [ H : context[result_eq ?e1 ?e2] |- _ ] => destruct (result_eq e1 e2) ; simpl in * ; try discriminate | [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] => generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst | [ H : _ /\ _ |- _ ] => destruct H | [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end | [ H : exists x, _ |- _ ] => destruct H | [ H : _ \/ _ |- _] => destruct H | [ H : _ <-> _ |- _] => destruct H | [ |- _ <-> _ ] => split | [ H : _::_ = _::_ |- _] => injection H ; clear H | _ => idtac end ; auto. (** Simplificaton followed by substitution. *) Ltac s := repeat (mysimp ; subst). (** Now we can try to define a syntactic equality checker on regexps. We'll begin by writing some simple functions that calculate booleans to avoid some hair with dependencies. *) Definition s2b (P Q:Prop) (x:{P}+{Q}) : bool := match x with | left _ => true | right _ => false end. (** Function equality *) Definition fn_eq t1 u1 (f1:fn t1 u1) t2 u2 (f2:fn t2 u2) : bool := match f1, f2 with | Fn_name f1 t1 u1, Fn_name f2 t2 u2 => s2b (fn_name_eq f1 f2) && s2b (result_eq t1 t2) && s2b (result_eq u1 u2) | Fn_const_char c1, Fn_const_char c2 => if char_eq c1 c2 then true else false | Fn_empty_list t1, Fn_empty_list t2 => s2b (result_eq t1 t2) | Fn_cons t1, Fn_cons t2 => s2b (result_eq t1 t2) | Fn_unit_left t1, Fn_unit_left t2 => s2b (result_eq t1 t2) | Fn_unit_right t1, Fn_unit_right t2 => s2b (result_eq t1 t2) | _, _ => false end. (** Regexp equality *) Fixpoint regexp_eq' t1 (r1:regexp t1) t2 (r2:regexp t2) : bool := match r1, r2 with | Any, Any => true | Char c1, Char c2 => if char_eq c1 c2 then true else false | Eps, Eps => true | Cat t1a t1b r1a r1b, Cat t2a t2b r2a r2b => regexp_eq' r1a r2a && regexp_eq' r1b r2b | Alt t1 r1a r1b, Alt t2 r2a r2b => regexp_eq' r1a r2a && regexp_eq' r1b r2b | Zero t1, Zero t2 => s2b(result_eq t1 t2) | Star t1 r1a, Star t2 r2a => regexp_eq' r1a r2a | Map t1 u1 f1 r1a, Map t2 u2 f2 r2a => fn_eq f1 f2 && regexp_eq' r1a r2a | _, _ => false end. (** A tactic for help in proving the correctness of the equality routines *) Ltac eq_help := match goal with | [ |- _ /\ _ ] => split ; auto | [ H : context[char_eq ?c ?c0] |- _ ] => destruct (char_eq c c0) ; subst ; auto ; try congruence | [ H : _ && _ = true |- _ ] => generalize (andb_prop _ _ H) ; clear H ; intro H ; destruct H | [ H : _ /\ _ |- _] => destruct H ; subst | [ H : s2b ?e = true |- _ ] => destruct e ; simpl in H ; subst ; try congruence | _ => auto end. (** If [fn_eq] returns true, then we know the functions and their types are equal. *) Lemma fn_eq_corr : forall t1 u1 (f1:fn t1 u1) t2 u2 (f2:fn t2 u2), fn_eq f1 f2 = true -> t1 = t2 /\ u1 = u2 /\ (existT (fun p => fn (fst p) (snd p)) (t1,u1) f1) = (existT (fun p => fn (fst p) (snd p)) (t2,u2) f2). Proof. induction f1 ; induction f2 ; simpl ; intros; try congruence ; repeat eq_help. Qed. (** If [regexp_eq'] returns true, then we know the regexps and their types are equal. *) Lemma regexp_eq'_corr : forall t1 (r1:regexp t1) t2 (r2:regexp t2), regexp_eq' r1 r2 = true -> t1 = t2 /\ (existT regexp t1 r1) = (existT regexp t2 r2). Proof. induction r1 ; destruct r2 ; simpl ; intros ; try congruence ; repeat (eq_help ; match goal with | [ IH : forall _ _, regexp_eq' ?r1 _ = true -> _, H : regexp_eq' ?r1 _ = true |- _]=> generalize (IH _ _ H) ; clear IH ; intros ; s | [ H : fn_eq ?f1 ?f2 = true |- _ ] => generalize (fn_eq_corr _ _ H) ; intros ; clear H | _ => auto end). mysimp. Qed. (** Now we can use this routine when we try to optimize [Alt]. *) Definition regexp_eq t (r1 r2:regexp t) : {r1 = r2} + {True} := match regexp_eq' r1 r2 as x return (regexp_eq' r1 r2 = x) -> {r1 = r2} + {True} with | true => fun H => left _ (inj_pairT2 _ _ _ _ _ (proj2 (regexp_eq'_corr _ _ H))) | false => fun H => right _ I end (eq_refl _). (** ** Environments for function names *) (** Function contexts map function names to a dependent pair consisting of - a pair of [result]s [(t1,t2)] - a function of type [result_m t1 -> result_m t2]. *) Definition fn_result_m(p:result*result) := result_m (fst p) -> result_m (snd p). Definition ctxt_t := list (sigT fn_result_m). Definition fn_result'(G:ctxt_t)(n:fn_name) : option (result*result)%type := match nth_error G n with | None => None | Some (existT p _) => Some p end. (** An empty set *) Inductive void : Set := . Definition fn_result_m_opt(p:option(result*result)) : Set := match p with | None => void | Some p => fn_result_m p end. (** This looks up a function in the environment, given a proof that the function name is in bounds. *) Fixpoint lookup_fn'(n:fn_name) : forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) := match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with | 0 => fun G => match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with | nil => fun H => match lt_n_O _ H with end | (existT p f)::_ => fun H => f end | S m => fun G => match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with | nil => fun H => match lt_n_O _ H with end | _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H) end end. (** In this section, we assume some context and some map for function names. *) Section FNMAP. Variable fn_ctxt : ctxt_t. (** Specialize the lookup to the parameters *) Definition fn_result := fn_result' fn_ctxt. Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H. (** A function is well-formed if all of the function names have the right types when we look them up in the [fn_ctxt]. *) Definition wf_fn t u (f:fn t u) : Prop := match f in fn u' v' with | Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2) | _ => True end. (** A predicate for determining if a [regexp] is type-consistent with the given [fn_ctxt]. Note that by construction, the only things that can be ill-typed are the embedded [fn]s in a [Map]. *) Fixpoint wf_regexp t (r:regexp t) : Prop := match r in regexp t' with | Any => True | Char _ => True | Eps => True | Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2 | Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2 | Zero _ => True | Star _ r => wf_regexp r | Map _ _ f r => wf_regexp r /\ wf_fn f end. (** Convert a well-formed [fn] to an actual function, using the [fn_map]. *) Definition apply_fn t1 t2 (f:fn t1 t2) : wf_fn f -> result_m t1 -> result_m t2. refine (fun t1 t2 (f:fn t1 t2) => match f in fn t1' t2' return wf_fn f -> result_m t1' -> result_m t2' with | Fn_name f t1 t2 => fun H => _ | Fn_const_char c => fun _ => fun _ => c | Fn_empty_list t => fun _ => fun _ => nil | Fn_cons t => fun _ => fun p => (fst p)::(snd p) | Fn_unit_left t => fun _ => fun x => (tt,x) | Fn_unit_right t => fun _ => fun x => (x,tt) end) ; simpl in * ; destruct H ; unfold fn_result in * ; generalize (@lookup_fn f0 H). rewrite H0. auto. Defined. (** ** Denotational Semantics for [regexp] *) (** Now we can give a semantic interpretation to the regexps under a given context. *) Inductive in_regexp : forall t, regexp t -> list char_p -> (result_m t) -> Prop := | Any_i : forall c, in_regexp Any (c::nil) c | Char_i : forall c, in_regexp (Char c) (c::nil) c | Eps_i : in_regexp Eps nil tt | Alt_left_i : forall t (r1 r2:regexp t) cs (v:result_m t), in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v | Alt_right_i : forall t (r1 r2:regexp t) cs (v:result_m t), in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v | Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2, in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2) | Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil | Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2, cs1 <> nil -> in_regexp r cs1 v1 -> in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2) | Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f), apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2. Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i Star_eps_i Star_cat_i Map_i : dfa. Notation "[[ r ]]" := (in_regexp r) (at level 0). (** Equivalence of regular expression parsers. *) Definition reg_eq t (r1 r2: regexp t) : Prop := forall cs v, [[r1]] cs v <-> [[r2]] cs v. Infix "[=]" := reg_eq (right associativity, at level 85). (** Reflexivity *) Lemma reg_eq_refl : forall t (r:regexp t), r [=] r. unfold reg_eq ; tauto. Qed. Hint Resolve reg_eq_refl : dfa. (** Transitivity *) Lemma reg_eq_trans : forall t (r1 r2 r3: regexp t), r1 [=] r2 -> r2 [=] r3 -> r1 [=] r3. unfold reg_eq ; mysimp ; generalize (H cs v) (H0 cs v) ; firstorder. Qed. (** Symmetry *) Lemma reg_eq_sym : forall t (r1 r2: regexp t), r1 [=] r2 -> r2 [=] r1. unfold reg_eq ; mysimp ; generalize (H cs v) ; firstorder. Qed. (** We define some explicit inversion principles for the [regexp] constructors so that we can invert them even when the arguments aren't variables. If we tried to use inversion otherwise, then Coq will choke because of the dependencies. *) (** An inversion principle for [Eps]. *) Lemma EpsInv : forall cs v, in_regexp Eps cs v -> cs = nil /\ v = tt. intros cs v H; inversion H ; mysimp. Qed. (** Inversion principle for [Any]. *) Lemma AnyInv : forall cs v, [[Any]] cs v -> cs = v::nil. intros cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Char c]. *) Lemma CharInv : forall c cs v, [[Char c]] cs v -> cs = c::nil /\ v = c. intros c cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Cat r1 r2].*) Lemma CatInv : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs v, [[Cat r1 r2]] cs v -> exists cs1, exists cs2, exists v1, exists v2, [[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2). Proof. intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto. Qed. (** Inversion principle for [Alt r1 r2]. *) Lemma AltInv : forall t (r1 r2:regexp t) cs v, [[Alt r1 r2]] cs v -> ([[r1]] cs v \/ [[r2]] cs v). intros t r1 r2 cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Star r]. *) Lemma StarInv : forall t (r:regexp t) cs v, [[Star r]] cs v -> (cs = nil /\ v = nil) \/ (exists cs1, exists v1, exists cs2, exists v2, cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2). Proof. intros t r cs v H ; inversion H ; mysimp ; right. exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto. Qed. (** Inversion principle for [Map f r]. *) Lemma MapInv : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v, [[Map f r]] cs v -> exists H:wf_fn f, exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v. intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto. Qed. (** Inversion principle for [Zero]. *) Lemma ZeroInv : forall t cs v, [[Zero t]] cs v -> False. intros t cs v H; inversion H. Qed. (** A little tactic that takes care of inversion for the [in_regexp] relation. *) Ltac in_inv := match goal with | [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H | [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H | [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H | [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H | [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H | [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H) (*| [ H : [[Star _]] _ _ |- _] => generalize (StarInv H) ; clear H*) | [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H end ; mysimp ; subst ; auto with dfa. (** A little tactic that tries to prove [in_regexp]. *) Ltac pv_in := try in_inv ; match goal with | [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] => eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa | [ |- in_regexp (Map ?f ?r) ?cs ?v ] => eapply (Map_i f I (eq_refl _)) ; eauto with dfa | [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto | [ |- in_regexp (Cat _ _) nil (_,_) ] => let H := fresh "H" in assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ; apply Cat_i ; auto with dfa | [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] => let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ; [ auto | rewrite H] ; clear H ; apply Cat_i ; auto with dfa | _ => auto with dfa end. (** [Cat r Eps] is equivalent to [r] *) Lemma cat_eps_r : forall t (r:regexp t), (Cat r Eps) [=] (Map (Fn_unit_right t) r). Proof. unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in. Qed. (** [Cat Eps r] is equivalent to [r] *) Lemma cat_eps_l : forall t (r:regexp t), (Cat Eps r) [=] (Map (Fn_unit_left t) r). unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in. Qed. (** [Cat r Zero] is equivalent to [Zero] *) Lemma cat_zero_r : forall t1 t2 (r:regexp t1), (Cat r (Zero t2)) [=] Zero _. unfold reg_eq ; repeat (mysimp ; pv_in). Qed. (** [Cat Zero r] is equivalent to [Zero] *) Lemma cat_zero_l : forall t1 t2 (r:regexp t2), (Cat (Zero t1) r) [=] Zero _. unfold reg_eq ; repeat (mysimp ; pv_in). Qed. (** In the simple matcher code, we could fuse OptCat and OptCat_r into a single definition. Here, we broke it out to make the dependency a little simpler to reason about. *) (** Used in an optimizing constructor for [Cat]. *) Definition OptCat_r t1 t2 (r2:regexp t2) (r1:regexp t1) := match r2 in regexp t2' return regexp (pair_t t1 t2') with | Zero _ => Zero _ | Eps => Map (Fn_unit_right t1) r1 | r2 => Cat r1 r2 end. (** An optimized constructor for [Cat]. *) Definition OptCat t1 t2 (r1:regexp t1) (r2:regexp t2) := match r1 in regexp t1' return regexp (pair_t t1' t2) with | Zero _ => Zero _ | Eps => Map (Fn_unit_left t2) r2 | r1 => OptCat_r r2 r1 end. (** [OptCat r1 r2] is equivalent to [Cat r1 r2] *) Lemma opt_cat_r : forall t1 t2 (r1:regexp t1) (r2:regexp t2), OptCat_r r2 r1 [=] Cat r1 r2. Proof. destruct r2 ; simpl ; apply reg_eq_sym ; (apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r). Qed. Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2), OptCat r1 r2 [=] Cat r1 r2. destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ; apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l). Qed. (** [Alt r Zero] is equivalent to [r]. *) Lemma alt_zero_r : forall t (r:regexp t), (Alt r (Zero _)) [=] r. unfold reg_eq ; intros ; repeat (mysimp ; pv_in). Qed. (** [Alt Zero r] is equivalent to [r]. *) Lemma alt_zero_l : forall t (r:regexp t), (Alt (Zero _) r) [=] r. unfold reg_eq ; intros ; repeat (mysimp ; pv_in). Qed. (** Used in an optimizing version of [Alt]. *) Definition OptAlt_r t (r2:regexp t) : regexp t -> regexp t := match r2 in regexp t' return regexp t' -> regexp t' with | Zero _ => fun r => r | r => fun r1 => if regexp_eq r r1 then r else Alt r1 r end. (** Optimized version of [Alt]. *) Definition OptAlt t (r1:regexp t) : regexp t -> regexp t := match r1 in regexp t' return regexp t' -> regexp t' with | Zero _ => fun r => r | r => fun r2 => OptAlt_r r2 r end. (** [Alt r r] is equivalent to [r] *) Lemma alt_refl t (r:regexp t) : Alt r r [=] r. Proof. unfold reg_eq ; repeat (mysimp ; pv_in). Qed. (** [OptAlt r1 r2] is equivalent to [Alt r1 r2] *) Lemma opt_alt_r t (r1 r2:regexp t) : OptAlt_r r2 r1 [=] Alt r1 r2. destruct r2 ; simpl ; repeat match goal with | [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; subst | [ |- ?r [=] Alt ?r ?r ] => apply reg_eq_sym ; apply alt_refl | [ |- ?r [=] ?r ] => apply reg_eq_refl | [ |- ?r [=] Alt ?r (Zero _) ] => apply reg_eq_sym ; apply alt_zero_r | _ => idtac end. Qed. Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2. destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ; apply alt_zero_l. Qed. (** Optimizing version of [Map]. *) Definition OptMap (t u: result) (f: fn t u) (r1: regexp t) : regexp u := match r1 in regexp t' return regexp u with | Zero _ => Zero _ | _ => Map f r1 end. (** [OptMap f r] is equivalent to [Map f r]. *) Lemma opt_map: forall (t u: result) (f: fn t u) (r1: regexp t), OptMap f r1 [=] Map f r1. unfold reg_eq ; destruct r1 ; simpl ; repeat (mysimp ; pv_in). Qed. (** Now we define what it means for a function to be a valid parser. A function [f: regexp t -> list char_p -> list (result_m t)], is a valid parser if, when given a [regexp] and a string of characters [s], it returns a list of values [[v1,...,vn]], such that { (s,vi) } is the relation denoted by the regular expression. *) Definition is_parser (f: forall t (r:regexp t), wf_regexp r -> list char_p -> list (result_m t)) : Prop := forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:result_m t), ([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v). (** -----------------------------------------------------*) (** ** Now we define the actual derivative-based parser. *) (** -----------------------------------------------------*) (** Returns a regexp denoting { (null,v) | (null,v) in r }. *) Fixpoint null t (r:regexp t) : regexp t := match r in regexp t' return regexp t' with | Any => Zero _ | Char _ => Zero _ | Eps => Eps | Zero _ => Zero _ | Alt t r1 r2 => OptAlt (null r1) (null r2) | Cat t1 t2 r1 r2 => OptCat (null r1) (null r2) | Star t _ => OptMap (Fn_empty_list t) Eps | Map _ _ f r1 => OptMap f (null r1) end. (** This is the heart of the algorithm. It returns a regexp denoting { (cs,v) | (c::cs,v) in r }. *) Fixpoint deriv t (r:regexp t) (c:char_p) : regexp t := match r in regexp t' return regexp t' with | Any => OptMap (Fn_const_char c) Eps | Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _ | Eps => Zero _ | Zero _ => Zero _ | Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c) | Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c)) | Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r)) | Map _ _ f r1 => OptMap f (deriv r1 c) end. (** When we are done parsing using a [regexp] r, we are left with a [regexp] denoting { (null,v) | exists s.(s,v) in r }. This function computes all of the values [v] in this set. To do so, it needs to be well-formed with respect to the context. *) Definition apply_null t (r:regexp t) : wf_regexp r -> list (result_m t). refine ( fix apply_null t (r:regexp t) : wf_regexp r -> list (result_m t) := match r in regexp t' return wf_regexp r -> list (result_m t') with | Any => fun H => nil | Char _ => fun H => nil | Zero _ => fun H => nil | Eps => fun H => tt::nil | Star _ r => fun H => nil::nil | Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _) | Cat _ _ r1 r2 => fun H => let res1 := apply_null _ r1 _ in let res2 := apply_null _ r2 _ in fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1 | Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _) end ) ; mysimp. Defined. Lemma InConcatMap A B (x:A) (y:B) : forall xs, In x xs -> forall ys, In y ys -> In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs). Proof. induction xs ; intro H ; [ contradiction H | destruct H ] ; s ; apply in_or_app ; [ idtac | firstorder ]. left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ]. Qed. (** Show that if [(nil,v)] is in [r] then [apply_null r] returns [v] as a result *) Lemma ApplyNull1 t (r:regexp t) cs v H : [[r]] cs v -> cs = nil -> In v (apply_null r H). Proof. induction 1 ; mysimp ; try congruence ; repeat match goal with | [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp | [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto | [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _, H1 : ?cs = nil |- _ ] => let l := fresh "l" in generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ; left | [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] => rewrite (proof_irrelevance H1 H2) ; auto | [ H : None = Some _ |- _ ] => congruence | [ |- In _ (_ ++ _) ] => apply in_or_app ; auto end. Qed. Lemma InConcatMap1 A B (x:A) (y:B) xs ys : In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) -> In x xs. Proof. induction xs ; mysimp ; repeat match goal with | [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto] | [ H : In (?x, _) (map _ ?ys) |- _ ] => let l := fresh "l" in generalize H ; generalize ys as l; induction l ; mysimp ; firstorder ; congruence end. Qed. Lemma InConcatMap2 A B (x:A) (y:B) xs ys : In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) -> In y ys. Proof. induction xs ; mysimp ; firstorder ; match goal with | [ H : In _ (map _ ?ys ++ _) |- _ ] => let H0 := fresh "H" in let l := fresh "l" in generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ; generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence end. Qed. (** Show that if [apply_null r] returns [v] as a result, then [(nil,v)] is in [r] *) Lemma ApplyNull2 t (r:regexp t) H v : In v (apply_null r H) -> [[r]] nil v. Proof. induction r ; s ; try contradiction ; auto with dfa ; match goal with | [ IHr1 : _, IHr2 : _, v: prod _ _ |- in_regexp (Cat _ _) _ _] => destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto | eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto] | [ IHr1:_, IHr2:_, H0 : In _ (_ ++ _) |- in_regexp (Alt _ _) _ _ ] => generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ; [ apply Alt_left_i ; eapply IHr1 ; eauto | eapply Alt_right_i ; eapply IHr2 ; eauto ] | [ IHr: forall _, _, H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] => let l := fresh "l" in generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ; clear IHr H0 ; mysimp ; try tauto ; eapply Map_i ; eauto end. Qed. (** Show that the optimizing constructors preserve well-formedness. *) Lemma wf_cat_opt : forall t1 t2 (r1:regexp t1) (r2:regexp t2), wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2). Proof. destruct r1 ; mysimp ; destruct r2 ; mysimp. Qed. Lemma wf_alt_opt : forall t (r1 r2:regexp t), wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2). Proof. dependent destruction r1 ; mysimp ; dependent destruction r2 ; mysimp ; match goal with | [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; auto end ; mysimp. Qed. Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t), wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1). Proof. destruct r1; mysimp. Qed. (** Show that [null r] is well-formed if [r] is well-formed. *) Lemma wf_null : forall t (r:regexp t), wf_regexp r -> wf_regexp (null r). induction r ; mysimp ; (apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto. Qed. (** Show that [deriv r c] is well-formed if [r] is well-formed. *) Lemma wf_deriv : forall t (r:regexp t) c, wf_regexp r -> wf_regexp (deriv r c). Proof. induction r; mysimp ; repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ; simpl ; auto. Qed. Fixpoint deriv_parse' t (r:regexp t) (cs:list char_p) : regexp t := match cs with | nil => r | c::cs' => deriv_parse' (deriv r c) cs' end. Lemma wf_derivs : forall cs t (r:regexp t), wf_regexp r -> wf_regexp (deriv_parse' r cs). induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto. Qed. (** Finally, the derivative-based parser simply runs through the input, calculating derivatives based on the characters it sees, and calls [apply_null] when it gets to the end of the input. *) Definition deriv_parse t (r:regexp t) (H:wf_regexp r) cs : list (result_m t) := apply_null (deriv_parse' r cs) (wf_derivs cs r H). (** Tactic for helping to reason about the optimizing constructors. *) Ltac pv_opt := match goal with | [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] => apply (proj2 (opt_alt r1 r2 cs v)) | [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] => apply (proj2 (opt_cat r1 r2 cs v)) | [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] => apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail) | [ H : ?x ++ ?y = nil |- _] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp | [ H : nil = ?x ++ ?y |- _] => generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp | [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] => generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H | [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] => generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H | [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] => generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp | [ v : prod (result_m _) (result_m _) |- _ ] => destruct v | [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp | _ => auto with dfa end. (** [apply_null] is correct. *) Lemma Null1 : forall t (r:regexp t) cs v, [[r]] cs v -> cs = nil -> [[null r]] cs v. Proof. induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in ; try congruence. Qed. Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v. Proof. induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ; try apply nil ; eapply @Map_i ; eauto. Qed. (** [deriv] is correct part 1. *) Lemma Deriv1 : forall t (r:regexp t) c cs v, [[r]] (c::cs) v -> [[deriv r c]] cs v. Proof. induction r ; simpl ; intros ; try in_inv ; match goal with | [ H : in_regexp (Star _) _ _ |- _] => generalize (StarInv H); clear H ; mysimp ; subst | _ => idtac end ; try congruence ; repeat (pv_opt ; match goal with | [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] => destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt | [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] => let H := fresh "H" in assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto | [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] => apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _)) | [ |- in_regexp (Map (Fn_const_char ?v) Eps) nil ?v ] => apply (@Map_i _ _ (Fn_const_char v) Eps nil tt v I (eq_refl _)) | [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ; rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in | [ H2 : _::?cs = ?x ++ ?x1 |- _ ] => destruct x ; try congruence ; simpl in H2 ; injection H2 ; s ; apply Cat_i ; auto with dfa | _ => s ; auto with dfa end). Qed. (** If [null r] matches [cs] returning [v], then [cs] must be empty *) Lemma NullNil : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil. Proof. induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat match goal with | [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _, H : in_regexp (null ?r) _ _ |- _] => rewrite (IHr _ _ H) ; clear IHr ; auto end. Qed. (** [deriv] is correct part 2. *) Lemma Deriv2 : forall t (r:regexp t) c cs v, [[deriv r c]] cs v -> [[r]] (c::cs) v. Proof. induction r ; simpl ; intros ; repeat (match goal with | [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s | [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] => let H1 := fresh "H" in generalize (NullNil _ H) ; s ; assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ; pv_in ; apply Null2 ; auto | [ H: in_regexp (deriv ?r1 _) ?x ?x1 |- in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in | [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] => let H := fresh "H" in assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ; eapply Star_cat_i ; eauto ; congruence | [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto | [ H : in_regexp (Star _) _ _ |- _ ] => fail 1 | _ => pv_opt ; in_inv end). Qed. (** First half of correctness for [deriv_parse]. *) Lemma Deriv'1 cs t (r:regexp t) v : [[deriv_parse' r cs]] nil v -> [[r]] cs v. Proof. induction cs ; mysimp. apply Deriv2. apply IHcs. auto. Qed. Lemma Deriv'2 cs t (r:regexp t) v : [[r]] cs v -> [[deriv_parse' r cs]] nil v. Proof. induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto. Qed. Lemma DerivParse1 cs t (r:regexp t) H vs : deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v. Proof. unfold deriv_parse. intros. subst. generalize (ApplyNull2 _ _ _ H1). apply Deriv'1. Qed. (** Second half of correctness for [deriv_parse]. *) Lemma DerivParse2 cs t (r:regexp t) v H : [[r]] cs v -> In v (deriv_parse r H cs). Proof. unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto. Qed. (** [deriv_parse] is correct *) Theorem DerivParse_is_parser : is_parser deriv_parse. Proof. unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto. Qed. End FNMAP. (** ** Now we need to translate our external representation, [parser] to our internal representation [regexp] and build an appropriate [ctxt] mapping function names to functions of the right type. *) (** Add a new function to the end of the context and return its position as a "fresh" function name, along with the new context. *) Definition extend_state(s:ctxt_t) t1 t2 (f:fn_result_m(t1,t2)) : (fn_name * ctxt_t) := (length s, s ++ (existT fn_result_m (t1,t2) f)::nil). (** Convert a parser with inlined functions to a regexp and a function map. *) Fixpoint par2reg t (p:parser t)(s:ctxt_t) : (regexp t) * ctxt_t := match p in parser t return (regexp t) * ctxt_t with | Any_p => (Any, s) | Char_p b => (Char b, s) | Eps_p => (Eps, s) | Cat_p t1 t2 p1 p2 => let (r1,s1) := par2reg p1 s in let (r2,s2) := par2reg p2 s1 in (Cat r1 r2, s2) | Zero_p t => (Zero t, s) | Alt_p t p1 p2 => let (r1,s1) := par2reg p1 s in let (r2,s2) := par2reg p2 s1 in (Alt r1 r2, s2) | Star_p t p1 => let (r1,s1) := par2reg p1 s in (Star r1, s1) | Map_p t1 t2 f p => let (r,s1) := par2reg p s in let (n,s2) := extend_state s1 f in (@Map t1 t2 (Fn_name n t1 t2) r, s2) end. (** Initial state for the translation. *) Definition initial_ctxt : ctxt_t := nil. (** Top-level translation of parsers to regexps. *) Definition parser2regexp t (p:parser t) : (regexp t) * ctxt_t := par2reg p initial_ctxt. (** ** Now we need to prove that the translation [parser2regexp] preserves meaning. *) (** Tactic to propagate information about the translation. *) Ltac unfold_par2reg := match goal with | [ |- context[par2reg ?p ?s] ] => let H := fresh "H" in let x := fresh "x" in let r := fresh "r" in let s := fresh "s" in assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ; destruct H as [x H]; rewrite <- H in * ; destruct x as [r s] end. (** Define a partial order on contexts, which the translation respects. That is, the input context is always less than the output context. *) Definition ctxt_leq(c1 c2:ctxt_t) : Prop := exists c:ctxt_t, c2 = c1 ++ c. Lemma ctxt_leq_trans : forall c1 c2 c3, ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3. Proof. unfold ctxt_leq ; s. rewrite app_ass. eauto. Qed. Lemma ctxt_leq_refl : forall c, ctxt_leq c c. unfold ctxt_leq ; s. exists nil. apply app_nil_end. Qed. Hint Resolve ctxt_leq_refl : dfa. Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c'). unfold ctxt_leq ; s. eauto. Qed. Hint Resolve ctxt_ext : dfa. (** Simplify reasoning about inductive hypotheses that depend upon a context. *) Ltac p2rsimp := match goal with | [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y) | [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] => generalize (IH s) ; match goal with | [ |- context[par2reg ?p ?s] ] => let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ; intro H' | _ => fail end | _ => idtac end. (** The translation only results in a greater or equal ctxt *) Lemma p2r_extends : forall t (p:parser t) (c:ctxt_t), ctxt_leq c (snd (par2reg p c)). Proof. unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ; repeat unfold_par2reg ; simpl ; try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ; try rewrite app_ass ; econstructor ; eauto. Qed. (** Functions stay well-typed under greater or equal contexts. *) Lemma extends_wf_fn : forall t1 t2 (f:fn t1 t2) c c', ctxt_leq c c' -> wf_fn c f -> wf_fn c' f. Proof. unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ; unfold fn_result, fn_result' in * ; induction f ; destruct c ; [ mysimp | mysimp | mysimp | intro ] ; try congruence ; simpl in *. firstorder. destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp. firstorder. Qed. (** Regexps stay well-typed under greater or equal contexts. *) Lemma extends_wf_regexp : forall t (r:regexp t) c c', ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r. Proof. induction r ; simpl ; s ; firstorder. eapply extends_wf_fn ; eauto. econstructor ; eauto. Qed. (** Tactic for remembering a translated sub-term is well-formed. *) Ltac p2rext := match goal with | [ H : (?r,?s2) = par2reg ?p ?s |- _ ] => generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H end. Lemma FnResult : forall s p r, fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p. Proof. induction s ; mysimp ; firstorder. Qed. Hint Resolve FnResult. (** The translation results in a well-formed regexp. *) Lemma p2r_wf : forall t (p:parser t) c, wf_regexp (snd (par2reg p c)) (fst (par2reg p c)). Proof. induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ; try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith. Qed. (** Applying the same function in an extended environment yields the same result. *) Lemma extend_apply : forall x t1 t2 (f:fn t1 t2) s (H:wf_fn s f) v1 H', apply_fn (s ++ x) f H' v1 = apply_fn s f H v1. Proof. destruct f ; auto. simpl. mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e. induction s ; destruct f ; mysimp ; firstorder ; unfold fn_result, fn_result' in * |- ; simpl in * ; try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto. Qed. Hint Resolve extend_apply : dfa. (** [in_regexp] respects weakening on states. *) Lemma in_leq : forall t (r:regexp t) s cs v, in_regexp s r cs v -> forall s', ctxt_leq s s' -> in_regexp s' r cs v. Proof. induction 1 ; repeat (s ; firstorder) ; try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)). apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa. Qed. (** Newly-allocated functions are well-formed with respect to the resulting context. *) Lemma new_fn_wf : forall t1 t2 f s, wf_fn (s ++ existT fn_result_m (t1,t2) f :: nil) (Fn_name (length s) t1 t2). Proof. induction s; mysimp ; firstorder. Qed. Hint Resolve new_fn_wf : dfa. (** If [(cs,v)] is in [[p]] then [(cs,v)] is in [[r]] under the output context [s]. *) Lemma p2r_ok : forall t (p:parser t) cs v, in_parser p cs v -> forall s, in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v. Proof. induction 1 ; simpl ; intro s ; try (constructor ; fail) ; repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ; try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) || (apply Alt_right_i ; auto). apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)). generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ; unfold fn_result, fn_result' in * ; simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto. generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto. eapply in_leq ; eauto. auto with dfa. Qed. (** If [(cs,v)] is in [[r]] under context [s2], and [r] is well-formed with respect to context [s1], and [s1 <= s2], then [(cs,v)] is in [[r]] under context [s1]. *) Lemma extends_in : forall t (r:regexp t) cs v s1 s2, in_regexp s2 r cs v -> wf_regexp s1 r -> ctxt_leq s1 s2 -> in_regexp s1 r cs v. Proof. induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) || (eapply Alt_right_i ; eauto) || (apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s. destruct H3. subst. symmetry. eapply extend_apply. Qed. Hint Resolve extends_in : dfa. (** Used for induction over proofs of [(cs,v)] in [Star r]. *) Fixpoint in_unwind s t (r:regexp t) n cs (v:result_m (list_t t)) : Prop := match n with | 0 => cs = nil /\ v = nil | S m => exists cs1, exists cs2, exists v1, exists v2, cs = cs1 ++ cs2 /\ v = (v1::v2) /\ cs1 <> nil /\ in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2 end. Definition coerce_reg t1 t2 (r:regexp t1) : t1 = t2 -> regexp t2. intros. rewrite H in r. apply r. Defined. Definition coerce_val t1 t2 (v:result_m t1) : t1 = t2 -> result_m t2. intros. rewrite H in v. apply v. Defined. (** If [(cs,v)] is in [Star r] then there exists some [n] such that [(cs,v)] is in the nth unwinding of [r]. This gives us an easy inner induction. *) Lemma star_rep : forall s t (r:regexp t) cs (v:result_m t), in_regexp s r cs v -> forall t1 (r1 : regexp t1) (H: t = list_t t1), coerce_reg r H = Star r1 -> exists n, in_unwind s r1 n cs (coerce_val v H). Proof. unfold coerce_reg, coerce_val; induction 1 ; mysimp ; s ; try discriminate ; repeat match goal with | [ H : list_t _ = list_t _ |- _] => injection H ; intros ; subst ; rewrite (proof_irrelevance H (eq_refl _)) in * ; simpl in * | [ H : Star _ = Star _ |- _ ] => injection H ; mysimp | _ => idtac end ; [ exists 0 ; mysimp | idtac]. destruct (IHin_regexp2 t1 r1 (eq_refl _) (eq_refl _)) ; exists (S x) ; repeat econstructor ; mysimp. Qed. (** If [(cs,v)] is in [r] under context [s], where [(r,s) = par2reg p], then [(cs,v)] is in [p]. *) Lemma r2p_ok : forall t (p:parser t) s cs v, in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v -> in_parser p cs v. Proof. induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ; simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) || generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) || generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ; try (econstructor ; eauto ; fail) ; repeat match goal with | [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _, H : _ = par2reg ?p ?s |- _ ] => generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ; rewrite <- H ; simpl ; clear IH ; intros end ; try contradiction || (econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa). generalize (star_rep H0 (eq_refl _) (eq_refl _)). mysimp. clear H0. subst. generalize cs v H4. clear cs v H4. induction x ; s ; constructor ; auto. match goal with | [ |- in_parser (Map_p _ r _) _ (?f x) ] => assert (f = r) end. generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp. rewrite (proof_irrelevance e0 (eq_refl _)). auto. rewrite H5. apply (Map_pi _ r). apply H4. apply (@extends_in _ r0 cs x s0 (snd (extend_state s0 r)) H1 H2). unfold extend_state. simpl. auto with dfa. Qed. (** The translation preserves meaning *) Theorem parser2regexp_equiv : forall t (p:parser t) cs v, in_parser p cs v <-> in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v. Proof. unfold parser2regexp ; mysimp ; firstorder ; [eapply p2r_ok | eapply r2p_ok] ; eauto. Qed. (** Finally -- convert the parser to a regexp and then run the derivative-based parser on this. *) Definition parse t (p:parser t) : list char_p -> list (result_m t) := deriv_parse (snd (parser2regexp p)) (fst (parser2regexp p)) (p2r_wf p _). Theorem parse_correct : forall t (p:parser t) cs v, in_parser p cs v <-> In v (parse p cs). Proof. intros t p cs v ; generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p)) (p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ; generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto. Qed. End PARSER.