(* Copyright (c) 2011, Greg Morrisett and Joe Tassarotti *) (** * Regular expression matcher based on derivatives, inspired by the paper of Owens, Reppy, and Turon. *) Require Import Coq.Program.Equality. Require Import Coq.Init.Logic. Require Import List. Require Import Arith. Require Import Eqdep. Unset Automatic Introduction. Set Implicit Arguments. Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2. (** ** Argument to the module that builds the parser *) Module Type PARSER_ARG. (** We parameterize the development over a set of characters *) Parameter char_p : Set. Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}. End PARSER_ARG. (** ** The Parser functor *) Module PARSER(PA : PARSER_ARG). Import PA. (** ** Constructors for regular expression parsers. *) (** The parsers are indexed by the return type, and we've added a new kind of parser [Map_p] which is used to transform the result of one parser to another. *) Inductive parser : Set -> Type := | Any_p : parser char_p | Char_p : char_p -> parser char_p | Eps_p : parser unit | Cat_p : forall (t1 t2:Set) (r1:parser t1) (r2:parser t2), parser (t1 * t2) | Zero_p : forall (t:Set), parser t | Alt_p : forall (t:Set) (r1 r2:parser t), parser t | Star_p : forall (t:Set), parser t -> parser (list t) | Map_p : forall (t1 t2:Set), (t1 -> t2) -> parser t1 -> parser t2. (** ** Denotational semantics for parsers *) (** The semantics relates input strings (as lists of characters) to values. *) Inductive in_parser : forall t, parser t -> list char_p -> t -> Prop := | Any_pi : forall c, in_parser Any_p (c::nil) c | Char_pi : forall c, in_parser (Char_p c) (c::nil) c | Eps_pi : in_parser Eps_p nil tt | Alt_left_pi : forall t (p1 p2:parser t) cs (v:t), in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v | Alt_right_pi : forall t (p1 p2:parser t) cs (v:t), in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v | Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2, in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 -> in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2) | Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil | Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2, cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 -> in_parser (Star_p p) (cs1 ++ cs2) (v1::v2) | Map_pi : forall (t1 t2:Set) (f:t1 -> t2) (p:parser t1) cs v, in_parser p cs v -> in_parser (Map_p f p) cs (f v). (** Note that for [Star_cat_pi] we require [cs1] to be non-empty -- this is crucial for ensuring that any regular expression matches a string and returns a finite list of associated values. Otherwise, something like [Star Eps] would take the empty string to an infinite set of possible values. However, note that (ignoring possible return values) the proof rules above are equivalent (i.e., will accept the same strings) as one where we allow [cs1] to be empty. *) (** ** Internal Representation of Parsers: [regexp]s *) (** Internally, we translate parsers to a representation called [regexp] where all of the functions are replaced with a function name that we can look up in an environment. This will allow us to put together a decidable equality on regular expressions, which will in turn, allow us to hash-cons them (though we don't take advantage of this yet.) *) (** We'll represent function names as positions in an environment list. *) Definition fn_name := nat. Definition fn_name_eq := eq_nat_dec. (** In addition to user-level functions, we need a few built-in functions that are used during optimization. *) Inductive fn : Set -> Set -> Type := | Fn_name : forall (f:fn_name) t1 t2, fn t1 t2 | Fn_const_char : forall (c:char_p), fn unit char_p (* \x:unit.c *) | Fn_empty_list : forall (t:Set), fn unit (list t) (* \x:unit.@nil t *) | Fn_cons : forall (t:Set), fn (t * (list t)) (list t) (* \x:(t*list t).(fst x)::(snd x) *) | Fn_unit_left : forall (t:Set), fn t (unit * t) (* \x:t.(tt,x) *) | Fn_unit_right : forall (t:Set), fn t (t * unit) (* \x:t.(x,tt) *) . (** Finally, a [regexp] is just like a [parser] except that the [Map] constructor takes a [fn] instead of an actual function. Note that this syntax doesn't preclude us from using a function name with the wrong type -- we'll capture this constraint later on. *) Inductive regexp : Set -> Type := | Any : regexp char_p | Char : char_p -> regexp char_p | Eps : regexp unit | Cat : forall (t1 t2:Set), regexp t1 -> regexp t2 -> regexp (t1 * t2) | Alt : forall (t:Set), regexp t -> regexp t -> regexp t | Zero : forall (t:Set), regexp t | Star : forall (t:Set), regexp t -> regexp (list t) | Map : forall (t u:Set) (f:fn t u), regexp t -> regexp u. (** A simplification tactic used through the development *) Ltac mysimp := simpl in * ; intros ; repeat match goal with | [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto | [ |- _ /\ _ ] => split | [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] => generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst | [ H : _ /\ _ |- _ ] => destruct H | [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end | [ H : exists x, _ |- _ ] => destruct H | [ H : _ \/ _ |- _] => destruct H | [ H : _ <-> _ |- _] => destruct H | [ |- _ <-> _ ] => split | [ H : _::_ = _::_ |- _] => injection H ; clear H | _ => idtac end ; try (firstorder) ; auto. (** Simplificaton followed by substitution. *) Ltac s := repeat (mysimp ; subst). (** ** Environments for function names *) (** Function contexts map function names to a dependent pair consisting of - a pair of [Set]s [(t1,t2)] - a function of type [t1 -> t2]. *) Definition fn_result_m(p:Set*Set) := (fst p) -> (snd p). Definition ctxt_t := list (sigT fn_result_m). Definition fn_result'(G:ctxt_t)(n:fn_name) : option (Set*Set)%type := match nth_error G n with | None => None | Some (existT p _) => Some p end. (** An empty set *) Inductive void : Set := . Definition fn_result_m_opt(p:option(Set*Set)) : Set := match p with | None => void | Some p => fn_result_m p end. (** This looks up a function in the environment, given a proof that the function name is in bounds. *) Fixpoint lookup_fn'(n:fn_name) : forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) := match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with | 0 => fun G => match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with | nil => fun H => match lt_n_O _ H with end | (existT p f)::_ => fun H => f end | S m => fun G => match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with | nil => fun H => match lt_n_O _ H with end | _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H) end end. (** In this section, we assume some context and some map for function names. *) Section FNMAP. Variable fn_ctxt : ctxt_t. (** Specialize the lookup to the parameters *) Definition fn_result := fn_result' fn_ctxt. Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H. (** A function is well-formed if all of the function names have the right types when we look them up in the [fn_ctxt]. *) Definition wf_fn t u (f:fn t u) : Prop := match f in fn u' v' with | Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2) | _ => True end. (** A predicate for determining if a [regexp] is type-consistent with the given [fn_ctxt]. Note that by construction, the only things that can be ill-typed are the embedded [fn]s in a [Map]. *) Fixpoint wf_regexp t (r:regexp t) : Prop := match r in regexp t' with | Any => True | Char _ => True | Eps => True | Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2 | Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2 | Zero _ => True | Star _ r => wf_regexp r | Map _ _ f r => wf_regexp r /\ wf_fn f end. (** Convert a well-formed [fn] to an actual function, using the [fn_map]. *) Definition apply_fn t1 t2 (f:fn t1 t2) : wf_fn f -> t1 -> t2. refine (fun t1 t2 (f:fn t1 t2) => match f in fn t1' t2' return wf_fn f -> t1' -> t2' with | Fn_name f t1 t2 => fun H => _ | Fn_const_char c => fun _ => fun _ => c | Fn_empty_list t => fun _ => fun _ => nil | Fn_cons t => fun _ => fun p => (fst p)::(snd p) | Fn_unit_left t => fun _ => fun x => (tt,x) | Fn_unit_right t => fun _ => fun x => (x,tt) end) ; simpl in * ; destruct H ; unfold fn_result in * ; generalize (@lookup_fn f0 H). rewrite H0. auto. Defined. (** ** Denotational Semantics for [regexp] *) (** Now we can give a semantic interpretation to the regexps under a given context. *) Inductive in_regexp : forall t, regexp t -> list char_p -> t -> Prop := | Any_i : forall c, in_regexp Any (c::nil) c | Char_i : forall c, in_regexp (Char c) (c::nil) c | Eps_i : in_regexp Eps nil tt | Alt_left_i : forall t (r1 r2:regexp t) cs (v:t), in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v | Alt_right_i : forall t (r1 r2:regexp t) cs (v:t), in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v | Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2, in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2) | Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil | Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2, cs1 <> nil -> in_regexp r cs1 v1 -> in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2) | Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f), apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2. Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i Star_eps_i Star_cat_i Map_i : dfa. Notation "[[ r ]]" := (in_regexp r) (at level 0). (** Equivalence of regular expression parsers. *) Definition reg_eq t (r1 r2: regexp t) : Prop := forall cs v, [[r1]] cs v <-> [[r2]] cs v. Infix "[=]" := reg_eq (right associativity, at level 85). (** Reflexivity *) Lemma reg_eq_refl : forall t (r:regexp t), r [=] r. unfold reg_eq ; tauto. Qed. Hint Resolve reg_eq_refl : dfa. (** Transitivity *) Lemma reg_eq_trans : forall t (r1 r2 r3: regexp t), r1 [=] r2 -> r2 [=] r3 -> r1 [=] r3. unfold reg_eq ; mysimp ; match goal with | [ H1 : forall cs v, _ <-> _, H2 : forall cs v, _ <-> _ |- _] => apply H1 ; apply H2 ; auto end. Qed. (** Symmetry *) Lemma reg_eq_sym : forall t (r1 r2: regexp t), r1 [=] r2 -> r2 [=] r1. unfold reg_eq ; mysimp. Qed. (** We define some explicit inversion principles for the [regexp] constructors so that we can invert them even when the arguments aren't variables. If we tried to use inversion otherwise, then Coq will choke because of the dependencies. *) (** An inversion principle for [Eps]. *) Lemma EpsInv : forall cs v, in_regexp Eps cs v -> cs = nil /\ v = tt. intros cs v H; inversion H ; mysimp. Qed. (** Inversion principle for [Any]. *) Lemma AnyInv : forall cs v, [[Any]] cs v -> cs = v::nil. intros cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Char c]. *) Lemma CharInv : forall c cs v, [[Char c]] cs v -> cs = c::nil /\ v = c. intros c cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Cat r1 r2].*) Lemma CatInv : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs v, [[Cat r1 r2]] cs v -> exists cs1, exists cs2, exists v1, exists v2, [[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2). Proof. intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto. Qed. (** Inversion principle for [Alt r1 r2]. *) Lemma AltInv : forall t (r1 r2:regexp t) cs v, [[Alt r1 r2]] cs v -> ([[r1]] cs v \/ [[r2]] cs v). intros t r1 r2 cs v H ; inversion H ; mysimp. Qed. (** Inversion principle for [Star r]. *) Lemma StarInv : forall t (r:regexp t) cs v, [[Star r]] cs v -> (cs = nil /\ v = nil) \/ (exists cs1, exists v1, exists cs2, exists v2, cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2). Proof. intros t r cs v H ; inversion H ; mysimp ; right. exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto. Qed. (** Inversion principle for [Map f r]. *) Lemma MapInv : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v, [[Map f r]] cs v -> exists H:wf_fn f, exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v. intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto. Qed. (** Inversion principle for [Zero]. *) Lemma ZeroInv : forall t cs v, [[Zero t]] cs v -> False. intros t cs v H; inversion H. Qed. (** A little tactic that takes care of inversion for the [in_regexp] relation. *) Ltac in_inv := match goal with | [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H | [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H | [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H | [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H | [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H | [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H) (*| [ H : [[Star _]] _ _ |- _] => generalize (StarInv H) ; clear H*) | [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H end ; mysimp ; subst ; auto with dfa. (** A little tactic that tries to prove [in_regexp]. *) Ltac pv_in := try in_inv ; match goal with | [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] => eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa | [ |- in_regexp (Map ?f ?r) ?cs ?v ] => eapply (Map_i f I (eq_refl _)) ; eauto with dfa | [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto | [ |- in_regexp (Cat _ _) nil (_,_) ] => let H := fresh "H" in assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ; apply Cat_i ; auto with dfa | [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] => let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ; [ auto | rewrite H] ; clear H ; apply Cat_i ; auto with dfa | _ => auto with dfa end. (** [Cat r Eps] is equivalent to [r] *) Lemma cat_eps_r : forall t (r:regexp t), (Cat r Eps) [=] (Map (Fn_unit_right t) r). Proof. unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in. Qed. (** [Cat Eps r] is equivalent to [r] *) Lemma cat_eps_l : forall t (r:regexp t), (Cat Eps r) [=] (Map (Fn_unit_left t) r). unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in. Qed. (** [Cat r Zero] is equivalent to [Zero] *) Lemma cat_zero_r : forall t1 t2 (r:regexp t1), (Cat r (Zero t2)) [=] Zero _. unfold reg_eq ; repeat (mysimp ; pv_in). Qed. (** [Cat Zero r] is equivalent to [Zero] *) Lemma cat_zero_l : forall t1 t2 (r:regexp t2), (Cat (Zero t1) r) [=] Zero _. unfold reg_eq ; repeat (mysimp ; pv_in). Qed. (** In the simple matcher code, we could fuse OptCat and OptCat_r into a single definition. Here, we broke it out to make the dependency a little simpler to reason about. *) (** Used in an optimizing constructor for [Cat]. *) Definition OptCat_r t1 t2 (r2:regexp t2) (r1:regexp t1) := match r2 in regexp t2' return regexp (t1 * t2') with | Zero _ => Zero _ | Eps => Map (Fn_unit_right t1) r1 | r2 => Cat r1 r2 end. (** An optimized constructor for [Cat]. *) Definition OptCat t1 t2 (r1:regexp t1) (r2:regexp t2) := match r1 in regexp t1' return regexp (t1' * t2) with | Zero _ => Zero _ | Eps => Map (Fn_unit_left t2) r2 | r1 => OptCat_r r2 r1 end. (** [OptCat r1 r2] is equivalent to [Cat r1 r2] *) Lemma opt_cat_r : forall t1 t2 (r1:regexp t1) (r2:regexp t2), OptCat_r r2 r1 [=] Cat r1 r2. Proof. destruct r2 ; simpl ; apply reg_eq_sym ; (apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r). Qed. Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2), OptCat r1 r2 [=] Cat r1 r2. destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ; apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l). Qed. (** [Alt r Zero] is equivalent to [r]. *) Lemma alt_zero_r : forall t (r:regexp t), (Alt r (Zero _)) [=] r. unfold reg_eq ; intros ; mysimp ; repeat pv_in. Qed. (** [Alt Zero r] is equivalent to [r]. *) Lemma alt_zero_l : forall t (r:regexp t), (Alt (Zero _) r) [=] r. unfold reg_eq ; intros ; mysimp ; repeat pv_in. Qed. (** Used in an optimizing version of [Alt]. Note that we don't easily have a way to determine whether r2 = r1 so that we can collapse [Alt r r] into [r]. Try writing this and see what goes wrong... *) Definition OptAlt_r t (r2:regexp t) : regexp t -> regexp t := match r2 in regexp t' return regexp t' -> regexp t' with | Zero _ => fun r => r | r => fun r1 => Alt r1 r end. (** Optimized version of [Alt]. *) Definition OptAlt t (r1:regexp t) : regexp t -> regexp t := match r1 in regexp t' return regexp t' -> regexp t' with | Zero _ => fun r => r | r => fun r2 => OptAlt_r r2 r end. (** [OptAlt r1 r2] is equivalent to [Alt r1 r2] *) Lemma opt_alt_r t (r1 r2:regexp t) : OptAlt_r r2 r1 [=] Alt r1 r2. destruct r2 ; simpl ; try apply reg_eq_refl ; apply reg_eq_sym ; apply alt_zero_r. Qed. Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2. destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ; apply alt_zero_l. Qed. (** Optimizing version of [Map]. *) Definition OptMap (t u: Set) (f: fn t u) (r1: regexp t) : regexp u := match r1 in regexp t' return regexp u with | Zero _ => Zero _ | _ => Map f r1 end. (** [OptMap f r] is equivalent to [Map f r]. *) Lemma opt_map: forall (t u: Set) (f: fn t u) (r1: regexp t), OptMap f r1 [=] Map f r1. unfold reg_eq ; destruct r1 ; simpl ; mysimp ; repeat pv_in. Qed. (** Now we define what it means for a function to be a valid parser. A function [f: regexp t -> list char_p -> list (result_m t)], is a valid parser if, when given a [regexp] and a string of characters [s], it returns a list of values [[v1,...,vn]], such that { (s,vi) } is the relation denoted by the regular expression. *) Definition is_parser (f: forall t (r:regexp t), wf_regexp r -> list char_p -> list t) : Prop := forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:t), ([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v). (** -----------------------------------------------------*) (** ** Now we define the actual derivative-based parser. *) (** -----------------------------------------------------*) (** Returns a regexp denoting { (null,v) | (null,v) in r }. *) Fixpoint null t (r:regexp t) : regexp t := match r in regexp t' return regexp t' with | Any => Zero _ | Char _ => Zero _ | Eps => Eps | Zero _ => Zero _ | Alt t r1 r2 => OptAlt (null r1) (null r2) | Cat t1 t2 r1 r2 => OptCat (null r1) (null r2) | Star t _ => OptMap (Fn_empty_list t) Eps | Map _ _ f r1 => OptMap f (null r1) end. (** This is the heart of the algorithm. It returns a regexp denoting { (cs,v) | (c::cs,v) in r }. *) Fixpoint deriv t (r:regexp t) (c:char_p) : regexp t := match r in regexp t' return regexp t' with | Any => OptMap (Fn_const_char c) Eps | Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _ | Eps => Zero _ | Zero _ => Zero _ | Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c) | Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c)) | Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r)) | Map _ _ f r1 => OptMap f (deriv r1 c) end. (** When we are done parsing using a [regexp] r, we are left with a [regexp] denoting { (null,v) | exists s.(s,v) in r }. This function computes all of the values [v] in this set. To do so, it needs to be well-formed with respect to the context. *) Definition apply_null t (r:regexp t) : wf_regexp r -> list t. refine ( fix apply_null t (r:regexp t) : wf_regexp r -> list t := match r in regexp t' return wf_regexp r -> list t' with | Any => fun H => nil | Char _ => fun H => nil | Zero _ => fun H => nil | Eps => fun H => tt::nil | Star _ r => fun H => nil::nil | Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _) | Cat _ _ r1 r2 => fun H => let res1 := apply_null _ r1 _ in let res2 := apply_null _ r2 _ in fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1 | Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _) end ) ; mysimp. Defined. Lemma InConcatMap A B (x:A) (y:B) : forall xs, In x xs -> forall ys, In y ys -> In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs). Proof. induction xs ; intro H ; [ contradiction H | destruct H ] ; s ; apply in_or_app. left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ]. Qed. (** Show that if [(nil,v)] is in [r] then [apply_null r] returns [v] as a result *) Lemma ApplyNull1 t (r:regexp t) cs v H : [[r]] cs v -> cs = nil -> In v (apply_null r H). Proof. induction 1 ; mysimp ; try congruence ; repeat match goal with | [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp | [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto | [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _, H1 : ?cs = nil |- _ ] => let l := fresh "l" in generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ; left | [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] => rewrite (proof_irrelevance H1 H2) ; auto | [ H : None = Some _ |- _ ] => congruence end. Qed. Lemma InConcatMap1 A B (x:A) (y:B) xs ys : In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) -> In x xs. Proof. induction xs ; mysimp ; repeat match goal with | [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto] | [ H : In (?x, _) (map _ ?ys) |- _ ] => let l := fresh "l" in generalize H ; generalize ys as l; induction l ; mysimp ; congruence end. Qed. Lemma InConcatMap2 A B (x:A) (y:B) xs ys : In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) -> In y ys. Proof. induction xs ; mysimp ; match goal with | [ H : In _ (map _ ?ys ++ _) |- _ ] => let H0 := fresh "H" in let l := fresh "l" in generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ; generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence end. Qed. (** Show that if [apply_null r] returns [v] as a result, then [(nil,v)] is in [r] *) Lemma ApplyNull2 t (r:regexp t) H v : In v (apply_null r H) -> [[r]] nil v. Proof. induction r ; s ; match goal with | [ |- in_regexp (Cat _ _) _ _] => destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto | eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto] | [ H0 : In _ _ |- in_regexp (Alt _ _) _ _ ] => generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ; [ apply Alt_left_i ; eapply IHr1 ; eauto | eapply Alt_right_i ; eapply IHr2 ; eauto ] | [ H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] => let l := fresh "l" in generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ; clear IHr H0 ; mysimp ; eapply Map_i ; eauto end. Qed. (** Show that the optimizing constructors preserve well-formedness. *) Lemma wf_cat_opt : forall t1 t2 (r1:regexp t1) (r2:regexp t2), wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2). Proof. destruct r1 ; mysimp ; destruct r2 ; mysimp. Qed. Lemma wf_alt_opt_r : forall t (r2 r1:regexp t), wf_regexp r2 -> wf_regexp r1 -> wf_regexp (OptAlt_r r2 r1). Proof. destruct r2 ; mysimp. Qed. Lemma wf_alt_opt : forall t (r1 r2:regexp t), wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2). Proof. destruct r1 ; mysimp ; apply wf_alt_opt_r ; mysimp. Qed. Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t), wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1). Proof. destruct r1; mysimp. Qed. (** Show that [null r] is well-formed if [r] is well-formed. *) Lemma wf_null : forall t (r:regexp t), wf_regexp r -> wf_regexp (null r). induction r ; mysimp ; (apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto. Qed. (** Show that [deriv r c] is well-formed if [r] is well-formed. *) Lemma wf_deriv : forall t (r:regexp t) c, wf_regexp r -> wf_regexp (deriv r c). Proof. induction r; mysimp ; repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ; simpl ; auto. Qed. (** Lift derivative parsing from characters to strings *) Fixpoint deriv_parse' t (r:regexp t) (cs:list char_p) : regexp t := match cs with | nil => r | c::cs' => deriv_parse' (deriv r c) cs' end. (** Derivatives with respect to strings preserve well-formedness. *) Lemma wf_derivs : forall cs t (r:regexp t), wf_regexp r -> wf_regexp (deriv_parse' r cs). induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto. Qed. (** Finally, the derivative-based parser simply runs through the input, calculating derivatives based on the characters it sees, and calls [apply_null] when it gets to the end of the input. *) Definition deriv_parse t (r:regexp t) (H:wf_regexp r) cs : list t := apply_null (deriv_parse' r cs) (wf_derivs cs r H). (** Tactic for helping to reason about the optimizing constructors. *) Ltac pv_opt := match goal with | [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] => apply (proj2 (opt_alt r1 r2 cs v)) | [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] => apply (proj2 (opt_cat r1 r2 cs v)) | [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] => apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail) | [ H : ?x ++ ?y = nil |- _] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp | [ H : nil = ?x ++ ?y |- _] => generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp | [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] => generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H | [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] => generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H | [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] => generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp | [ v : prod _ _ |- _ ] => destruct v | [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp | _ => auto with dfa end. (** [null] is correct. *) Lemma Null1 : forall t (r:regexp t) cs v, [[r]] cs v -> cs = nil -> [[null r]] cs v. Proof. induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in. Qed. Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v. Proof. induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ; try apply nil ; eapply @Map_i ; eauto. Qed. (** [deriv] is correct part 1. *) Lemma Deriv1 : forall t (r:regexp t) c cs v, [[r]] (c::cs) v -> [[deriv r c]] cs v. Proof. induction r ; simpl ; intros ; try in_inv ; match goal with | [ H : in_regexp (Star _) _ _ |- _] => generalize (StarInv H); clear H ; mysimp ; subst | _ => idtac end ; try congruence ; repeat (pv_opt ; match goal with | [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] => destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt | [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] => let H := fresh "H" in assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto | [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] => apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _)) | [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ; rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in | [ H2 : _::?cs = ?x ++ ?x1 |- _ ] => destruct x ; try congruence ; simpl in H2 ; injection H2 ; s ; apply Cat_i ; auto with dfa | _ => auto with dfa end) ; pv_in. Qed. (** If [null r] matches [cs] returning [v], then [cs] must be empty *) Lemma NullNil : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil. Proof. induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat match goal with | [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _, H : in_regexp (null ?r) _ _ |- _] => rewrite (IHr _ _ H) ; clear IHr ; auto end. Qed. (** [deriv] is correct part 2. *) Lemma Deriv2 : forall t (r:regexp t) c cs v, [[deriv r c]] cs v -> [[r]] (c::cs) v. Proof. induction r ; simpl ; intros ; repeat (match goal with | [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s | [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] => let H1 := fresh "H" in generalize (NullNil _ H) ; s ; assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ; pv_in ; apply Null2 ; auto | [ H: in_regexp (deriv ?r1 _) ?x ?x1 |- in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in | [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] => let H := fresh "H" in assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ; eapply Star_cat_i ; eauto ; congruence | [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto | [ H : in_regexp (Star _) _ _ |- _ ] => fail 1 | _ => pv_opt ; in_inv end). Qed. (** First half of correctness for [deriv_parse']. *) Lemma Deriv'1 cs t (r:regexp t) v : [[deriv_parse' r cs]] nil v -> [[r]] cs v. Proof. induction cs ; mysimp. apply Deriv2. apply IHcs. auto. Qed. (** Second half of correctness for [deriv_parse']. *) Lemma Deriv'2 cs t (r:regexp t) v : [[r]] cs v -> [[deriv_parse' r cs]] nil v. Proof. induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto. Qed. (** First half of correctness for [deriv_parse]. *) Lemma DerivParse1 cs t (r:regexp t) H vs : deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v. Proof. unfold deriv_parse. intros. subst. generalize (ApplyNull2 _ _ _ H1). apply Deriv'1. Qed. (** Second half of correctness for [deriv_parse]. *) Lemma DerivParse2 cs t (r:regexp t) v H : [[r]] cs v -> In v (deriv_parse r H cs). Proof. unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto. Qed. (** [deriv_parse] is correct *) Theorem DerivParse_is_parser : is_parser deriv_parse. Proof. unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto. Qed. End FNMAP. (** ** Now we need to translate our external representation, [parser] to our internal representation [regexp] and build an appropriate [ctxt] mapping function names to functions of the right type. *) (** Add a new function to the end of the context and return its position as a "fresh" function name, along with the new context. *) Definition extend_state(s:ctxt_t) t1 t2 (f:fn_result_m(t1,t2)) : (fn_name * ctxt_t) := (length s, s ++ (existT fn_result_m (t1,t2) f)::nil). (** Convert a parser with inlined functions to a regexp and a function map. *) Fixpoint par2reg t (p:parser t)(s:ctxt_t) : (regexp t) * ctxt_t := match p in parser t return (regexp t) * ctxt_t with | Any_p => (Any, s) | Char_p b => (Char b, s) | Eps_p => (Eps, s) | Cat_p t1 t2 p1 p2 => let (r1,s1) := par2reg p1 s in let (r2,s2) := par2reg p2 s1 in (Cat r1 r2, s2) | Zero_p t => (Zero t, s) | Alt_p t p1 p2 => let (r1,s1) := par2reg p1 s in let (r2,s2) := par2reg p2 s1 in (Alt r1 r2, s2) | Star_p t p1 => let (r1,s1) := par2reg p1 s in (Star r1, s1) | Map_p t1 t2 f p => let (r,s1) := par2reg p s in let (n,s2) := extend_state s1 f in (@Map t1 t2 (Fn_name n t1 t2) r, s2) end. (** Initial state for the translation. *) Definition initial_ctxt : ctxt_t := nil. (** Top-level translation of parsers to regexps. *) Definition parser2regexp t (p:parser t) : (regexp t) * ctxt_t := par2reg p initial_ctxt. (** ** Now we need to prove that the translation [parser2regexp] preserves meaning. *) (** Tactic to propagate information about the translation. *) Ltac unfold_par2reg := match goal with | [ |- context[par2reg ?p ?s] ] => let H := fresh "H" in let x := fresh "x" in let r := fresh "r" in let s := fresh "s" in assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ; destruct H as [x H]; rewrite <- H in * ; destruct x as [r s] end. (** We need to support weakening of contexts -- i.e., adding more bindings to the end of the context. To that end, we define a partial order on contexts and show that the translation respects this form of weakening. *) Definition ctxt_leq(c1 c2:ctxt_t) : Prop := exists c:ctxt_t, c2 = c1 ++ c. Lemma ctxt_leq_trans : forall c1 c2 c3, ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3. Proof. unfold ctxt_leq ; s. rewrite app_ass. eauto. Qed. Lemma ctxt_leq_refl : forall c, ctxt_leq c c. unfold ctxt_leq ; s. exists nil. apply app_nil_end. Qed. Hint Resolve ctxt_leq_refl : dfa. Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c'). unfold ctxt_leq ; s. eauto. Qed. Hint Resolve ctxt_ext : dfa. (** Simplify reasoning about inductive hypotheses that depend upon a context. *) Ltac p2rsimp := match goal with | [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y) | [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] => generalize (IH s) ; match goal with | [ |- context[par2reg ?p ?s] ] => let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ; intro H' | _ => fail end | _ => idtac end. (** The translation only results in a greater or equal ctxt *) Lemma p2r_extends : forall t (p:parser t) (c:ctxt_t), ctxt_leq c (snd (par2reg p c)). Proof. unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ; repeat unfold_par2reg ; simpl ; try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ; try rewrite app_ass ; econstructor ; eauto. Qed. (** Functions stay well-typed under greater or equal contexts. *) Lemma extends_wf_fn : forall t1 t2 (f:fn t1 t2) c c', ctxt_leq c c' -> wf_fn c f -> wf_fn c' f. Proof. unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ; unfold fn_result, fn_result' in * ; induction f ; destruct c ; [ mysimp | mysimp | mysimp | intro ] ; try congruence ; simpl in *. destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp. Qed. (** Regexps stay well-typed under greater or equal contexts. *) Lemma extends_wf_regexp : forall t (r:regexp t) c c', ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r. Proof. induction r ; simpl ; s. eapply extends_wf_fn ; eauto. econstructor ; eauto. Qed. (** Tactic for remembering a translated sub-term is well-formed. *) Ltac p2rext := match goal with | [ H : (?r,?s2) = par2reg ?p ?s |- _ ] => generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H end. Lemma FnResult : forall s p r, fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p. Proof. induction s ; mysimp. Qed. Hint Resolve FnResult. (** The translation results in a well-formed regexp. *) Lemma p2r_wf : forall t (p:parser t) c, wf_regexp (snd (par2reg p c)) (fst (par2reg p c)). Proof. induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ; try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith. Qed. (** Applying the same function in an extended environment yields the same result. *) Lemma extend_apply : forall x t1 t2 (f:fn t1 t2) s (H:wf_fn s f) v1 H', apply_fn (s ++ x) f H' v1 = apply_fn s f H v1. Proof. destruct f ; auto. simpl. mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e. induction s ; destruct f ; mysimp ; unfold fn_result, fn_result' in * |- ; simpl in * ; try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto. Qed. Hint Resolve extend_apply : dfa. (** [in_regexp] respects weakening on states. *) Lemma in_leq : forall t (r:regexp t) s cs v, in_regexp s r cs v -> forall s', ctxt_leq s s' -> in_regexp s' r cs v. Proof. induction 1 ; s ; try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)). apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa. Qed. (** Newly-allocated functions are well-formed with respect to the resulting context. *) Lemma new_fn_wf : forall t1 t2 f s, wf_fn (s ++ existT fn_result_m (t1,t2) f :: nil) (Fn_name (length s) t1 t2). Proof. induction s; mysimp. Qed. Hint Resolve new_fn_wf : dfa. (** If [(cs,v)] is in [[p]] then [(cs,v)] is in [[r]] under the output context [s]. *) Lemma p2r_ok : forall t (p:parser t) cs v, in_parser p cs v -> forall s, in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v. Proof. induction 1 ; simpl ; intro s ; try (constructor ; fail) ; repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ; try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) || (apply Alt_right_i ; auto). apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)). generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ; unfold fn_result, fn_result' in * ; simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto. generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto. eapply in_leq ; eauto. auto with dfa. Qed. (** If [(cs,v)] is in [[r]] under context [s2], and [r] is well-formed with respect to context [s1], and [s1 <= s2], then [(cs,v)] is in [[r]] under context [s1]. *) Lemma extends_in : forall t (r:regexp t) cs v s1 s2, in_regexp s2 r cs v -> wf_regexp s1 r -> ctxt_leq s1 s2 -> in_regexp s1 r cs v. Proof. induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) || (eapply Alt_right_i ; eauto) || (apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s. Qed. Hint Resolve extends_in : dfa. (** Used for induction over proofs of [(cs,v)] in [Star r]. *) Fixpoint in_unwind s (t:Set) (r:regexp t) n cs (v:list t) : Prop := match n with | 0 => cs = nil /\ v = nil | S m => exists cs1, exists cs2, exists v1, exists v2, cs = cs1 ++ cs2 /\ v = (v1::v2) /\ cs1 <> nil /\ in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2 end. (** If [(cs,v)] is in [Star r] then there exists some [n] such that [(cs,v)] is in the nth unwinding of [r]. This gives us an easy inner induction when reasoning about the meaning of [Star r], though it's not strictly necessary. *) Lemma star_rep' : forall s (t:Set) (r:regexp t) cs (v:t), in_regexp s r cs v -> match r in regexp t' return t' -> Prop with | Star _ r1 => fun v => exists n, in_unwind s r1 n cs v | _ => fun _ => True end v. Proof. induction 1 ; simpl ; auto ; [exists 0 ; simpl ; auto | idtac ] ; let H := fresh "H" in let n := fresh "n" in destruct IHin_regexp2 as [n H]; exists (S n) ; repeat econstructor ; eauto. Qed. Lemma star_rep : forall s (t:Set) (r:regexp t) cs (v:list t), in_regexp s (Star r) cs v -> exists n, in_unwind s r n cs v. Proof. intros s t r cs v H. apply (star_rep' H). Qed. (** It's interesting to try to define [star_rep] without the match and still get the proof to go through. We can't do induction on [in_regexp s (Star r1) cs v] directly because we need to limit the arguments to variables (including the return type [t]. And notice that we had to use the convoy pattern to refine v's type once we know [t] must be a [list t'] for some [t']. *) (** This is the key lemma regarding the translation: If [(cs,v)] is in [r] under context [s], where [(r,s) = par2reg p], then [(cs,v)] is in [p]. *) Lemma r2p_ok : forall t (p:parser t) s cs v, in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v -> in_parser p cs v. Proof. induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ; simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) || generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) || generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ; try (econstructor ; eauto ; fail) ; repeat match goal with | [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _, H : _ = par2reg ?p ?s |- _ ] => generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ; rewrite <- H ; simpl ; clear IH ; intros end ; try (econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa). generalize (star_rep H0). mysimp. clear H0. subst. generalize cs v H4. clear cs v H4. induction x ; s ; constructor ; auto. match goal with | [ |- in_parser (Map_p ?r _) _ (?f _) ] => assert (f = r) end. generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp. rewrite (proof_irrelevance e0 (eq_refl _)). auto. rewrite H5. apply (@Map_pi _ _ t). apply H4. apply (@extends_in _ _ cs x s0 (snd (extend_state s0 _)) H1 H2). unfold extend_state. simpl. auto with dfa. Qed. (** The translation preserves meaning *) Theorem parser2regexp_equiv : forall t (p:parser t) cs v, in_parser p cs v <-> in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v. Proof. unfold parser2regexp ; mysimp ; [eapply p2r_ok | eapply r2p_ok] ; eauto. Qed. (** Finally -- convert the parser to a regexp and then run the derivative-based parser on this. *) Definition parse t (p:parser t) : list char_p -> list t := deriv_parse (snd (parser2regexp p)) (fst (parser2regexp p)) (p2r_wf p _). (** We can conclude that the parser respects the semantics *) Theorem parse_correct : forall t (p:parser t) cs v, in_parser p cs v <-> In v (parse p cs). Proof. intros t p cs v ; generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p)) (p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ; generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto. Qed. End PARSER.