Library LexParser
Require Import Coq.Program.Equality.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2.
We parameterize the development over a set of characters
Parameter char_p : Set.
Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}.
End PARSER_ARG.
Parameter char_eq : forall (c1 c2:char_p), {c1=c2} + {c1<>c2}.
End PARSER_ARG.
The parsers are indexed by the return type, and we've
added a new kind of parser Map_p which is used to transform the
result of one parser to another.
Inductive parser : Set -> Type :=
| Any_p : parser char_p
| Char_p : char_p -> parser char_p
| Eps_p : parser unit
| Cat_p : forall (t1 t2:Set) (r1:parser t1) (r2:parser t2), parser (t1 * t2)
| Zero_p : forall (t:Set), parser t
| Alt_p : forall (t:Set) (r1 r2:parser t), parser t
| Star_p : forall (t:Set), parser t -> parser (list t)
| Map_p : forall (t1 t2:Set), (t1 -> t2) -> parser t1 -> parser t2.
| Any_p : parser char_p
| Char_p : char_p -> parser char_p
| Eps_p : parser unit
| Cat_p : forall (t1 t2:Set) (r1:parser t1) (r2:parser t2), parser (t1 * t2)
| Zero_p : forall (t:Set), parser t
| Alt_p : forall (t:Set) (r1 r2:parser t), parser t
| Star_p : forall (t:Set), parser t -> parser (list t)
| Map_p : forall (t1 t2:Set), (t1 -> t2) -> parser t1 -> parser t2.
The semantics relates input strings (as lists of characters) to values.
Inductive in_parser : forall t, parser t -> list char_p -> t -> Prop :=
| Any_pi : forall c, in_parser Any_p (c::nil) c
| Char_pi : forall c, in_parser (Char_p c) (c::nil) c
| Eps_pi : in_parser Eps_p nil tt
| Alt_left_pi : forall t (p1 p2:parser t) cs (v:t),
in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v
| Alt_right_pi : forall t (p1 p2:parser t) cs (v:t),
in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v
| Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2,
in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 ->
in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2)
| Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil
| Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2,
cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 ->
in_parser (Star_p p) (cs1 ++ cs2) (v1::v2)
| Map_pi : forall (t1 t2:Set) (f:t1 -> t2) (p:parser t1) cs v,
in_parser p cs v -> in_parser (Map_p f p) cs (f v).
| Any_pi : forall c, in_parser Any_p (c::nil) c
| Char_pi : forall c, in_parser (Char_p c) (c::nil) c
| Eps_pi : in_parser Eps_p nil tt
| Alt_left_pi : forall t (p1 p2:parser t) cs (v:t),
in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v
| Alt_right_pi : forall t (p1 p2:parser t) cs (v:t),
in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v
| Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2,
in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 ->
in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2)
| Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil
| Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2,
cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 ->
in_parser (Star_p p) (cs1 ++ cs2) (v1::v2)
| Map_pi : forall (t1 t2:Set) (f:t1 -> t2) (p:parser t1) cs v,
in_parser p cs v -> in_parser (Map_p f p) cs (f v).
Note that for Star_cat_pi we require cs1 to be non-empty -- this is
crucial for ensuring that any regular expression matches a string and
returns a finite list of associated values. Otherwise, something like
Star Eps would take the empty string to an infinite set of possible
values. However, note that (ignoring possible return values) the
proof rules above are equivalent (i.e., will accept the same strings)
as one where we allow cs1 to be empty.
Internally, we translate parsers to a representation called regexp
where all of the functions are replaced with a function name that we can
look up in an environment. This will allow us to put together a decidable
equality on regular expressions, which will in turn, allow us to hash-cons
them (though we don't take advantage of this yet.)
We'll represent function names as positions in an environment list.
In addition to user-level functions, we need a few built-in functions that
are used during optimization.
Inductive fn : Set -> Set -> Type :=
| Fn_name : forall (f:fn_name) t1 t2, fn t1 t2
| Fn_const_char : forall (c:char_p), fn unit char_p
| Fn_empty_list : forall (t:Set), fn unit (list t)
| Fn_cons : forall (t:Set), fn (t * (list t)) (list t)
| Fn_unit_left : forall (t:Set), fn t (unit * t)
| Fn_unit_right : forall (t:Set), fn t (t * unit)
.
| Fn_name : forall (f:fn_name) t1 t2, fn t1 t2
| Fn_const_char : forall (c:char_p), fn unit char_p
| Fn_empty_list : forall (t:Set), fn unit (list t)
| Fn_cons : forall (t:Set), fn (t * (list t)) (list t)
| Fn_unit_left : forall (t:Set), fn t (unit * t)
| Fn_unit_right : forall (t:Set), fn t (t * unit)
.
Finally, a regexp is just like a parser except that the Map constructor
takes a fn instead of an actual function. Note that this syntax doesn't preclude
us from using a function name with the wrong type -- we'll capture this constraint
later on.
Inductive regexp : Set -> Type :=
| Any : regexp char_p
| Char : char_p -> regexp char_p
| Eps : regexp unit
| Cat : forall (t1 t2:Set), regexp t1 -> regexp t2 -> regexp (t1 * t2)
| Alt : forall (t:Set), regexp t -> regexp t -> regexp t
| Zero : forall (t:Set), regexp t
| Star : forall (t:Set), regexp t -> regexp (list t)
| Map : forall (t u:Set) (f:fn t u), regexp t -> regexp u.
| Any : regexp char_p
| Char : char_p -> regexp char_p
| Eps : regexp unit
| Cat : forall (t1 t2:Set), regexp t1 -> regexp t2 -> regexp (t1 * t2)
| Alt : forall (t:Set), regexp t -> regexp t -> regexp t
| Zero : forall (t:Set), regexp t
| Star : forall (t:Set), regexp t -> regexp (list t)
| Map : forall (t u:Set) (f:fn t u), regexp t -> regexp u.
A simplification tactic used through the development
Ltac mysimp :=
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; try (firstorder) ; auto.
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; try (firstorder) ; auto.
Simplificaton followed by substitution.
Ltac s := repeat (mysimp ; subst).
Function contexts map function names to a dependent pair consisting of
- a pair of Sets (t1,t2)
- a function of type t1 -> t2.
Definition fn_result_m(p:Set*Set) := (fst p) -> (snd p).
Definition ctxt_t := list (sigT fn_result_m).
Definition fn_result'(G:ctxt_t)(n:fn_name) : option (Set*Set)%type :=
match nth_error G n with
| None => None
| Some (existT p _) => Some p
end.
Definition ctxt_t := list (sigT fn_result_m).
Definition fn_result'(G:ctxt_t)(n:fn_name) : option (Set*Set)%type :=
match nth_error G n with
| None => None
| Some (existT p _) => Some p
end.
An empty set
Inductive void : Set := .
Definition fn_result_m_opt(p:option(Set*Set)) : Set :=
match p with | None => void | Some p => fn_result_m p end.
Definition fn_result_m_opt(p:option(Set*Set)) : Set :=
match p with | None => void | Some p => fn_result_m p end.
This looks up a function in the environment, given a proof that
the function name is in bounds.
Fixpoint lookup_fn'(n:fn_name) :
forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) :=
match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with
| 0 =>
fun G =>
match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with
| nil => fun H => match lt_n_O _ H with end
| (existT p f)::_ => fun H => f
end
| S m =>
fun G =>
match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with
| nil => fun H => match lt_n_O _ H with end
| _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H)
end
end.
forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) :=
match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with
| 0 =>
fun G =>
match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with
| nil => fun H => match lt_n_O _ H with end
| (existT p f)::_ => fun H => f
end
| S m =>
fun G =>
match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with
| nil => fun H => match lt_n_O _ H with end
| _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H)
end
end.
In this section, we assume some context and some map for function names.
Specialize the lookup to the parameters
Definition fn_result := fn_result' fn_ctxt.
Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H.
Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H.
A function is well-formed if all of the function names have the right
types when we look them up in the fn_ctxt.
Definition wf_fn t u (f:fn t u) : Prop :=
match f in fn u' v' with
| Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2)
| _ => True
end.
match f in fn u' v' with
| Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2)
| _ => True
end.
A predicate for determining if a regexp is type-consistent with
the given fn_ctxt. Note that by construction, the only things that
can be ill-typed are the embedded fns in a Map.
Fixpoint wf_regexp t (r:regexp t) : Prop :=
match r in regexp t' with
| Any => True
| Char _ => True
| Eps => True
| Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Zero _ => True
| Star _ r => wf_regexp r
| Map _ _ f r => wf_regexp r /\ wf_fn f
end.
match r in regexp t' with
| Any => True
| Char _ => True
| Eps => True
| Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Zero _ => True
| Star _ r => wf_regexp r
| Map _ _ f r => wf_regexp r /\ wf_fn f
end.
Convert a well-formed fn to an actual function, using the fn_map.
Definition apply_fn t1 t2 (f:fn t1 t2) : wf_fn f -> t1 -> t2.
refine (fun t1 t2 (f:fn t1 t2) =>
match f in fn t1' t2' return wf_fn f -> t1' -> t2' with
| Fn_name f t1 t2 => fun H => _
| Fn_const_char c => fun _ => fun _ => c
| Fn_empty_list t => fun _ => fun _ => nil
| Fn_cons t => fun _ => fun p => (fst p)::(snd p)
| Fn_unit_left t => fun _ => fun x => (tt,x)
| Fn_unit_right t => fun _ => fun x => (x,tt)
end) ; simpl in * ; destruct H ; unfold fn_result in * ;
generalize (@lookup_fn f0 H). rewrite H0. auto.
Defined.
refine (fun t1 t2 (f:fn t1 t2) =>
match f in fn t1' t2' return wf_fn f -> t1' -> t2' with
| Fn_name f t1 t2 => fun H => _
| Fn_const_char c => fun _ => fun _ => c
| Fn_empty_list t => fun _ => fun _ => nil
| Fn_cons t => fun _ => fun p => (fst p)::(snd p)
| Fn_unit_left t => fun _ => fun x => (tt,x)
| Fn_unit_right t => fun _ => fun x => (x,tt)
end) ; simpl in * ; destruct H ; unfold fn_result in * ;
generalize (@lookup_fn f0 H). rewrite H0. auto.
Defined.
Now we can give a semantic interpretation to the regexps under a given context.
Inductive in_regexp : forall t, regexp t -> list char_p -> t -> Prop :=
| Any_i : forall c, in_regexp Any (c::nil) c
| Char_i : forall c, in_regexp (Char c) (c::nil) c
| Eps_i : in_regexp Eps nil tt
| Alt_left_i : forall t (r1 r2:regexp t) cs (v:t),
in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v
| Alt_right_i : forall t (r1 r2:regexp t) cs (v:t),
in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v
| Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2,
in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2)
| Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil
| Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2,
cs1 <> nil -> in_regexp r cs1 v1 ->
in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2)
| Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f),
apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2.
Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i
Star_eps_i Star_cat_i Map_i : dfa.
Notation "[[ r ]]" := (in_regexp r) (at level 0).
| Any_i : forall c, in_regexp Any (c::nil) c
| Char_i : forall c, in_regexp (Char c) (c::nil) c
| Eps_i : in_regexp Eps nil tt
| Alt_left_i : forall t (r1 r2:regexp t) cs (v:t),
in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v
| Alt_right_i : forall t (r1 r2:regexp t) cs (v:t),
in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v
| Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2,
in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2)
| Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil
| Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2,
cs1 <> nil -> in_regexp r cs1 v1 ->
in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2)
| Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f),
apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2.
Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i
Star_eps_i Star_cat_i Map_i : dfa.
Notation "[[ r ]]" := (in_regexp r) (at level 0).
Equivalence of regular expression parsers.
Definition reg_eq t (r1 r2: regexp t) : Prop := forall cs v, [[r1]] cs v <-> [[r2]] cs v.
Infix "[=]" := reg_eq (right associativity, at level 85).
Infix "[=]" := reg_eq (right associativity, at level 85).
Reflexivity
Lemma reg_eq_refl : forall t (r:regexp t), r [=] r.
unfold reg_eq ; tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
unfold reg_eq ; tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
Transitivity
Lemma reg_eq_trans : forall t (r1 r2 r3: regexp t), r1 [=] r2 -> r2 [=] r3 -> r1 [=] r3.
unfold reg_eq ; mysimp ;
match goal with
| [ H1 : forall cs v, _ <-> _,
H2 : forall cs v, _ <-> _ |- _] => apply H1 ; apply H2 ; auto
end.
Qed.
unfold reg_eq ; mysimp ;
match goal with
| [ H1 : forall cs v, _ <-> _,
H2 : forall cs v, _ <-> _ |- _] => apply H1 ; apply H2 ; auto
end.
Qed.
Symmetry
We define some explicit inversion principles for the regexp constructors
so that we can invert them even when the arguments aren't variables. If
we tried to use inversion otherwise, then Coq will choke because of the
dependencies.
An inversion principle for Eps.
Lemma EpsInv : forall cs v, in_regexp Eps cs v -> cs = nil /\ v = tt.
intros cs v H; inversion H ; mysimp.
Qed.
intros cs v H; inversion H ; mysimp.
Qed.
Inversion principle for Any.
Inversion principle for Char c.
Lemma CharInv : forall c cs v, [[Char c]] cs v -> cs = c::nil /\ v = c.
intros c cs v H ; inversion H ; mysimp.
Qed.
intros c cs v H ; inversion H ; mysimp.
Qed.
Inversion principle for Cat r1 r2.
Lemma CatInv : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs v, [[Cat r1 r2]] cs v ->
exists cs1, exists cs2, exists v1, exists v2,
[[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2).
Proof.
intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto.
Qed.
exists cs1, exists cs2, exists v1, exists v2,
[[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2).
Proof.
intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto.
Qed.
Inversion principle for Alt r1 r2.
Lemma AltInv : forall t (r1 r2:regexp t) cs v, [[Alt r1 r2]] cs v -> ([[r1]] cs v \/ [[r2]] cs v).
intros t r1 r2 cs v H ; inversion H ; mysimp.
Qed.
intros t r1 r2 cs v H ; inversion H ; mysimp.
Qed.
Inversion principle for Star r.
Lemma StarInv : forall t (r:regexp t) cs v, [[Star r]] cs v ->
(cs = nil /\ v = nil) \/
(exists cs1, exists v1, exists cs2, exists v2,
cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2).
Proof.
intros t r cs v H ; inversion H ; mysimp ; right.
exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto.
Qed.
(cs = nil /\ v = nil) \/
(exists cs1, exists v1, exists cs2, exists v2,
cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2).
Proof.
intros t r cs v H ; inversion H ; mysimp ; right.
exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto.
Qed.
Inversion principle for Map f r.
Lemma MapInv : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v, [[Map f r]] cs v ->
exists H:wf_fn f,
exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v.
intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto.
Qed.
exists H:wf_fn f,
exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v.
intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto.
Qed.
Inversion principle for Zero.
A little tactic that takes care of inversion for the in_regexp relation.
Ltac in_inv :=
match goal with
| [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H
| [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H
| [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H
| [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H
| [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H
| [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H)
| [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H
end ; mysimp ; subst ; auto with dfa.
match goal with
| [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H
| [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H
| [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H
| [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H
| [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H
| [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H)
| [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H
end ; mysimp ; subst ; auto with dfa.
A little tactic that tries to prove in_regexp.
Ltac pv_in :=
try in_inv ;
match goal with
| [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] =>
eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa
| [ |- in_regexp (Map ?f ?r) ?cs ?v ] =>
eapply (Map_i f I (eq_refl _)) ; eauto with dfa
| [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil (_,_) ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ;
[ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
try in_inv ;
match goal with
| [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] =>
eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa
| [ |- in_regexp (Map ?f ?r) ?cs ?v ] =>
eapply (Map_i f I (eq_refl _)) ; eauto with dfa
| [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil (_,_) ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ;
[ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
Cat r Eps is equivalent to r
Lemma cat_eps_r : forall t (r:regexp t), (Cat r Eps) [=] (Map (Fn_unit_right t) r).
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Cat Eps r is equivalent to r
Lemma cat_eps_l : forall t (r:regexp t), (Cat Eps r) [=] (Map (Fn_unit_left t) r).
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
Cat r Zero is equivalent to Zero
Lemma cat_zero_r : forall t1 t2 (r:regexp t1), (Cat r (Zero t2)) [=] Zero _.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
Cat Zero r is equivalent to Zero
Lemma cat_zero_l : forall t1 t2 (r:regexp t2), (Cat (Zero t1) r) [=] Zero _.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
In the simple matcher code, we could fuse OptCat and OptCat_r into
a single definition. Here, we broke it out to make the dependency
a little simpler to reason about.
Used in an optimizing constructor for Cat.
Definition OptCat_r t1 t2 (r2:regexp t2) (r1:regexp t1) :=
match r2 in regexp t2' return regexp (t1 * t2') with
| Zero _ => Zero _
| Eps => Map (Fn_unit_right t1) r1
| r2 => Cat r1 r2
end.
match r2 in regexp t2' return regexp (t1 * t2') with
| Zero _ => Zero _
| Eps => Map (Fn_unit_right t1) r1
| r2 => Cat r1 r2
end.
An optimized constructor for Cat.
Definition OptCat t1 t2 (r1:regexp t1) (r2:regexp t2) :=
match r1 in regexp t1' return regexp (t1' * t2) with
| Zero _ => Zero _
| Eps => Map (Fn_unit_left t2) r2
| r1 => OptCat_r r2 r1
end.
match r1 in regexp t1' return regexp (t1' * t2) with
| Zero _ => Zero _
| Eps => Map (Fn_unit_left t2) r2
| r1 => OptCat_r r2 r1
end.
OptCat r1 r2 is equivalent to Cat r1 r2
Lemma opt_cat_r : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat_r r2 r1 [=] Cat r1 r2.
Proof.
destruct r2 ; simpl ; apply reg_eq_sym ;
(apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r).
Qed.
Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat r1 r2 [=] Cat r1 r2.
destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ;
apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l).
Qed.
OptCat_r r2 r1 [=] Cat r1 r2.
Proof.
destruct r2 ; simpl ; apply reg_eq_sym ;
(apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r).
Qed.
Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat r1 r2 [=] Cat r1 r2.
destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ;
apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l).
Qed.
Alt r Zero is equivalent to r.
Lemma alt_zero_r : forall t (r:regexp t), (Alt r (Zero _)) [=] r.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
Alt Zero r is equivalent to r.
Lemma alt_zero_l : forall t (r:regexp t), (Alt (Zero _) r) [=] r.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; intros ; mysimp ; repeat pv_in.
Qed.
Used in an optimizing version of Alt. Note that we don't easily
have a way to determine whether r2 = r1 so that we can collapse
Alt r r into r. Try writing this and see what goes wrong...
Definition OptAlt_r t (r2:regexp t) : regexp t -> regexp t :=
match r2 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r1 => Alt r1 r
end.
match r2 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r1 => Alt r1 r
end.
Optimized version of Alt.
Definition OptAlt t (r1:regexp t) : regexp t -> regexp t :=
match r1 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r2 => OptAlt_r r2 r
end.
match r1 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r2 => OptAlt_r r2 r
end.
OptAlt r1 r2 is equivalent to Alt r1 r2
Lemma opt_alt_r t (r1 r2:regexp t) : OptAlt_r r2 r1 [=] Alt r1 r2.
destruct r2 ; simpl ; try apply reg_eq_refl ; apply reg_eq_sym ; apply alt_zero_r.
Qed.
Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2.
destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ;
apply alt_zero_l.
Qed.
destruct r2 ; simpl ; try apply reg_eq_refl ; apply reg_eq_sym ; apply alt_zero_r.
Qed.
Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2.
destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ;
apply alt_zero_l.
Qed.
Optimizing version of Map.
Definition OptMap (t u: Set) (f: fn t u) (r1: regexp t) : regexp u :=
match r1 in regexp t' return regexp u with
| Zero _ => Zero _
| _ => Map f r1
end.
match r1 in regexp t' return regexp u with
| Zero _ => Zero _
| _ => Map f r1
end.
OptMap f r is equivalent to Map f r.
Lemma opt_map: forall (t u: Set) (f: fn t u) (r1: regexp t), OptMap f r1 [=] Map f r1.
unfold reg_eq ; destruct r1 ; simpl ; mysimp ; repeat pv_in.
Qed.
unfold reg_eq ; destruct r1 ; simpl ; mysimp ; repeat pv_in.
Qed.
Now we define what it means for a function to be a valid
parser. A function f: regexp t -> list char_p -> list (result_m t), is a
valid parser if, when given a regexp and a string of characters s, it
returns a list of values [v1,...,vn], such that { (s,vi) } is the
relation denoted by the regular expression.
Definition is_parser (f: forall t (r:regexp t),
wf_regexp r -> list char_p -> list t) : Prop :=
forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:t),
([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v).
wf_regexp r -> list char_p -> list t) : Prop :=
forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:t),
([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v).
Returns a regexp denoting { (null,v) | (null,v) in r }.
Fixpoint null t (r:regexp t) : regexp t :=
match r in regexp t' return regexp t' with
| Any => Zero _
| Char _ => Zero _
| Eps => Eps
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (null r1) (null r2)
| Cat t1 t2 r1 r2 => OptCat (null r1) (null r2)
| Star t _ => OptMap (Fn_empty_list t) Eps
| Map _ _ f r1 => OptMap f (null r1)
end.
match r in regexp t' return regexp t' with
| Any => Zero _
| Char _ => Zero _
| Eps => Eps
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (null r1) (null r2)
| Cat t1 t2 r1 r2 => OptCat (null r1) (null r2)
| Star t _ => OptMap (Fn_empty_list t) Eps
| Map _ _ f r1 => OptMap f (null r1)
end.
This is the heart of the algorithm. It returns a regexp denoting
{ (cs,v) | (c::cs,v) in r }.
Fixpoint deriv t (r:regexp t) (c:char_p) : regexp t :=
match r in regexp t' return regexp t' with
| Any => OptMap (Fn_const_char c) Eps
| Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _
| Eps => Zero _
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r))
| Map _ _ f r1 => OptMap f (deriv r1 c)
end.
match r in regexp t' return regexp t' with
| Any => OptMap (Fn_const_char c) Eps
| Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _
| Eps => Zero _
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r))
| Map _ _ f r1 => OptMap f (deriv r1 c)
end.
When we are done parsing using a regexp r, we are left with a regexp denoting
{ (null,v) | exists s.(s,v) in r }. This function computes all of the
values v in this set. To do so, it needs to be well-formed with respect to the
context.
Definition apply_null t (r:regexp t) : wf_regexp r -> list t.
refine (
fix apply_null t (r:regexp t) : wf_regexp r -> list t :=
match r in regexp t' return wf_regexp r -> list t' with
| Any => fun H => nil
| Char _ => fun H => nil
| Zero _ => fun H => nil
| Eps => fun H => tt::nil
| Star _ r => fun H => nil::nil
| Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _)
| Cat _ _ r1 r2 =>
fun H =>
let res1 := apply_null _ r1 _ in
let res2 := apply_null _ r2 _ in
fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1
| Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _)
end
) ; mysimp.
Defined.
Lemma InConcatMap A B (x:A) (y:B) :
forall xs, In x xs ->
forall ys, In y ys ->
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs).
Proof.
induction xs ; intro H ; [ contradiction H | destruct H ] ; s ; apply in_or_app.
left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ].
Qed.
refine (
fix apply_null t (r:regexp t) : wf_regexp r -> list t :=
match r in regexp t' return wf_regexp r -> list t' with
| Any => fun H => nil
| Char _ => fun H => nil
| Zero _ => fun H => nil
| Eps => fun H => tt::nil
| Star _ r => fun H => nil::nil
| Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _)
| Cat _ _ r1 r2 =>
fun H =>
let res1 := apply_null _ r1 _ in
let res2 := apply_null _ r2 _ in
fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1
| Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _)
end
) ; mysimp.
Defined.
Lemma InConcatMap A B (x:A) (y:B) :
forall xs, In x xs ->
forall ys, In y ys ->
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs).
Proof.
induction xs ; intro H ; [ contradiction H | destruct H ] ; s ; apply in_or_app.
left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ].
Qed.
Show that if (nil,v) is in r then apply_null r returns v as a result
Lemma ApplyNull1 t (r:regexp t) cs v H :
[[r]] cs v -> cs = nil -> In v (apply_null r H).
Proof.
induction 1 ; mysimp ; try congruence ; repeat
match goal with
| [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto
| [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _,
H1 : ?cs = nil |- _ ] =>
let l := fresh "l" in
generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ;
left
| [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] =>
rewrite (proof_irrelevance H1 H2) ; auto
| [ H : None = Some _ |- _ ] => congruence
end.
Qed.
Lemma InConcatMap1 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In x xs.
Proof.
induction xs ; mysimp ; repeat
match goal with
| [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto]
| [ H : In (?x, _) (map _ ?ys) |- _ ] =>
let l := fresh "l" in generalize H ; generalize ys as l;
induction l ; mysimp ; congruence
end.
Qed.
Lemma InConcatMap2 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In y ys.
Proof.
induction xs ; mysimp ;
match goal with
| [ H : In _ (map _ ?ys ++ _) |- _ ] =>
let H0 := fresh "H" in let l := fresh "l" in
generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ;
generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence
end.
Qed.
[[r]] cs v -> cs = nil -> In v (apply_null r H).
Proof.
induction 1 ; mysimp ; try congruence ; repeat
match goal with
| [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto
| [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _,
H1 : ?cs = nil |- _ ] =>
let l := fresh "l" in
generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ;
left
| [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] =>
rewrite (proof_irrelevance H1 H2) ; auto
| [ H : None = Some _ |- _ ] => congruence
end.
Qed.
Lemma InConcatMap1 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In x xs.
Proof.
induction xs ; mysimp ; repeat
match goal with
| [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto]
| [ H : In (?x, _) (map _ ?ys) |- _ ] =>
let l := fresh "l" in generalize H ; generalize ys as l;
induction l ; mysimp ; congruence
end.
Qed.
Lemma InConcatMap2 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In y ys.
Proof.
induction xs ; mysimp ;
match goal with
| [ H : In _ (map _ ?ys ++ _) |- _ ] =>
let H0 := fresh "H" in let l := fresh "l" in
generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ;
generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence
end.
Qed.
Show that if apply_null r returns v as a result, then (nil,v) is in r
Lemma ApplyNull2 t (r:regexp t) H v :
In v (apply_null r H) -> [[r]] nil v.
Proof.
induction r ; s ;
match goal with
| [ |- in_regexp (Cat _ _) _ _] =>
destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto |
eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto]
| [ H0 : In _ _ |- in_regexp (Alt _ _) _ _ ] =>
generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ;
[ apply Alt_left_i ; eapply IHr1 ; eauto
| eapply Alt_right_i ; eapply IHr2 ; eauto ]
| [ H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] =>
let l := fresh "l" in
generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ;
clear IHr H0 ; mysimp ; eapply Map_i ; eauto
end.
Qed.
In v (apply_null r H) -> [[r]] nil v.
Proof.
induction r ; s ;
match goal with
| [ |- in_regexp (Cat _ _) _ _] =>
destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto |
eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto]
| [ H0 : In _ _ |- in_regexp (Alt _ _) _ _ ] =>
generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ;
[ apply Alt_left_i ; eapply IHr1 ; eauto
| eapply Alt_right_i ; eapply IHr2 ; eauto ]
| [ H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] =>
let l := fresh "l" in
generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ;
clear IHr H0 ; mysimp ; eapply Map_i ; eauto
end.
Qed.
Show that the optimizing constructors preserve well-formedness.
Lemma wf_cat_opt : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2).
Proof.
destruct r1 ; mysimp ; destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt_r : forall t (r2 r1:regexp t),
wf_regexp r2 -> wf_regexp r1 -> wf_regexp (OptAlt_r r2 r1).
Proof.
destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt : forall t (r1 r2:regexp t),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2).
Proof.
destruct r1 ; mysimp ; apply wf_alt_opt_r ; mysimp.
Qed.
Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t),
wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1).
Proof.
destruct r1; mysimp.
Qed.
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2).
Proof.
destruct r1 ; mysimp ; destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt_r : forall t (r2 r1:regexp t),
wf_regexp r2 -> wf_regexp r1 -> wf_regexp (OptAlt_r r2 r1).
Proof.
destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt : forall t (r1 r2:regexp t),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2).
Proof.
destruct r1 ; mysimp ; apply wf_alt_opt_r ; mysimp.
Qed.
Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t),
wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1).
Proof.
destruct r1; mysimp.
Qed.
Show that null r is well-formed if r is well-formed.
Lemma wf_null : forall t (r:regexp t), wf_regexp r -> wf_regexp (null r).
induction r ; mysimp ;
(apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto.
Qed.
induction r ; mysimp ;
(apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto.
Qed.
Show that deriv r c is well-formed if r is well-formed.
Lemma wf_deriv : forall t (r:regexp t) c, wf_regexp r -> wf_regexp (deriv r c).
Proof.
induction r; mysimp ;
repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ;
simpl ; auto.
Qed.
Proof.
induction r; mysimp ;
repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ;
simpl ; auto.
Qed.
Lift derivative parsing from characters to strings
Fixpoint deriv_parse' t (r:regexp t) (cs:list char_p) : regexp t :=
match cs with
| nil => r
| c::cs' => deriv_parse' (deriv r c) cs'
end.
match cs with
| nil => r
| c::cs' => deriv_parse' (deriv r c) cs'
end.
Derivatives with respect to strings preserve well-formedness.
Lemma wf_derivs : forall cs t (r:regexp t), wf_regexp r -> wf_regexp (deriv_parse' r cs).
induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto.
Qed.
induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto.
Qed.
Finally, the derivative-based parser simply runs through the input, calculating
derivatives based on the characters it sees, and calls apply_null when it gets
to the end of the input.
Definition deriv_parse t (r:regexp t) (H:wf_regexp r) cs : list t :=
apply_null (deriv_parse' r cs) (wf_derivs cs r H).
apply_null (deriv_parse' r cs) (wf_derivs cs r H).
Tactic for helping to reason about the optimizing constructors.
Ltac pv_opt :=
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_alt r1 r2 cs v))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_cat r1 r2 cs v))
| [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] =>
apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail)
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] =>
generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp
| [ v : prod _ _ |- _ ] => destruct v
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| _ => auto with dfa
end.
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_alt r1 r2 cs v))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_cat r1 r2 cs v))
| [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] =>
apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail)
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] =>
generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp
| [ v : prod _ _ |- _ ] => destruct v
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| _ => auto with dfa
end.
null is correct.
Lemma Null1 : forall t (r:regexp t) cs v, [[r]] cs v -> cs = nil -> [[null r]] cs v.
Proof.
induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in.
Qed.
Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ;
try apply nil ; eapply @Map_i ; eauto.
Qed.
Proof.
induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in.
Qed.
Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ;
try apply nil ; eapply @Map_i ; eauto.
Qed.
deriv is correct part 1.
Lemma Deriv1 : forall t (r:regexp t) c cs v, [[r]] (c::cs) v -> [[deriv r c]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ;
match goal with
| [ H : in_regexp (Star _) _ _ |- _] =>
generalize (StarInv H); clear H ; mysimp ; subst
| _ => idtac
end ; try congruence ;
repeat (pv_opt ; match goal with
| [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] =>
destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt
| [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] =>
let H := fresh "H" in
assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto
| [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] =>
apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _))
| [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ;
rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in
| [ H2 : _::?cs = ?x ++ ?x1 |- _ ] =>
destruct x ; try congruence ; simpl in H2 ; injection H2 ;
s ; apply Cat_i ; auto with dfa
| _ => auto with dfa
end) ; pv_in.
Qed.
Proof.
induction r ; simpl ; intros ; try in_inv ;
match goal with
| [ H : in_regexp (Star _) _ _ |- _] =>
generalize (StarInv H); clear H ; mysimp ; subst
| _ => idtac
end ; try congruence ;
repeat (pv_opt ; match goal with
| [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] =>
destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt
| [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] =>
let H := fresh "H" in
assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto
| [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] =>
apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _))
| [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ;
rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in
| [ H2 : _::?cs = ?x ++ ?x1 |- _ ] =>
destruct x ; try congruence ; simpl in H2 ; injection H2 ;
s ; apply Cat_i ; auto with dfa
| _ => auto with dfa
end) ; pv_in.
Qed.
If null r matches cs returning v, then cs must be empty
Lemma NullNil : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil.
Proof.
induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _,
H : in_regexp (null ?r) _ _ |- _] =>
rewrite (IHr _ _ H) ; clear IHr ; auto
end.
Qed.
Proof.
induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _,
H : in_regexp (null ?r) _ _ |- _] =>
rewrite (IHr _ _ H) ; clear IHr ; auto
end.
Qed.
deriv is correct part 2.
Lemma Deriv2 : forall t (r:regexp t) c cs v, [[deriv r c]] cs v -> [[r]] (c::cs) v.
Proof.
induction r ; simpl ; intros ; repeat
(match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s
| [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] =>
let H1 := fresh "H" in
generalize (NullNil _ H) ; s ;
assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ;
pv_in ; apply Null2 ; auto
| [ H: in_regexp (deriv ?r1 _) ?x ?x1 |-
in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in
| [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] =>
let H := fresh "H" in
assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ;
eapply Star_cat_i ; eauto ; congruence
| [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto
| [ H : in_regexp (Star _) _ _ |- _ ] => fail 1
| _ => pv_opt ; in_inv
end).
Qed.
Proof.
induction r ; simpl ; intros ; repeat
(match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s
| [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] =>
let H1 := fresh "H" in
generalize (NullNil _ H) ; s ;
assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ;
pv_in ; apply Null2 ; auto
| [ H: in_regexp (deriv ?r1 _) ?x ?x1 |-
in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in
| [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] =>
let H := fresh "H" in
assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ;
eapply Star_cat_i ; eauto ; congruence
| [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto
| [ H : in_regexp (Star _) _ _ |- _ ] => fail 1
| _ => pv_opt ; in_inv
end).
Qed.
First half of correctness for deriv_parse'.
Lemma Deriv'1 cs t (r:regexp t) v :
[[deriv_parse' r cs]] nil v -> [[r]] cs v.
Proof.
induction cs ; mysimp. apply Deriv2. apply IHcs. auto.
Qed.
[[deriv_parse' r cs]] nil v -> [[r]] cs v.
Proof.
induction cs ; mysimp. apply Deriv2. apply IHcs. auto.
Qed.
Second half of correctness for deriv_parse'.
Lemma Deriv'2 cs t (r:regexp t) v :
[[r]] cs v -> [[deriv_parse' r cs]] nil v.
Proof.
induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto.
Qed.
[[r]] cs v -> [[deriv_parse' r cs]] nil v.
Proof.
induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto.
Qed.
First half of correctness for deriv_parse.
Lemma DerivParse1 cs t (r:regexp t) H vs :
deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v.
Proof.
unfold deriv_parse. intros. subst.
generalize (ApplyNull2 _ _ _ H1). apply Deriv'1.
Qed.
deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v.
Proof.
unfold deriv_parse. intros. subst.
generalize (ApplyNull2 _ _ _ H1). apply Deriv'1.
Qed.
Second half of correctness for deriv_parse.
Lemma DerivParse2 cs t (r:regexp t) v H :
[[r]] cs v -> In v (deriv_parse r H cs).
Proof.
unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto.
Qed.
[[r]] cs v -> In v (deriv_parse r H cs).
Proof.
unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto.
Qed.
deriv_parse is correct
Theorem DerivParse_is_parser : is_parser deriv_parse.
Proof.
unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto.
Qed.
End FNMAP.
Proof.
unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto.
Qed.
End FNMAP.
Now we need to translate our external representation, parser to
our internal representation regexp and build an appropriate ctxt mapping function names to functions of the right type.
Add a new function to the end of the context and return its position as
a "fresh" function name, along with the new context.
Definition extend_state(s:ctxt_t) t1 t2 (f:fn_result_m(t1,t2)) : (fn_name * ctxt_t) :=
(length s, s ++ (existT fn_result_m (t1,t2) f)::nil).
(length s, s ++ (existT fn_result_m (t1,t2) f)::nil).
Convert a parser with inlined functions to a regexp and a function map.
Fixpoint par2reg t (p:parser t)(s:ctxt_t) : (regexp t) * ctxt_t :=
match p in parser t return (regexp t) * ctxt_t with
| Any_p => (Any, s)
| Char_p b => (Char b, s)
| Eps_p => (Eps, s)
| Cat_p t1 t2 p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Cat r1 r2, s2)
| Zero_p t => (Zero t, s)
| Alt_p t p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Alt r1 r2, s2)
| Star_p t p1 =>
let (r1,s1) := par2reg p1 s in
(Star r1, s1)
| Map_p t1 t2 f p =>
let (r,s1) := par2reg p s in
let (n,s2) := extend_state s1 f in
(@Map t1 t2 (Fn_name n t1 t2) r, s2)
end.
match p in parser t return (regexp t) * ctxt_t with
| Any_p => (Any, s)
| Char_p b => (Char b, s)
| Eps_p => (Eps, s)
| Cat_p t1 t2 p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Cat r1 r2, s2)
| Zero_p t => (Zero t, s)
| Alt_p t p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Alt r1 r2, s2)
| Star_p t p1 =>
let (r1,s1) := par2reg p1 s in
(Star r1, s1)
| Map_p t1 t2 f p =>
let (r,s1) := par2reg p s in
let (n,s2) := extend_state s1 f in
(@Map t1 t2 (Fn_name n t1 t2) r, s2)
end.
Initial state for the translation.
Top-level translation of parsers to regexps.
Tactic to propagate information about the translation.
Ltac unfold_par2reg :=
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H := fresh "H" in
let x := fresh "x" in
let r := fresh "r" in
let s := fresh "s" in
assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ;
destruct H as [x H];
rewrite <- H in * ; destruct x as [r s]
end.
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H := fresh "H" in
let x := fresh "x" in
let r := fresh "r" in
let s := fresh "s" in
assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ;
destruct H as [x H];
rewrite <- H in * ; destruct x as [r s]
end.
We need to support weakening of contexts -- i.e., adding more
bindings to the end of the context. To that end, we define a
partial order on contexts and show that the translation respects
this form of weakening.
Definition ctxt_leq(c1 c2:ctxt_t) : Prop :=
exists c:ctxt_t, c2 = c1 ++ c.
Lemma ctxt_leq_trans : forall c1 c2 c3,
ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3.
Proof.
unfold ctxt_leq ; s. rewrite app_ass. eauto.
Qed.
Lemma ctxt_leq_refl : forall c, ctxt_leq c c.
unfold ctxt_leq ; s. exists nil. apply app_nil_end.
Qed.
Hint Resolve ctxt_leq_refl : dfa.
Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c').
unfold ctxt_leq ; s. eauto.
Qed.
Hint Resolve ctxt_ext : dfa.
exists c:ctxt_t, c2 = c1 ++ c.
Lemma ctxt_leq_trans : forall c1 c2 c3,
ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3.
Proof.
unfold ctxt_leq ; s. rewrite app_ass. eauto.
Qed.
Lemma ctxt_leq_refl : forall c, ctxt_leq c c.
unfold ctxt_leq ; s. exists nil. apply app_nil_end.
Qed.
Hint Resolve ctxt_leq_refl : dfa.
Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c').
unfold ctxt_leq ; s. eauto.
Qed.
Hint Resolve ctxt_ext : dfa.
Simplify reasoning about inductive hypotheses that depend upon a context.
Ltac p2rsimp :=
match goal with
| [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y)
| [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ;
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ;
intro H'
| _ => fail
end
| _ => idtac
end.
match goal with
| [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y)
| [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ;
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ;
intro H'
| _ => fail
end
| _ => idtac
end.
The translation only results in a greater or equal ctxt
Lemma p2r_extends : forall t (p:parser t) (c:ctxt_t), ctxt_leq c (snd (par2reg p c)).
Proof.
unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ;
repeat unfold_par2reg ; simpl ;
try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ;
try rewrite app_ass ; econstructor ; eauto.
Qed.
Proof.
unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ;
repeat unfold_par2reg ; simpl ;
try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ;
try rewrite app_ass ; econstructor ; eauto.
Qed.
Functions stay well-typed under greater or equal contexts.
Lemma extends_wf_fn : forall t1 t2 (f:fn t1 t2) c c', ctxt_leq c c' ->
wf_fn c f -> wf_fn c' f.
Proof.
unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ;
unfold fn_result, fn_result' in * ; induction f ; destruct c ;
[ mysimp | mysimp | mysimp | intro ] ; try congruence ;
simpl in *. destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp.
Qed.
wf_fn c f -> wf_fn c' f.
Proof.
unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ;
unfold fn_result, fn_result' in * ; induction f ; destruct c ;
[ mysimp | mysimp | mysimp | intro ] ; try congruence ;
simpl in *. destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp.
Qed.
Regexps stay well-typed under greater or equal contexts.
Lemma extends_wf_regexp : forall t (r:regexp t) c c',
ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r.
Proof.
induction r ; simpl ; s. eapply extends_wf_fn ; eauto. econstructor ; eauto.
Qed.
ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r.
Proof.
induction r ; simpl ; s. eapply extends_wf_fn ; eauto. econstructor ; eauto.
Qed.
Tactic for remembering a translated sub-term is well-formed.
Ltac p2rext :=
match goal with
| [ H : (?r,?s2) = par2reg ?p ?s |- _ ] =>
generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H
end.
Lemma FnResult : forall s p r,
fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p.
Proof.
induction s ; mysimp.
Qed.
Hint Resolve FnResult.
match goal with
| [ H : (?r,?s2) = par2reg ?p ?s |- _ ] =>
generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H
end.
Lemma FnResult : forall s p r,
fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p.
Proof.
induction s ; mysimp.
Qed.
Hint Resolve FnResult.
The translation results in a well-formed regexp.
Lemma p2r_wf : forall t (p:parser t) c, wf_regexp (snd (par2reg p c)) (fst (par2reg p c)).
Proof.
induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ;
try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith.
Qed.
Proof.
induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ;
try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith.
Qed.
Applying the same function in an extended environment yields the same result.
Lemma extend_apply : forall x t1 t2 (f:fn t1 t2) s (H:wf_fn s f) v1 H',
apply_fn (s ++ x) f H' v1 = apply_fn s f H v1.
Proof.
destruct f ; auto. simpl.
mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e.
induction s ; destruct f ; mysimp ; unfold fn_result, fn_result' in * |- ; simpl in * ;
try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto.
Qed.
Hint Resolve extend_apply : dfa.
apply_fn (s ++ x) f H' v1 = apply_fn s f H v1.
Proof.
destruct f ; auto. simpl.
mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e.
induction s ; destruct f ; mysimp ; unfold fn_result, fn_result' in * |- ; simpl in * ;
try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto.
Qed.
Hint Resolve extend_apply : dfa.
in_regexp respects weakening on states.
Lemma in_leq : forall t (r:regexp t) s cs v,
in_regexp s r cs v ->
forall s', ctxt_leq s s' -> in_regexp s' r cs v.
Proof.
induction 1 ; s ;
try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)).
apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa.
Qed.
in_regexp s r cs v ->
forall s', ctxt_leq s s' -> in_regexp s' r cs v.
Proof.
induction 1 ; s ;
try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)).
apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa.
Qed.
Newly-allocated functions are well-formed with respect to the resulting context.
Lemma new_fn_wf : forall t1 t2 f s, wf_fn (s ++ existT fn_result_m (t1,t2) f :: nil)
(Fn_name (length s) t1 t2).
Proof.
induction s; mysimp.
Qed.
Hint Resolve new_fn_wf : dfa.
(Fn_name (length s) t1 t2).
Proof.
induction s; mysimp.
Qed.
Hint Resolve new_fn_wf : dfa.
If (cs,v) is in [p] then (cs,v) is in [r] under the output context s.
Lemma p2r_ok : forall t (p:parser t) cs v,
in_parser p cs v ->
forall s,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v.
Proof.
induction 1 ; simpl ; intro s ; try (constructor ; fail) ;
repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ;
try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) ||
(apply Alt_right_i ; auto).
apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)).
generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ;
unfold fn_result, fn_result' in * ;
simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto.
generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto.
eapply in_leq ; eauto. auto with dfa.
Qed.
in_parser p cs v ->
forall s,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v.
Proof.
induction 1 ; simpl ; intro s ; try (constructor ; fail) ;
repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ;
try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) ||
(apply Alt_right_i ; auto).
apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)).
generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ;
unfold fn_result, fn_result' in * ;
simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto.
generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto.
eapply in_leq ; eauto. auto with dfa.
Qed.
If (cs,v) is in [r] under context s2, and r is well-formed with respect to
context s1, and s1 <= s2, then (cs,v) is in [r] under context s1.
Lemma extends_in :
forall t (r:regexp t) cs v s1 s2,
in_regexp s2 r cs v ->
wf_regexp s1 r ->
ctxt_leq s1 s2 ->
in_regexp s1 r cs v.
Proof.
induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) ||
(eapply Alt_right_i ; eauto) ||
(apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s.
Qed.
Hint Resolve extends_in : dfa.
forall t (r:regexp t) cs v s1 s2,
in_regexp s2 r cs v ->
wf_regexp s1 r ->
ctxt_leq s1 s2 ->
in_regexp s1 r cs v.
Proof.
induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) ||
(eapply Alt_right_i ; eauto) ||
(apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s.
Qed.
Hint Resolve extends_in : dfa.
Used for induction over proofs of (cs,v) in Star r.
Fixpoint in_unwind s (t:Set) (r:regexp t) n cs (v:list t) : Prop :=
match n with
| 0 => cs = nil /\ v = nil
| S m =>
exists cs1, exists cs2,
exists v1, exists v2,
cs = cs1 ++ cs2 /\ v = (v1::v2) /\
cs1 <> nil /\
in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2
end.
match n with
| 0 => cs = nil /\ v = nil
| S m =>
exists cs1, exists cs2,
exists v1, exists v2,
cs = cs1 ++ cs2 /\ v = (v1::v2) /\
cs1 <> nil /\
in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2
end.
If (cs,v) is in Star r then there exists some n such that
(cs,v) is in the nth unwinding of r.
This gives us an easy inner induction when reasoning about the
meaning of Star r, though it's not strictly necessary.
Lemma star_rep' : forall s (t:Set) (r:regexp t) cs (v:t),
in_regexp s r cs v ->
match r in regexp t' return t' -> Prop with
| Star _ r1 => fun v => exists n, in_unwind s r1 n cs v
| _ => fun _ => True
end v.
Proof.
induction 1 ; simpl ; auto ;
[exists 0 ; simpl ; auto | idtac ] ;
let H := fresh "H" in let n := fresh "n" in destruct IHin_regexp2 as [n H];
exists (S n) ; repeat econstructor ; eauto.
Qed.
Lemma star_rep : forall s (t:Set) (r:regexp t) cs (v:list t),
in_regexp s (Star r) cs v -> exists n, in_unwind s r n cs v.
Proof.
intros s t r cs v H. apply (star_rep' H).
Qed.
in_regexp s r cs v ->
match r in regexp t' return t' -> Prop with
| Star _ r1 => fun v => exists n, in_unwind s r1 n cs v
| _ => fun _ => True
end v.
Proof.
induction 1 ; simpl ; auto ;
[exists 0 ; simpl ; auto | idtac ] ;
let H := fresh "H" in let n := fresh "n" in destruct IHin_regexp2 as [n H];
exists (S n) ; repeat econstructor ; eauto.
Qed.
Lemma star_rep : forall s (t:Set) (r:regexp t) cs (v:list t),
in_regexp s (Star r) cs v -> exists n, in_unwind s r n cs v.
Proof.
intros s t r cs v H. apply (star_rep' H).
Qed.
It's interesting to try to define star_rep without the
match and still get the proof to go through. We can't
do induction on in_regexp s (Star r1) cs v directly because
we need to limit the arguments to variables (including
the return type t. And notice that we had to use the
convoy pattern to refine v's type once we know t must
be a list t' for some t'.
This is the key lemma regarding the translation: If (cs,v) is in r under
context s, where (r,s) = par2reg p, then (cs,v) is in p.
Lemma r2p_ok : forall t (p:parser t) s cs v,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v ->
in_parser p cs v.
Proof.
induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ;
simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) ||
generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) ||
generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ;
try (econstructor ; eauto ; fail) ;
repeat match goal with
| [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _,
H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ;
rewrite <- H ; simpl ; clear IH ; intros
end ;
try (econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa).
generalize (star_rep H0). mysimp. clear H0. subst.
generalize cs v H4. clear cs v H4.
induction x ; s ; constructor ; auto.
match goal with
| [ |- in_parser (Map_p ?r _) _ (?f _) ] => assert (f = r)
end.
generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp.
rewrite (proof_irrelevance e0 (eq_refl _)). auto.
rewrite H5. apply (@Map_pi _ _ t). apply H4.
apply (@extends_in _ _ cs x s0 (snd (extend_state s0 _)) H1 H2). unfold extend_state.
simpl. auto with dfa.
Qed.
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v ->
in_parser p cs v.
Proof.
induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ;
simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) ||
generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) ||
generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ;
try (econstructor ; eauto ; fail) ;
repeat match goal with
| [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _,
H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ;
rewrite <- H ; simpl ; clear IH ; intros
end ;
try (econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa).
generalize (star_rep H0). mysimp. clear H0. subst.
generalize cs v H4. clear cs v H4.
induction x ; s ; constructor ; auto.
match goal with
| [ |- in_parser (Map_p ?r _) _ (?f _) ] => assert (f = r)
end.
generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp.
rewrite (proof_irrelevance e0 (eq_refl _)). auto.
rewrite H5. apply (@Map_pi _ _ t). apply H4.
apply (@extends_in _ _ cs x s0 (snd (extend_state s0 _)) H1 H2). unfold extend_state.
simpl. auto with dfa.
Qed.
The translation preserves meaning
Theorem parser2regexp_equiv :
forall t (p:parser t) cs v,
in_parser p cs v <->
in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v.
Proof.
unfold parser2regexp ; mysimp ; [eapply p2r_ok | eapply r2p_ok] ; eauto.
Qed.
forall t (p:parser t) cs v,
in_parser p cs v <->
in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v.
Proof.
unfold parser2regexp ; mysimp ; [eapply p2r_ok | eapply r2p_ok] ; eauto.
Qed.
Finally -- convert the parser to a regexp and then run the
derivative-based parser on this.
Definition parse t (p:parser t) : list char_p -> list t :=
deriv_parse (snd (parser2regexp p))
(fst (parser2regexp p)) (p2r_wf p _).
deriv_parse (snd (parser2regexp p))
(fst (parser2regexp p)) (p2r_wf p _).
We can conclude that the parser respects the semantics
Theorem parse_correct : forall t (p:parser t) cs v,
in_parser p cs v <-> In v (parse p cs).
Proof.
intros t p cs v ;
generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p))
(p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ;
generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto.
Qed.
End PARSER.
in_parser p cs v <-> In v (parse p cs).
Proof.
intros t p cs v ;
generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p))
(p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ;
generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto.
Qed.
End PARSER.
This page has been generated by coqdoc