Library LexParser2
Require Import Coq.Program.Equality.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Bool.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2.
Require Import Coq.Init.Logic.
Require Import List.
Require Import Arith.
Require Import Bool.
Require Import Eqdep.
Unset Automatic Introduction.
Set Implicit Arguments.
Axiom proof_irrelevance : forall (P:Prop) (H1 H2:P), H1 = H2.
We parameterize the development over a set of characters
Now we also parameterize over names for types used in the semantic actions.
And we require a decidable equality for type names.
And we require an interpretation of type names as sets.
An inductively generated set of names for the types of results
returned by the parser. We need at least unit, char, pairs, and
lists, as well as user-defined types (tipe).
Inductive result : Set :=
| unit_t : result
| char_t : result
| pair_t : result -> result -> result
| list_t : result -> result
| sum_t : result -> result -> result
| tipe_t : tipe -> result.
| unit_t : result
| char_t : result
| pair_t : result -> result -> result
| list_t : result -> result
| sum_t : result -> result -> result
| tipe_t : tipe -> result.
This allows us to build an equality on results.
Definition result_eq : forall (r1 r2:result), {r1=r2}+{r1<>r2}.
decide equality. apply tipe_eq.
Defined.
decide equality. apply tipe_eq.
Defined.
Now we give an interpretation of results as Coq sets.
Fixpoint result_m(t:result) : Set :=
(match t with
| unit_t => unit
| char_t => char_p
| pair_t t1 t2 => (result_m t1) * (result_m t2)
| list_t t1 => list (result_m t1)
| sum_t t1 t2 => (result_m t1) + (result_m t2)
| tipe_t t => tipe_m t
end)%type.
(match t with
| unit_t => unit
| char_t => char_p
| pair_t t1 t2 => (result_m t1) * (result_m t2)
| list_t t1 => list (result_m t1)
| sum_t t1 t2 => (result_m t1) + (result_m t2)
| tipe_t t => tipe_m t
end)%type.
The parsers are indexed by the return type as a result, and we've
added a new kind of parser Map_p which is used to transform the
result of one parser to another.
Inductive parser : result -> Set :=
| Any_p : parser char_t
| Char_p : char_p -> parser char_t
| Eps_p : parser unit_t
| Cat_p : forall t1 t2 (r1:parser t1) (r2:parser t2), parser (pair_t t1 t2)
| Zero_p : forall t, parser t
| Alt_p : forall t (r1 r2:parser t), parser t
| Star_p : forall t, parser t -> parser (list_t t)
| Map_p : forall t1 t2, ((result_m t1) -> (result_m t2)) -> parser t1 -> parser t2.
| Any_p : parser char_t
| Char_p : char_p -> parser char_t
| Eps_p : parser unit_t
| Cat_p : forall t1 t2 (r1:parser t1) (r2:parser t2), parser (pair_t t1 t2)
| Zero_p : forall t, parser t
| Alt_p : forall t (r1 r2:parser t), parser t
| Star_p : forall t, parser t -> parser (list_t t)
| Map_p : forall t1 t2, ((result_m t1) -> (result_m t2)) -> parser t1 -> parser t2.
The semantics relates input strings (as lists of characters) to result values.
Inductive in_parser : forall t, parser t -> list char_p -> (result_m t) -> Prop :=
| Any_pi : forall c, in_parser Any_p (c::nil) c
| Char_pi : forall c, in_parser (Char_p c) (c::nil) c
| Eps_pi : in_parser Eps_p nil tt
| Alt_left_pi : forall t (p1 p2:parser t) cs (v:result_m t),
in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v
| Alt_right_pi : forall t (p1 p2:parser t) cs (v:result_m t),
in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v
| Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2,
in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 ->
in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2)
| Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil
| Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2,
cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 ->
in_parser (Star_p p) (cs1 ++ cs2) (v1::v2)
| Map_pi : forall t1 t2 (f:result_m t1 -> result_m t2) (p:parser t1) cs v,
in_parser p cs v -> in_parser (Map_p _ f p) cs (f v).
| Any_pi : forall c, in_parser Any_p (c::nil) c
| Char_pi : forall c, in_parser (Char_p c) (c::nil) c
| Eps_pi : in_parser Eps_p nil tt
| Alt_left_pi : forall t (p1 p2:parser t) cs (v:result_m t),
in_parser p1 cs v -> in_parser (Alt_p p1 p2) cs v
| Alt_right_pi : forall t (p1 p2:parser t) cs (v:result_m t),
in_parser p2 cs v -> in_parser (Alt_p p1 p2) cs v
| Cat_pi : forall t1 t2 (p1:parser t1) (p2:parser t2) cs1 cs2 v1 v2,
in_parser p1 cs1 v1 -> in_parser p2 cs2 v2 ->
in_parser (Cat_p p1 p2) (cs1 ++ cs2) (v1,v2)
| Star_eps_pi : forall t (p:parser t), in_parser (Star_p p) nil nil
| Star_cat_pi : forall t (p:parser t) cs1 cs2 v1 v2,
cs1 <> nil -> in_parser p cs1 v1 -> in_parser (Star_p p) cs2 v2 ->
in_parser (Star_p p) (cs1 ++ cs2) (v1::v2)
| Map_pi : forall t1 t2 (f:result_m t1 -> result_m t2) (p:parser t1) cs v,
in_parser p cs v -> in_parser (Map_p _ f p) cs (f v).
Note that for Star_cat_pi we require cs1 to be non-empty -- this is
crucial for ensuring that any regular expression matches a string and
returns a finite list of associated values. Otherwise, something like
Star Eps would take the empty string to an infinite set of possible
values.
Internally, we translate parsers to a representation called regexp
where all of the functions are replaced with a function name that we can
look up in an environment. This will allow us to put together a decidable
equality on regular expressions, which will in turn, allow us to hash-cons
them (though we don't take advantage of this yet.)
We'll represent function names as positions in an environment list.
In addition to user-level functions, we need a few built-in functions that
are used during optimization.
Inductive fn : result -> result -> Set :=
| Fn_name : forall (f:fn_name) t1 t2, fn t1 t2
| Fn_const_char : forall (c:char_p), fn unit_t char_t
| Fn_empty_list : forall t, fn unit_t (list_t t)
| Fn_cons : forall t, fn (pair_t t (list_t t)) (list_t t)
| Fn_unit_left : forall t, fn t (pair_t unit_t t)
| Fn_unit_right : forall t, fn t (pair_t t unit_t)
.
| Fn_name : forall (f:fn_name) t1 t2, fn t1 t2
| Fn_const_char : forall (c:char_p), fn unit_t char_t
| Fn_empty_list : forall t, fn unit_t (list_t t)
| Fn_cons : forall t, fn (pair_t t (list_t t)) (list_t t)
| Fn_unit_left : forall t, fn t (pair_t unit_t t)
| Fn_unit_right : forall t, fn t (pair_t t unit_t)
.
Finally, a regexp is just like a parser except that the Map constructor
takes a fn instead of an actual function. Note that this syntax doesn't preclude
us from using a function name with the wrong type -- we'll capture this constraint
later on.
Inductive regexp : result -> Set :=
| Any : regexp char_t
| Char : char_p -> regexp char_t
| Eps : regexp unit_t
| Cat : forall t1 t2, regexp t1 -> regexp t2 -> regexp (pair_t t1 t2)
| Alt : forall t, regexp t -> regexp t -> regexp t
| Zero : forall t, regexp t
| Star : forall t, regexp t -> regexp (list_t t)
| Map : forall (t u:result) (f:fn t u), regexp t -> regexp u.
| Any : regexp char_t
| Char : char_p -> regexp char_t
| Eps : regexp unit_t
| Cat : forall t1 t2, regexp t1 -> regexp t2 -> regexp (pair_t t1 t2)
| Alt : forall t, regexp t -> regexp t -> regexp t
| Zero : forall t, regexp t
| Star : forall t, regexp t -> regexp (list_t t)
| Map : forall (t u:result) (f:fn t u), regexp t -> regexp u.
A simplification tactic used through the development
Ltac mysimp :=
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : context[result_eq ?e1 ?e2] |- _ ] =>
destruct (result_eq e1 e2) ; simpl in * ; try discriminate
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; auto.
simpl in * ; intros ;
repeat match goal with
| [ |- context[char_eq ?x ?y] ] => destruct (char_eq x y) ; auto
| [ |- _ /\ _ ] => split
| [ H : context[result_eq ?e1 ?e2] |- _ ] =>
destruct (result_eq e1 e2) ; simpl in * ; try discriminate
| [ H : existT ?f ?t ?x = existT ?f ?t ?y |- _ ] =>
generalize (inj_pairT2 _ f t x y H) ; clear H ; intro H ; subst
| [ H : _ /\ _ |- _ ] => destruct H
| [ |- context[ _ ++ nil ] ] => rewrite <- app_nil_end
| [ H : exists x, _ |- _ ] => destruct H
| [ H : _ \/ _ |- _] => destruct H
| [ H : _ <-> _ |- _] => destruct H
| [ |- _ <-> _ ] => split
| [ H : _::_ = _::_ |- _] => injection H ; clear H
| _ => idtac
end ; auto.
Simplificaton followed by substitution.
Ltac s := repeat (mysimp ; subst).
Now we can try to define a syntactic equality checker on regexps. We'll
begin by writing some simple functions that calculate booleans to avoid
some hair with dependencies.
Definition s2b (P Q:Prop) (x:{P}+{Q}) : bool :=
match x with
| left _ => true
| right _ => false
end.
match x with
| left _ => true
| right _ => false
end.
Function equality
Definition fn_eq t1 u1 (f1:fn t1 u1) t2 u2 (f2:fn t2 u2) : bool :=
match f1, f2 with
| Fn_name f1 t1 u1, Fn_name f2 t2 u2 =>
s2b (fn_name_eq f1 f2) && s2b (result_eq t1 t2) && s2b (result_eq u1 u2)
| Fn_const_char c1, Fn_const_char c2 => if char_eq c1 c2 then true else false
| Fn_empty_list t1, Fn_empty_list t2 => s2b (result_eq t1 t2)
| Fn_cons t1, Fn_cons t2 => s2b (result_eq t1 t2)
| Fn_unit_left t1, Fn_unit_left t2 => s2b (result_eq t1 t2)
| Fn_unit_right t1, Fn_unit_right t2 => s2b (result_eq t1 t2)
| _, _ => false
end.
match f1, f2 with
| Fn_name f1 t1 u1, Fn_name f2 t2 u2 =>
s2b (fn_name_eq f1 f2) && s2b (result_eq t1 t2) && s2b (result_eq u1 u2)
| Fn_const_char c1, Fn_const_char c2 => if char_eq c1 c2 then true else false
| Fn_empty_list t1, Fn_empty_list t2 => s2b (result_eq t1 t2)
| Fn_cons t1, Fn_cons t2 => s2b (result_eq t1 t2)
| Fn_unit_left t1, Fn_unit_left t2 => s2b (result_eq t1 t2)
| Fn_unit_right t1, Fn_unit_right t2 => s2b (result_eq t1 t2)
| _, _ => false
end.
Regexp equality
Fixpoint regexp_eq' t1 (r1:regexp t1) t2 (r2:regexp t2) : bool :=
match r1, r2 with
| Any, Any => true
| Char c1, Char c2 => if char_eq c1 c2 then true else false
| Eps, Eps => true
| Cat t1a t1b r1a r1b, Cat t2a t2b r2a r2b =>
regexp_eq' r1a r2a && regexp_eq' r1b r2b
| Alt t1 r1a r1b, Alt t2 r2a r2b =>
regexp_eq' r1a r2a && regexp_eq' r1b r2b
| Zero t1, Zero t2 => s2b(result_eq t1 t2)
| Star t1 r1a, Star t2 r2a => regexp_eq' r1a r2a
| Map t1 u1 f1 r1a, Map t2 u2 f2 r2a => fn_eq f1 f2 && regexp_eq' r1a r2a
| _, _ => false
end.
match r1, r2 with
| Any, Any => true
| Char c1, Char c2 => if char_eq c1 c2 then true else false
| Eps, Eps => true
| Cat t1a t1b r1a r1b, Cat t2a t2b r2a r2b =>
regexp_eq' r1a r2a && regexp_eq' r1b r2b
| Alt t1 r1a r1b, Alt t2 r2a r2b =>
regexp_eq' r1a r2a && regexp_eq' r1b r2b
| Zero t1, Zero t2 => s2b(result_eq t1 t2)
| Star t1 r1a, Star t2 r2a => regexp_eq' r1a r2a
| Map t1 u1 f1 r1a, Map t2 u2 f2 r2a => fn_eq f1 f2 && regexp_eq' r1a r2a
| _, _ => false
end.
A tactic for help in proving the correctness of the equality routines
Ltac eq_help :=
match goal with
| [ |- _ /\ _ ] => split ; auto
| [ H : context[char_eq ?c ?c0] |- _ ] =>
destruct (char_eq c c0) ; subst ; auto ; try congruence
| [ H : _ && _ = true |- _ ] => generalize (andb_prop _ _ H) ; clear H ; intro H ;
destruct H
| [ H : _ /\ _ |- _] => destruct H ; subst
| [ H : s2b ?e = true |- _ ] => destruct e ; simpl in H ; subst ; try congruence
| _ => auto
end.
match goal with
| [ |- _ /\ _ ] => split ; auto
| [ H : context[char_eq ?c ?c0] |- _ ] =>
destruct (char_eq c c0) ; subst ; auto ; try congruence
| [ H : _ && _ = true |- _ ] => generalize (andb_prop _ _ H) ; clear H ; intro H ;
destruct H
| [ H : _ /\ _ |- _] => destruct H ; subst
| [ H : s2b ?e = true |- _ ] => destruct e ; simpl in H ; subst ; try congruence
| _ => auto
end.
If fn_eq returns true, then we know the functions and their types are equal.
Lemma fn_eq_corr : forall t1 u1 (f1:fn t1 u1) t2 u2 (f2:fn t2 u2),
fn_eq f1 f2 = true ->
t1 = t2 /\ u1 = u2 /\
(existT (fun p => fn (fst p) (snd p)) (t1,u1) f1) =
(existT (fun p => fn (fst p) (snd p)) (t2,u2) f2).
Proof.
induction f1 ; induction f2 ; simpl ; intros; try congruence ;
repeat eq_help.
Qed.
fn_eq f1 f2 = true ->
t1 = t2 /\ u1 = u2 /\
(existT (fun p => fn (fst p) (snd p)) (t1,u1) f1) =
(existT (fun p => fn (fst p) (snd p)) (t2,u2) f2).
Proof.
induction f1 ; induction f2 ; simpl ; intros; try congruence ;
repeat eq_help.
Qed.
If regexp_eq' returns true, then we know the regexps and their types are equal.
Lemma regexp_eq'_corr : forall t1 (r1:regexp t1) t2 (r2:regexp t2),
regexp_eq' r1 r2 = true ->
t1 = t2 /\ (existT regexp t1 r1) = (existT regexp t2 r2).
Proof.
induction r1 ; destruct r2 ; simpl ; intros ; try congruence ; repeat (eq_help ;
match goal with
| [ IH : forall _ _, regexp_eq' ?r1 _ = true -> _, H : regexp_eq' ?r1 _ = true |- _]=>
generalize (IH _ _ H) ; clear IH ; intros ; s
| [ H : fn_eq ?f1 ?f2 = true |- _ ] => generalize (fn_eq_corr _ _ H) ; intros ;
clear H
| _ => auto
end).
mysimp.
Qed.
regexp_eq' r1 r2 = true ->
t1 = t2 /\ (existT regexp t1 r1) = (existT regexp t2 r2).
Proof.
induction r1 ; destruct r2 ; simpl ; intros ; try congruence ; repeat (eq_help ;
match goal with
| [ IH : forall _ _, regexp_eq' ?r1 _ = true -> _, H : regexp_eq' ?r1 _ = true |- _]=>
generalize (IH _ _ H) ; clear IH ; intros ; s
| [ H : fn_eq ?f1 ?f2 = true |- _ ] => generalize (fn_eq_corr _ _ H) ; intros ;
clear H
| _ => auto
end).
mysimp.
Qed.
Now we can use this routine when we try to optimize Alt.
Definition regexp_eq t (r1 r2:regexp t) : {r1 = r2} + {True} :=
match regexp_eq' r1 r2 as x return (regexp_eq' r1 r2 = x) -> {r1 = r2} + {True} with
| true => fun H => left _ (inj_pairT2 _ _ _ _ _ (proj2 (regexp_eq'_corr _ _ H)))
| false => fun H => right _ I
end (eq_refl _).
match regexp_eq' r1 r2 as x return (regexp_eq' r1 r2 = x) -> {r1 = r2} + {True} with
| true => fun H => left _ (inj_pairT2 _ _ _ _ _ (proj2 (regexp_eq'_corr _ _ H)))
| false => fun H => right _ I
end (eq_refl _).
Function contexts map function names to a dependent pair consisting of
- a pair of results (t1,t2)
- a function of type result_m t1 -> result_m t2.
Definition fn_result_m(p:result*result) := result_m (fst p) -> result_m (snd p).
Definition ctxt_t := list (sigT fn_result_m).
Definition fn_result'(G:ctxt_t)(n:fn_name) : option (result*result)%type :=
match nth_error G n with
| None => None
| Some (existT p _) => Some p
end.
Definition ctxt_t := list (sigT fn_result_m).
Definition fn_result'(G:ctxt_t)(n:fn_name) : option (result*result)%type :=
match nth_error G n with
| None => None
| Some (existT p _) => Some p
end.
An empty set
Inductive void : Set := .
Definition fn_result_m_opt(p:option(result*result)) : Set :=
match p with | None => void | Some p => fn_result_m p end.
Definition fn_result_m_opt(p:option(result*result)) : Set :=
match p with | None => void | Some p => fn_result_m p end.
This looks up a function in the environment, given a proof that
the function name is in bounds.
Fixpoint lookup_fn'(n:fn_name) :
forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) :=
match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with
| 0 =>
fun G =>
match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with
| nil => fun H => match lt_n_O _ H with end
| (existT p f)::_ => fun H => f
end
| S m =>
fun G =>
match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with
| nil => fun H => match lt_n_O _ H with end
| _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H)
end
end.
forall (G:ctxt_t), n < length G -> fn_result_m_opt (fn_result' G n) :=
match n return forall G, n < length G -> fn_result_m_opt (fn_result' G n) with
| 0 =>
fun G =>
match G return 0 < length G -> fn_result_m_opt (fn_result' G 0) with
| nil => fun H => match lt_n_O _ H with end
| (existT p f)::_ => fun H => f
end
| S m =>
fun G =>
match G return (S m) < length G -> fn_result_m_opt (fn_result' G (S m)) with
| nil => fun H => match lt_n_O _ H with end
| _::G' => fun H => lookup_fn' G' (lt_S_n _ _ H)
end
end.
In this section, we assume some context and some map for function names.
Specialize the lookup to the parameters
Definition fn_result := fn_result' fn_ctxt.
Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H.
Definition lookup_fn(n:fn_name)(H:n < length fn_ctxt) := lookup_fn' fn_ctxt H.
A function is well-formed if all of the function names have the right
types when we look them up in the fn_ctxt.
Definition wf_fn t u (f:fn t u) : Prop :=
match f in fn u' v' with
| Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2)
| _ => True
end.
match f in fn u' v' with
| Fn_name f t1 t2 => f < length fn_ctxt /\ fn_result f = Some (t1,t2)
| _ => True
end.
A predicate for determining if a regexp is type-consistent with
the given fn_ctxt. Note that by construction, the only things that
can be ill-typed are the embedded fns in a Map.
Fixpoint wf_regexp t (r:regexp t) : Prop :=
match r in regexp t' with
| Any => True
| Char _ => True
| Eps => True
| Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Zero _ => True
| Star _ r => wf_regexp r
| Map _ _ f r => wf_regexp r /\ wf_fn f
end.
match r in regexp t' with
| Any => True
| Char _ => True
| Eps => True
| Cat _ _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Alt _ r1 r2 => wf_regexp r1 /\ wf_regexp r2
| Zero _ => True
| Star _ r => wf_regexp r
| Map _ _ f r => wf_regexp r /\ wf_fn f
end.
Convert a well-formed fn to an actual function, using the fn_map.
Definition apply_fn t1 t2 (f:fn t1 t2) : wf_fn f -> result_m t1 -> result_m t2.
refine (fun t1 t2 (f:fn t1 t2) =>
match f in fn t1' t2' return wf_fn f -> result_m t1' -> result_m t2' with
| Fn_name f t1 t2 => fun H => _
| Fn_const_char c => fun _ => fun _ => c
| Fn_empty_list t => fun _ => fun _ => nil
| Fn_cons t => fun _ => fun p => (fst p)::(snd p)
| Fn_unit_left t => fun _ => fun x => (tt,x)
| Fn_unit_right t => fun _ => fun x => (x,tt)
end) ; simpl in * ; destruct H ; unfold fn_result in * ;
generalize (@lookup_fn f0 H). rewrite H0. auto.
Defined.
refine (fun t1 t2 (f:fn t1 t2) =>
match f in fn t1' t2' return wf_fn f -> result_m t1' -> result_m t2' with
| Fn_name f t1 t2 => fun H => _
| Fn_const_char c => fun _ => fun _ => c
| Fn_empty_list t => fun _ => fun _ => nil
| Fn_cons t => fun _ => fun p => (fst p)::(snd p)
| Fn_unit_left t => fun _ => fun x => (tt,x)
| Fn_unit_right t => fun _ => fun x => (x,tt)
end) ; simpl in * ; destruct H ; unfold fn_result in * ;
generalize (@lookup_fn f0 H). rewrite H0. auto.
Defined.
Now we can give a semantic interpretation to the regexps under a given context.
Inductive in_regexp : forall t, regexp t -> list char_p -> (result_m t) -> Prop :=
| Any_i : forall c, in_regexp Any (c::nil) c
| Char_i : forall c, in_regexp (Char c) (c::nil) c
| Eps_i : in_regexp Eps nil tt
| Alt_left_i : forall t (r1 r2:regexp t) cs (v:result_m t),
in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v
| Alt_right_i : forall t (r1 r2:regexp t) cs (v:result_m t),
in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v
| Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2,
in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2)
| Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil
| Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2,
cs1 <> nil -> in_regexp r cs1 v1 -> in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2)
| Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f),
apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2.
Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i Star_eps_i Star_cat_i Map_i : dfa.
Notation "[[ r ]]" := (in_regexp r) (at level 0).
| Any_i : forall c, in_regexp Any (c::nil) c
| Char_i : forall c, in_regexp (Char c) (c::nil) c
| Eps_i : in_regexp Eps nil tt
| Alt_left_i : forall t (r1 r2:regexp t) cs (v:result_m t),
in_regexp r1 cs v -> in_regexp (Alt r1 r2) cs v
| Alt_right_i : forall t (r1 r2:regexp t) cs (v:result_m t),
in_regexp r2 cs v -> in_regexp (Alt r1 r2) cs v
| Cat_i : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs1 cs2 v1 v2,
in_regexp r1 cs1 v1 -> in_regexp r2 cs2 v2 -> in_regexp (Cat r1 r2) (cs1++cs2) (v1,v2)
| Star_eps_i : forall t (r:regexp t), in_regexp (Star r) nil nil
| Star_cat_i : forall t (r:regexp t) cs1 cs2 v1 v2,
cs1 <> nil -> in_regexp r cs1 v1 -> in_regexp (Star r) cs2 v2 -> in_regexp (Star r) (cs1++cs2) (v1::v2)
| Map_i : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v1 v2 (H:wf_fn f),
apply_fn f H v1 = v2 -> in_regexp r cs v1 -> in_regexp (Map f r) cs v2.
Hint Resolve Any_i Char_i Eps_i Alt_left_i Alt_right_i Cat_i Star_eps_i Star_cat_i Map_i : dfa.
Notation "[[ r ]]" := (in_regexp r) (at level 0).
Equivalence of regular expression parsers.
Definition reg_eq t (r1 r2: regexp t) : Prop := forall cs v, [[r1]] cs v <-> [[r2]] cs v.
Infix "[=]" := reg_eq (right associativity, at level 85).
Infix "[=]" := reg_eq (right associativity, at level 85).
Reflexivity
Lemma reg_eq_refl : forall t (r:regexp t), r [=] r.
unfold reg_eq ; tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
unfold reg_eq ; tauto.
Qed.
Hint Resolve reg_eq_refl : dfa.
Transitivity
Lemma reg_eq_trans : forall t (r1 r2 r3: regexp t), r1 [=] r2 -> r2 [=] r3 -> r1 [=] r3.
unfold reg_eq ; mysimp ; generalize (H cs v) (H0 cs v) ; firstorder.
Qed.
unfold reg_eq ; mysimp ; generalize (H cs v) (H0 cs v) ; firstorder.
Qed.
Symmetry
Lemma reg_eq_sym : forall t (r1 r2: regexp t), r1 [=] r2 -> r2 [=] r1.
unfold reg_eq ; mysimp ; generalize (H cs v) ; firstorder.
Qed.
unfold reg_eq ; mysimp ; generalize (H cs v) ; firstorder.
Qed.
We define some explicit inversion principles for the regexp constructors
so that we can invert them even when the arguments aren't variables. If
we tried to use inversion otherwise, then Coq will choke because of the
dependencies.
An inversion principle for Eps.
Lemma EpsInv : forall cs v, in_regexp Eps cs v -> cs = nil /\ v = tt.
intros cs v H; inversion H ; mysimp.
Qed.
intros cs v H; inversion H ; mysimp.
Qed.
Inversion principle for Any.
Inversion principle for Char c.
Lemma CharInv : forall c cs v, [[Char c]] cs v -> cs = c::nil /\ v = c.
intros c cs v H ; inversion H ; mysimp.
Qed.
intros c cs v H ; inversion H ; mysimp.
Qed.
Inversion principle for Cat r1 r2.
Lemma CatInv : forall t1 t2 (r1:regexp t1) (r2:regexp t2) cs v, [[Cat r1 r2]] cs v ->
exists cs1, exists cs2, exists v1, exists v2,
[[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2).
Proof.
intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto.
Qed.
exists cs1, exists cs2, exists v1, exists v2,
[[r1]] cs1 v1 /\ [[r2]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = (v1,v2).
Proof.
intros t1 t2 r1 r2 cs v H ; inversion H ; mysimp ; repeat econstructor ; auto.
Qed.
Inversion principle for Alt r1 r2.
Lemma AltInv : forall t (r1 r2:regexp t) cs v, [[Alt r1 r2]] cs v -> ([[r1]] cs v \/ [[r2]] cs v).
intros t r1 r2 cs v H ; inversion H ; mysimp.
Qed.
intros t r1 r2 cs v H ; inversion H ; mysimp.
Qed.
Inversion principle for Star r.
Lemma StarInv : forall t (r:regexp t) cs v, [[Star r]] cs v ->
(cs = nil /\ v = nil) \/
(exists cs1, exists v1, exists cs2, exists v2,
cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2).
Proof.
intros t r cs v H ; inversion H ; mysimp ; right.
exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto.
Qed.
(cs = nil /\ v = nil) \/
(exists cs1, exists v1, exists cs2, exists v2,
cs1 <> nil /\ [[r]] cs1 v1 /\ [[Star r]] cs2 v2 /\ cs = cs1 ++ cs2 /\ v = v1::v2).
Proof.
intros t r cs v H ; inversion H ; mysimp ; right.
exists cs1 ; econstructor ; exists cs2 ; econstructor ; eauto.
Qed.
Inversion principle for Map f r.
Lemma MapInv : forall t1 t2 (f:fn t1 t2) (r:regexp t1) cs v, [[Map f r]] cs v ->
exists H:wf_fn f,
exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v.
intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto.
Qed.
exists H:wf_fn f,
exists v1, in_regexp r cs v1 /\ apply_fn f H v1 = v.
intros t1 t2 f r cs v H; inversion H ; mysimp. repeat econstructor ; eauto.
Qed.
Inversion principle for Zero.
A little tactic that takes care of inversion for the in_regexp relation.
Ltac in_inv :=
match goal with
| [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H
| [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H
| [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H
| [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H
| [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H
| [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H)
| [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H
end ; mysimp ; subst ; auto with dfa.
match goal with
| [ H : [[Eps]] _ _ |- _ ] => generalize (EpsInv H) ; clear H
| [ H : [[Any]] _ _ |- _ ] => generalize (AnyInv H) ; clear H
| [ H : [[Char _]] _ _ |- _] => generalize (CharInv H) ; clear H
| [ H : [[Cat _ _]] _ _ |- _] => generalize (CatInv H) ; clear H
| [ H : [[Alt _ _]] _ _ |- _] => generalize (AltInv H) ; clear H
| [ H : [[Zero _]] _ _ |- _] => contradiction (ZeroInv H)
| [ H : [[Map _ _]] _ _ |- _] => generalize (MapInv H) ; clear H
end ; mysimp ; subst ; auto with dfa.
A little tactic that tries to prove in_regexp.
Ltac pv_in :=
try in_inv ;
match goal with
| [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] =>
eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa
| [ |- in_regexp (Map ?f ?r) ?cs ?v ] =>
eapply (Map_i f I (eq_refl _)) ; eauto with dfa
| [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil (_,_) ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ; [ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
try in_inv ;
match goal with
| [ H : in_regexp ?r ?cs ?v1 |- in_regexp (Map ?f ?r) ?cs ?v2 ] =>
eapply (@Map_i _ _ f r cs v1 v2 I (eq_refl _) H) ; eauto with dfa
| [ |- in_regexp (Map ?f ?r) ?cs ?v ] =>
eapply (Map_i f I (eq_refl _)) ; eauto with dfa
| [ |- in_regexp (Cat _ _) (_ ++ _) (_,_) ] => apply Cat_i ; auto
| [ |- in_regexp (Cat _ _) nil (_,_) ] =>
let H := fresh "H" in
assert (H:@nil char_p= nil ++ nil) ; [auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| [ |- in_regexp (Cat _ _) (?c :: ?x ++ ?y) _ ] =>
let H := fresh "H" in assert (H: c::x++y = (c::x) ++ y) ; [ auto | rewrite H] ; clear H ;
apply Cat_i ; auto with dfa
| _ => auto with dfa
end.
Cat r Eps is equivalent to r
Lemma cat_eps_r : forall t (r:regexp t), (Cat r Eps) [=] (Map (Fn_unit_right t) r).
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Proof.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite (app_nil_end cs). repeat pv_in.
Qed.
Cat Eps r is equivalent to r
Lemma cat_eps_l : forall t (r:regexp t), (Cat Eps r) [=] (Map (Fn_unit_left t) r).
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in). rewrite <- (app_nil_l cs). repeat pv_in.
Qed.
Cat r Zero is equivalent to Zero
Lemma cat_zero_r : forall t1 t2 (r:regexp t1), (Cat r (Zero t2)) [=] Zero _.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
Cat Zero r is equivalent to Zero
Lemma cat_zero_l : forall t1 t2 (r:regexp t2), (Cat (Zero t1) r) [=] Zero _.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; repeat (mysimp ; pv_in).
Qed.
In the simple matcher code, we could fuse OptCat and OptCat_r into
a single definition. Here, we broke it out to make the dependency
a little simpler to reason about.
Used in an optimizing constructor for Cat.
Definition OptCat_r t1 t2 (r2:regexp t2) (r1:regexp t1) :=
match r2 in regexp t2' return regexp (pair_t t1 t2') with
| Zero _ => Zero _
| Eps => Map (Fn_unit_right t1) r1
| r2 => Cat r1 r2
end.
match r2 in regexp t2' return regexp (pair_t t1 t2') with
| Zero _ => Zero _
| Eps => Map (Fn_unit_right t1) r1
| r2 => Cat r1 r2
end.
An optimized constructor for Cat.
Definition OptCat t1 t2 (r1:regexp t1) (r2:regexp t2) :=
match r1 in regexp t1' return regexp (pair_t t1' t2) with
| Zero _ => Zero _
| Eps => Map (Fn_unit_left t2) r2
| r1 => OptCat_r r2 r1
end.
match r1 in regexp t1' return regexp (pair_t t1' t2) with
| Zero _ => Zero _
| Eps => Map (Fn_unit_left t2) r2
| r1 => OptCat_r r2 r1
end.
OptCat r1 r2 is equivalent to Cat r1 r2
Lemma opt_cat_r : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat_r r2 r1 [=] Cat r1 r2.
Proof.
destruct r2 ; simpl ; apply reg_eq_sym ;
(apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r).
Qed.
Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat r1 r2 [=] Cat r1 r2.
destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ;
apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l).
Qed.
OptCat_r r2 r1 [=] Cat r1 r2.
Proof.
destruct r2 ; simpl ; apply reg_eq_sym ;
(apply reg_eq_refl || apply cat_eps_r || apply cat_zero_r).
Qed.
Lemma opt_cat : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
OptCat r1 r2 [=] Cat r1 r2.
destruct r1 ; intros ; simpl ; try (apply reg_eq_refl || apply opt_cat_r) ;
apply reg_eq_sym ; (apply cat_eps_l || apply cat_zero_l).
Qed.
Alt r Zero is equivalent to r.
Lemma alt_zero_r : forall t (r:regexp t), (Alt r (Zero _)) [=] r.
unfold reg_eq ; intros ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; intros ; repeat (mysimp ; pv_in).
Qed.
Alt Zero r is equivalent to r.
Lemma alt_zero_l : forall t (r:regexp t), (Alt (Zero _) r) [=] r.
unfold reg_eq ; intros ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; intros ; repeat (mysimp ; pv_in).
Qed.
Used in an optimizing version of Alt.
Definition OptAlt_r t (r2:regexp t) : regexp t -> regexp t :=
match r2 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r1 => if regexp_eq r r1 then r else Alt r1 r
end.
match r2 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r1 => if regexp_eq r r1 then r else Alt r1 r
end.
Optimized version of Alt.
Definition OptAlt t (r1:regexp t) : regexp t -> regexp t :=
match r1 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r2 => OptAlt_r r2 r
end.
match r1 in regexp t' return regexp t' -> regexp t' with
| Zero _ => fun r => r
| r => fun r2 => OptAlt_r r2 r
end.
Alt r r is equivalent to r
OptAlt r1 r2 is equivalent to Alt r1 r2
Lemma opt_alt_r t (r1 r2:regexp t) : OptAlt_r r2 r1 [=] Alt r1 r2.
destruct r2 ; simpl ; repeat
match goal with
| [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; subst
| [ |- ?r [=] Alt ?r ?r ] => apply reg_eq_sym ; apply alt_refl
| [ |- ?r [=] ?r ] => apply reg_eq_refl
| [ |- ?r [=] Alt ?r (Zero _) ] => apply reg_eq_sym ; apply alt_zero_r
| _ => idtac
end.
Qed.
Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2.
destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ;
apply alt_zero_l.
Qed.
destruct r2 ; simpl ; repeat
match goal with
| [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; subst
| [ |- ?r [=] Alt ?r ?r ] => apply reg_eq_sym ; apply alt_refl
| [ |- ?r [=] ?r ] => apply reg_eq_refl
| [ |- ?r [=] Alt ?r (Zero _) ] => apply reg_eq_sym ; apply alt_zero_r
| _ => idtac
end.
Qed.
Lemma opt_alt : forall t1 (r1 r2: regexp t1), OptAlt r1 r2 [=] Alt r1 r2.
destruct r1 ; simpl ; intros ; try (apply opt_alt_r) ; apply reg_eq_sym ;
apply alt_zero_l.
Qed.
Optimizing version of Map.
Definition OptMap (t u: result) (f: fn t u) (r1: regexp t) : regexp u :=
match r1 in regexp t' return regexp u with
| Zero _ => Zero _
| _ => Map f r1
end.
match r1 in regexp t' return regexp u with
| Zero _ => Zero _
| _ => Map f r1
end.
OptMap f r is equivalent to Map f r.
Lemma opt_map: forall (t u: result) (f: fn t u) (r1: regexp t), OptMap f r1 [=] Map f r1.
unfold reg_eq ; destruct r1 ; simpl ; repeat (mysimp ; pv_in).
Qed.
unfold reg_eq ; destruct r1 ; simpl ; repeat (mysimp ; pv_in).
Qed.
Now we define what it means for a function to be a valid
parser. A function f: regexp t -> list char_p -> list (result_m t), is a
valid parser if, when given a regexp and a string of characters s, it
returns a list of values [v1,...,vn], such that { (s,vi) } is the
relation denoted by the regular expression.
Definition is_parser (f: forall t (r:regexp t), wf_regexp r -> list char_p -> list (result_m t)) : Prop :=
forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:result_m t),
([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v).
forall t (r: regexp t) (H:wf_regexp r) (cs: list char_p) (v:result_m t),
([[r]] cs v -> In v (f t r H cs)) /\ (In v (f t r H cs) -> [[r]] cs v).
Returns a regexp denoting { (null,v) | (null,v) in r }.
Fixpoint null t (r:regexp t) : regexp t :=
match r in regexp t' return regexp t' with
| Any => Zero _
| Char _ => Zero _
| Eps => Eps
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (null r1) (null r2)
| Cat t1 t2 r1 r2 => OptCat (null r1) (null r2)
| Star t _ => OptMap (Fn_empty_list t) Eps
| Map _ _ f r1 => OptMap f (null r1)
end.
match r in regexp t' return regexp t' with
| Any => Zero _
| Char _ => Zero _
| Eps => Eps
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (null r1) (null r2)
| Cat t1 t2 r1 r2 => OptCat (null r1) (null r2)
| Star t _ => OptMap (Fn_empty_list t) Eps
| Map _ _ f r1 => OptMap f (null r1)
end.
This is the heart of the algorithm. It returns a regexp denoting
{ (cs,v) | (c::cs,v) in r }.
Fixpoint deriv t (r:regexp t) (c:char_p) : regexp t :=
match r in regexp t' return regexp t' with
| Any => OptMap (Fn_const_char c) Eps
| Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _
| Eps => Zero _
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r))
| Map _ _ f r1 => OptMap f (deriv r1 c)
end.
match r in regexp t' return regexp t' with
| Any => OptMap (Fn_const_char c) Eps
| Char c' => if char_eq c c' then OptMap (Fn_const_char c) Eps else Zero _
| Eps => Zero _
| Zero _ => Zero _
| Alt t r1 r2 => OptAlt (deriv r1 c) (deriv r2 c)
| Cat t1 t2 r1 r2 => OptAlt (OptCat (deriv r1 c) r2) (OptCat (null r1) (deriv r2 c))
| Star t r => OptMap (Fn_cons t) (OptCat (deriv r c) (Star r))
| Map _ _ f r1 => OptMap f (deriv r1 c)
end.
When we are done parsing using a regexp r, we are left with a regexp denoting
{ (null,v) | exists s.(s,v) in r }. This function computes all of the
values v in this set. To do so, it needs to be well-formed with respect to the
context.
Definition apply_null t (r:regexp t) : wf_regexp r -> list (result_m t).
refine (
fix apply_null t (r:regexp t) : wf_regexp r -> list (result_m t) :=
match r in regexp t' return wf_regexp r -> list (result_m t') with
| Any => fun H => nil
| Char _ => fun H => nil
| Zero _ => fun H => nil
| Eps => fun H => tt::nil
| Star _ r => fun H => nil::nil
| Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _)
| Cat _ _ r1 r2 =>
fun H =>
let res1 := apply_null _ r1 _ in
let res2 := apply_null _ r2 _ in
fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1
| Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _)
end
) ; mysimp.
Defined.
Lemma InConcatMap A B (x:A) (y:B) :
forall xs, In x xs ->
forall ys, In y ys ->
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs).
Proof.
induction xs ; intro H ; [ contradiction H | destruct H ] ; s ;
apply in_or_app ; [ idtac | firstorder ].
left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ].
Qed.
refine (
fix apply_null t (r:regexp t) : wf_regexp r -> list (result_m t) :=
match r in regexp t' return wf_regexp r -> list (result_m t') with
| Any => fun H => nil
| Char _ => fun H => nil
| Zero _ => fun H => nil
| Eps => fun H => tt::nil
| Star _ r => fun H => nil::nil
| Alt _ r1 r2 => fun H => (apply_null _ r1 _) ++ (apply_null _ r2 _)
| Cat _ _ r1 r2 =>
fun H =>
let res1 := apply_null _ r1 _ in
let res2 := apply_null _ r2 _ in
fold_right (fun v1 a => (map (fun v2 => (v1,v2)) res2) ++ a) nil res1
| Map _ _ f r1 => fun H => map (apply_fn f _) (apply_null _ r1 _)
end
) ; mysimp.
Defined.
Lemma InConcatMap A B (x:A) (y:B) :
forall xs, In x xs ->
forall ys, In y ys ->
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs).
Proof.
induction xs ; intro H ; [ contradiction H | destruct H ] ; s ;
apply in_or_app ; [ idtac | firstorder ].
left ; clear IHxs ; induction ys ; [ contradiction H0 | destruct H0 ; s ].
Qed.
Show that if (nil,v) is in r then apply_null r returns v as a result
Lemma ApplyNull1 t (r:regexp t) cs v H :
[[r]] cs v -> cs = nil -> In v (apply_null r H).
Proof.
induction 1 ; mysimp ; try congruence ; repeat
match goal with
| [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto
| [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _,
H1 : ?cs = nil |- _ ] =>
let l := fresh "l" in
generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ;
left
| [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] =>
rewrite (proof_irrelevance H1 H2) ; auto
| [ H : None = Some _ |- _ ] => congruence
| [ |- In _ (_ ++ _) ] => apply in_or_app ; auto
end.
Qed.
Lemma InConcatMap1 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In x xs.
Proof.
induction xs ; mysimp ; repeat
match goal with
| [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto]
| [ H : In (?x, _) (map _ ?ys) |- _ ] =>
let l := fresh "l" in generalize H ; generalize ys as l;
induction l ; mysimp ; firstorder ; congruence
end.
Qed.
Lemma InConcatMap2 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In y ys.
Proof.
induction xs ; mysimp ; firstorder ;
match goal with
| [ H : In _ (map _ ?ys ++ _) |- _ ] =>
let H0 := fresh "H" in let l := fresh "l" in
generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ;
generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence
end.
Qed.
[[r]] cs v -> cs = nil -> In v (apply_null r H).
Proof.
induction 1 ; mysimp ; try congruence ; repeat
match goal with
| [ H : ?x ++ ?y = nil |- _ ] => generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ |- In _ (fold_right _ nil _) ] => apply InConcatMap ; auto
| [ H : wf_regexp ?r, IH : forall _ : wf_regexp ?r, ?cs = nil -> _,
H1 : ?cs = nil |- _ ] =>
let l := fresh "l" in
generalize (IH H H1) ; generalize (apply_null r H) as l ; induction l ; s ;
left
| [ |- apply_fn ?f ?H1 _ = apply_fn ?f ?H2 _] =>
rewrite (proof_irrelevance H1 H2) ; auto
| [ H : None = Some _ |- _ ] => congruence
| [ |- In _ (_ ++ _) ] => apply in_or_app ; auto
end.
Qed.
Lemma InConcatMap1 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In x xs.
Proof.
induction xs ; mysimp ; repeat
match goal with
| [ H : In _ _ |- _ ] => destruct (in_app_or _ _ _ H) ; [left | right ; eauto]
| [ H : In (?x, _) (map _ ?ys) |- _ ] =>
let l := fresh "l" in generalize H ; generalize ys as l;
induction l ; mysimp ; firstorder ; congruence
end.
Qed.
Lemma InConcatMap2 A B (x:A) (y:B) xs ys :
In (x,y) (fold_right (fun v a => (map (fun w => (v,w)) ys) ++ a) nil xs) ->
In y ys.
Proof.
induction xs ; mysimp ; firstorder ;
match goal with
| [ H : In _ (map _ ?ys ++ _) |- _ ] =>
let H0 := fresh "H" in let l := fresh "l" in
generalize (in_app_or _ _ _ H) ; intro H0 ; destruct H0 ; auto ;
generalize H0 ; generalize ys as l ; induction l ; mysimp ; left ; congruence
end.
Qed.
Show that if apply_null r returns v as a result, then (nil,v) is in r
Lemma ApplyNull2 t (r:regexp t) H v :
In v (apply_null r H) -> [[r]] nil v.
Proof.
induction r ; s ; try contradiction ; auto with dfa ;
match goal with
| [ IHr1 : _, IHr2 : _, v: prod _ _ |- in_regexp (Cat _ _) _ _] =>
destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto |
eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto]
| [ IHr1:_, IHr2:_, H0 : In _ (_ ++ _) |- in_regexp (Alt _ _) _ _ ] =>
generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ;
[ apply Alt_left_i ; eapply IHr1 ; eauto
| eapply Alt_right_i ; eapply IHr2 ; eauto ]
| [ IHr: forall _, _,
H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] =>
let l := fresh "l" in
generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ;
clear IHr H0 ; mysimp ; try tauto ; eapply Map_i ; eauto
end.
Qed.
In v (apply_null r H) -> [[r]] nil v.
Proof.
induction r ; s ; try contradiction ; auto with dfa ;
match goal with
| [ IHr1 : _, IHr2 : _, v: prod _ _ |- in_regexp (Cat _ _) _ _] =>
destruct v ; pv_in ; [ eapply IHr1 ; eauto ; eapply InConcatMap1 ; eauto |
eapply IHr2 ; eauto ; eapply InConcatMap2 ; eauto]
| [ IHr1:_, IHr2:_, H0 : In _ (_ ++ _) |- in_regexp (Alt _ _) _ _ ] =>
generalize (in_app_or _ _ _ H0) ; clear H0 ; intro H0 ; destruct H0 ;
[ apply Alt_left_i ; eapply IHr1 ; eauto
| eapply Alt_right_i ; eapply IHr2 ; eauto ]
| [ IHr: forall _, _,
H:wf_regexp ?r, H0: In _ (map _ _) |- in_regexp (Map _ _) _ _ ] =>
let l := fresh "l" in
generalize (apply_null r H) (IHr H) H0 ; intro l ; induction l ;
clear IHr H0 ; mysimp ; try tauto ; eapply Map_i ; eauto
end.
Qed.
Show that the optimizing constructors preserve well-formedness.
Lemma wf_cat_opt : forall t1 t2 (r1:regexp t1) (r2:regexp t2),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2).
Proof.
destruct r1 ; mysimp ; destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt : forall t (r1 r2:regexp t),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2).
Proof.
dependent destruction r1 ; mysimp ; dependent destruction r2 ; mysimp ;
match goal with
| [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; auto
end ;
mysimp.
Qed.
Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t),
wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1).
Proof.
destruct r1; mysimp.
Qed.
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptCat r1 r2).
Proof.
destruct r1 ; mysimp ; destruct r2 ; mysimp.
Qed.
Lemma wf_alt_opt : forall t (r1 r2:regexp t),
wf_regexp r1 -> wf_regexp r2 -> wf_regexp (OptAlt r1 r2).
Proof.
dependent destruction r1 ; mysimp ; dependent destruction r2 ; mysimp ;
match goal with
| [ |- context[regexp_eq ?r1 ?r2] ] => destruct (regexp_eq r1 r2) ; auto
end ;
mysimp.
Qed.
Lemma wf_map_opt : forall t u (f: fn t u) (r1: regexp t),
wf_regexp r1 -> wf_fn f -> wf_regexp (OptMap f r1).
Proof.
destruct r1; mysimp.
Qed.
Show that null r is well-formed if r is well-formed.
Lemma wf_null : forall t (r:regexp t), wf_regexp r -> wf_regexp (null r).
induction r ; mysimp ;
(apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto.
Qed.
induction r ; mysimp ;
(apply wf_cat_opt || apply wf_alt_opt || apply wf_map_opt) ; auto.
Qed.
Show that deriv r c is well-formed if r is well-formed.
Lemma wf_deriv : forall t (r:regexp t) c, wf_regexp r -> wf_regexp (deriv r c).
Proof.
induction r; mysimp ;
repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ;
simpl ; auto.
Qed.
Fixpoint deriv_parse' t (r:regexp t) (cs:list char_p) : regexp t :=
match cs with
| nil => r
| c::cs' => deriv_parse' (deriv r c) cs'
end.
Lemma wf_derivs : forall cs t (r:regexp t), wf_regexp r -> wf_regexp (deriv_parse' r cs).
induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto.
Qed.
Proof.
induction r; mysimp ;
repeat (apply wf_cat_opt || apply wf_alt_opt || apply wf_null || apply wf_map_opt) ;
simpl ; auto.
Qed.
Fixpoint deriv_parse' t (r:regexp t) (cs:list char_p) : regexp t :=
match cs with
| nil => r
| c::cs' => deriv_parse' (deriv r c) cs'
end.
Lemma wf_derivs : forall cs t (r:regexp t), wf_regexp r -> wf_regexp (deriv_parse' r cs).
induction cs ; simpl ; intros ; auto. apply IHcs. apply wf_deriv. auto.
Qed.
Finally, the derivative-based parser simply runs through the input, calculating
derivatives based on the characters it sees, and calls apply_null when it gets
to the end of the input.
Definition deriv_parse t (r:regexp t) (H:wf_regexp r) cs : list (result_m t) :=
apply_null (deriv_parse' r cs) (wf_derivs cs r H).
apply_null (deriv_parse' r cs) (wf_derivs cs r H).
Tactic for helping to reason about the optimizing constructors.
Ltac pv_opt :=
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_alt r1 r2 cs v))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_cat r1 r2 cs v))
| [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] =>
apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail)
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] =>
generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp
| [ v : prod (result_m _) (result_m _) |- _ ] => destruct v
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| _ => auto with dfa
end.
match goal with
| [ |- in_regexp (OptAlt ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_alt r1 r2 cs v))
| [ |- in_regexp (OptCat ?r1 ?r2) ?cs ?v ] =>
apply (proj2 (opt_cat r1 r2 cs v))
| [ |- in_regexp (OptMap ?f ?r) ?cs ?v ] =>
apply (proj2 (opt_map f r cs v)) ; try (eapply Map_i ; eauto ; fail)
| [ H : ?x ++ ?y = nil |- _] =>
generalize (app_eq_nil _ _ H) ; clear H ; mysimp
| [ H : nil = ?x ++ ?y |- _] =>
generalize (app_eq_nil _ _ (eq_sym H)) ; clear H ; mysimp
| [ H : in_regexp (OptCat ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_cat r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptAlt ?r1 ?r2) ?cs ?v |- _] =>
generalize (proj1 (opt_alt r1 r2 cs v) H) ; clear H ; intro H
| [ H : in_regexp (OptMap ?f ?r) ?cs ?v |- _] =>
generalize (proj1 (opt_map f r cs v) H) ; clear H ; mysimp
| [ v : prod (result_m _) (result_m _) |- _ ] => destruct v
| [ H : (_,_) = (_,_) |- _ ] => injection H ; clear H ; mysimp
| _ => auto with dfa
end.
apply_null is correct.
Lemma Null1 : forall t (r:regexp t) cs v, [[r]] cs v -> cs = nil -> [[null r]] cs v.
Proof.
induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in ;
try congruence.
Qed.
Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ;
try apply nil ; eapply @Map_i ; eauto.
Qed.
Proof.
induction 1 ; mysimp ; try congruence ; auto with dfa ; repeat pv_opt ; pv_in ;
try congruence.
Qed.
Lemma Null2 : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil -> [[r]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ; (repeat (pv_opt ; subst ; pv_in)) ;
try apply nil ; eapply @Map_i ; eauto.
Qed.
deriv is correct part 1.
Lemma Deriv1 : forall t (r:regexp t) c cs v, [[r]] (c::cs) v -> [[deriv r c]] cs v.
Proof.
induction r ; simpl ; intros ; try in_inv ;
match goal with
| [ H : in_regexp (Star _) _ _ |- _] =>
generalize (StarInv H); clear H ; mysimp ; subst
| _ => idtac
end ; try congruence ;
repeat (pv_opt ; match goal with
| [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] =>
destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt
| [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] =>
let H := fresh "H" in
assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto
| [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] =>
apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _))
| [ |- in_regexp (Map (Fn_const_char ?v) Eps) nil ?v ] =>
apply (@Map_i _ _ (Fn_const_char v) Eps nil tt v I (eq_refl _))
| [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ;
rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in
| [ H2 : _::?cs = ?x ++ ?x1 |- _ ] =>
destruct x ; try congruence ; simpl in H2 ; injection H2 ;
s ; apply Cat_i ; auto with dfa
| _ => s ; auto with dfa
end).
Qed.
Proof.
induction r ; simpl ; intros ; try in_inv ;
match goal with
| [ H : in_regexp (Star _) _ _ |- _] =>
generalize (StarInv H); clear H ; mysimp ; subst
| _ => idtac
end ; try congruence ;
repeat (pv_opt ; match goal with
| [ _ : (?c :: ?cs) = (?x ++ _) |- in_regexp (Alt _ _) ?cs _] =>
destruct x ; s ; [eapply Alt_right_i | eapply Alt_left_i] ; pv_opt ; pv_opt
| [ H : in_regexp ?r1 nil _ |- in_regexp (Cat (null ?r1) _) ?cs _ ] =>
let H := fresh "H" in
assert (H:cs=nil++cs) ; [ auto | rewrite H ] ; pv_in ; apply Null1 ; auto
| [ |- in_regexp (Map (Fn_cons ?t) ?r) ?cs (?x1::?x2) ] =>
apply (@Map_i _ _ (Fn_cons t) r cs (x1,x2) (x0::x2) I (eq_refl _))
| [ |- in_regexp (Map (Fn_const_char ?v) Eps) nil ?v ] =>
apply (@Map_i _ _ (Fn_const_char v) Eps nil tt v I (eq_refl _))
| [ H1: ?x <> nil, H2: _::?cs = ?x ++ nil |- _] => rewrite app_nil_r in H2 ; subst ;
rewrite (app_nil_end cs) ; pv_opt ; pv_in ; pv_in
| [ H2 : _::?cs = ?x ++ ?x1 |- _ ] =>
destruct x ; try congruence ; simpl in H2 ; injection H2 ;
s ; apply Cat_i ; auto with dfa
| _ => s ; auto with dfa
end).
Qed.
If null r matches cs returning v, then cs must be empty
Lemma NullNil : forall t (r:regexp t) cs v, [[null r]] cs v -> cs = nil.
Proof.
induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _,
H : in_regexp (null ?r) _ _ |- _] =>
rewrite (IHr _ _ H) ; clear IHr ; auto
end.
Qed.
Proof.
induction r ; mysimp ; try in_inv ; pv_opt ; try in_inv ; repeat
match goal with
| [ IHr : forall cs v, in_regexp (null ?r) _ _ -> _ = _,
H : in_regexp (null ?r) _ _ |- _] =>
rewrite (IHr _ _ H) ; clear IHr ; auto
end.
Qed.
deriv is correct part 2.
Lemma Deriv2 : forall t (r:regexp t) c cs v, [[deriv r c]] cs v -> [[r]] (c::cs) v.
Proof.
induction r ; simpl ; intros ; repeat
(match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s
| [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] =>
let H1 := fresh "H" in
generalize (NullNil _ H) ; s ;
assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ;
pv_in ; apply Null2 ; auto
| [ H: in_regexp (deriv ?r1 _) ?x ?x1 |-
in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in
| [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] =>
let H := fresh "H" in
assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ;
eapply Star_cat_i ; eauto ; congruence
| [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto
| [ H : in_regexp (Star _) _ _ |- _ ] => fail 1
| _ => pv_opt ; in_inv
end).
Qed.
Proof.
induction r ; simpl ; intros ; repeat
(match goal with
| [ H : context[char_eq ?c1 ?c2] |- _ ] => destruct (char_eq c1 c2) ; s
| [ H : in_regexp (null _) _ _ |- in_regexp (Cat _ _) (?c::?x++?x0) _ ] =>
let H1 := fresh "H" in
generalize (NullNil _ H) ; s ;
assert (H1:c::x0 = nil ++ (c::x0)) ; [auto | rewrite H1] ;
pv_in ; apply Null2 ; auto
| [ H: in_regexp (deriv ?r1 _) ?x ?x1 |-
in_regexp (Cat ?r1 _) (_::?x++_) (?x1,_) ] => pv_in
| [ |- in_regexp (Star _) (?c :: ?x1 ++ ?x2) _ ] =>
let H := fresh "H" in
assert (H:c :: x1 ++ x2 = (c::x1) ++ x2) ; [ auto | rewrite H ] ;
eapply Star_cat_i ; eauto ; congruence
| [ |- in_regexp (Map _ _) _ (apply_fn _ _ _) ] => eapply Map_i ; eauto
| [ H : in_regexp (Star _) _ _ |- _ ] => fail 1
| _ => pv_opt ; in_inv
end).
Qed.
First half of correctness for deriv_parse.
Lemma Deriv'1 cs t (r:regexp t) v :
[[deriv_parse' r cs]] nil v -> [[r]] cs v.
Proof.
induction cs ; mysimp. apply Deriv2. apply IHcs. auto.
Qed.
Lemma Deriv'2 cs t (r:regexp t) v :
[[r]] cs v -> [[deriv_parse' r cs]] nil v.
Proof.
induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto.
Qed.
Lemma DerivParse1 cs t (r:regexp t) H vs :
deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v.
Proof.
unfold deriv_parse. intros. subst.
generalize (ApplyNull2 _ _ _ H1). apply Deriv'1.
Qed.
[[deriv_parse' r cs]] nil v -> [[r]] cs v.
Proof.
induction cs ; mysimp. apply Deriv2. apply IHcs. auto.
Qed.
Lemma Deriv'2 cs t (r:regexp t) v :
[[r]] cs v -> [[deriv_parse' r cs]] nil v.
Proof.
induction cs ; mysimp. apply IHcs. apply Deriv1 ; auto.
Qed.
Lemma DerivParse1 cs t (r:regexp t) H vs :
deriv_parse r H cs = vs -> forall v, In v vs -> [[r]] cs v.
Proof.
unfold deriv_parse. intros. subst.
generalize (ApplyNull2 _ _ _ H1). apply Deriv'1.
Qed.
Second half of correctness for deriv_parse.
Lemma DerivParse2 cs t (r:regexp t) v H :
[[r]] cs v -> In v (deriv_parse r H cs).
Proof.
unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto.
Qed.
[[r]] cs v -> In v (deriv_parse r H cs).
Proof.
unfold deriv_parse. intros. eapply ApplyNull1 ; auto. eapply Deriv'2. auto.
Qed.
deriv_parse is correct
Theorem DerivParse_is_parser : is_parser deriv_parse.
Proof.
unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto.
Qed.
End FNMAP.
Proof.
unfold is_parser ; mysimp ; [apply DerivParse2 | eapply DerivParse1] ; eauto.
Qed.
End FNMAP.
Now we need to translate our external representation, parser to
our internal representation regexp and build an appropriate ctxt mapping function names to functions of the right type.
Add a new function to the end of the context and return its position as
a "fresh" function name, along with the new context.
Definition extend_state(s:ctxt_t) t1 t2 (f:fn_result_m(t1,t2)) : (fn_name * ctxt_t) :=
(length s, s ++ (existT fn_result_m (t1,t2) f)::nil).
(length s, s ++ (existT fn_result_m (t1,t2) f)::nil).
Convert a parser with inlined functions to a regexp and a function map.
Fixpoint par2reg t (p:parser t)(s:ctxt_t) : (regexp t) * ctxt_t :=
match p in parser t return (regexp t) * ctxt_t with
| Any_p => (Any, s)
| Char_p b => (Char b, s)
| Eps_p => (Eps, s)
| Cat_p t1 t2 p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Cat r1 r2, s2)
| Zero_p t => (Zero t, s)
| Alt_p t p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Alt r1 r2, s2)
| Star_p t p1 =>
let (r1,s1) := par2reg p1 s in
(Star r1, s1)
| Map_p t1 t2 f p =>
let (r,s1) := par2reg p s in
let (n,s2) := extend_state s1 f in
(@Map t1 t2 (Fn_name n t1 t2) r, s2)
end.
match p in parser t return (regexp t) * ctxt_t with
| Any_p => (Any, s)
| Char_p b => (Char b, s)
| Eps_p => (Eps, s)
| Cat_p t1 t2 p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Cat r1 r2, s2)
| Zero_p t => (Zero t, s)
| Alt_p t p1 p2 =>
let (r1,s1) := par2reg p1 s in
let (r2,s2) := par2reg p2 s1 in
(Alt r1 r2, s2)
| Star_p t p1 =>
let (r1,s1) := par2reg p1 s in
(Star r1, s1)
| Map_p t1 t2 f p =>
let (r,s1) := par2reg p s in
let (n,s2) := extend_state s1 f in
(@Map t1 t2 (Fn_name n t1 t2) r, s2)
end.
Initial state for the translation.
Top-level translation of parsers to regexps.
Tactic to propagate information about the translation.
Ltac unfold_par2reg :=
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H := fresh "H" in
let x := fresh "x" in
let r := fresh "r" in
let s := fresh "s" in
assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ;
destruct H as [x H];
rewrite <- H in * ; destruct x as [r s]
end.
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H := fresh "H" in
let x := fresh "x" in
let r := fresh "r" in
let s := fresh "s" in
assert (H : exists x, x = par2reg p s) ; [eauto | idtac] ;
destruct H as [x H];
rewrite <- H in * ; destruct x as [r s]
end.
Define a partial order on contexts, which the translation respects. That is,
the input context is always less than the output context.
Definition ctxt_leq(c1 c2:ctxt_t) : Prop :=
exists c:ctxt_t, c2 = c1 ++ c.
Lemma ctxt_leq_trans : forall c1 c2 c3,
ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3.
Proof.
unfold ctxt_leq ; s. rewrite app_ass. eauto.
Qed.
Lemma ctxt_leq_refl : forall c, ctxt_leq c c.
unfold ctxt_leq ; s. exists nil. apply app_nil_end.
Qed.
Hint Resolve ctxt_leq_refl : dfa.
Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c').
unfold ctxt_leq ; s. eauto.
Qed.
Hint Resolve ctxt_ext : dfa.
exists c:ctxt_t, c2 = c1 ++ c.
Lemma ctxt_leq_trans : forall c1 c2 c3,
ctxt_leq c1 c2 -> ctxt_leq c2 c3 -> ctxt_leq c1 c3.
Proof.
unfold ctxt_leq ; s. rewrite app_ass. eauto.
Qed.
Lemma ctxt_leq_refl : forall c, ctxt_leq c c.
unfold ctxt_leq ; s. exists nil. apply app_nil_end.
Qed.
Hint Resolve ctxt_leq_refl : dfa.
Lemma ctxt_ext : forall c c', ctxt_leq c (c ++ c').
unfold ctxt_leq ; s. eauto.
Qed.
Hint Resolve ctxt_ext : dfa.
Simplify reasoning about inductive hypotheses that depend upon a context.
Ltac p2rsimp :=
match goal with
| [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y)
| [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ;
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ;
intro H'
| _ => fail
end
| _ => idtac
end.
match goal with
| [ |- context[length(?x ++ ?y)] ] => rewrite (@app_length _ x y)
| [ IH : forall _:ctxt_t, _, H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ;
match goal with
| [ |- context[par2reg ?p ?s] ] =>
let H' := fresh "H" in generalize (IH s) ; clear IH ; rewrite <- H ; simpl ;
intro H'
| _ => fail
end
| _ => idtac
end.
The translation only results in a greater or equal ctxt
Lemma p2r_extends : forall t (p:parser t) (c:ctxt_t), ctxt_leq c (snd (par2reg p c)).
Proof.
unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ;
repeat unfold_par2reg ; simpl ;
try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ;
try rewrite app_ass ; econstructor ; eauto.
Qed.
Proof.
unfold ctxt_leq ; induction p ; simpl ; unfold extend_state ; intros ;
repeat unfold_par2reg ; simpl ;
try (exists nil ; mysimp ; fail) ; repeat p2rsimp ; s ;
try rewrite app_ass ; econstructor ; eauto.
Qed.
Functions stay well-typed under greater or equal contexts.
Lemma extends_wf_fn : forall t1 t2 (f:fn t1 t2) c c', ctxt_leq c c' ->
wf_fn c f -> wf_fn c' f.
Proof.
unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ;
unfold fn_result, fn_result' in * ; induction f ; destruct c ;
[ mysimp | mysimp | mysimp | intro ] ; try congruence ;
simpl in *. firstorder.
destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp. firstorder.
Qed.
wf_fn c f -> wf_fn c' f.
Proof.
unfold ctxt_leq ; s ; destruct f ; auto ; simpl in * ; generalize c H0 ; clear c H0 ;
unfold fn_result, fn_result' in * ; induction f ; destruct c ;
[ mysimp | mysimp | mysimp | intro ] ; try congruence ;
simpl in *. firstorder.
destruct H0. generalize (IHf _ (conj (lt_S_n _ _ H) H0)). mysimp. firstorder.
Qed.
Regexps stay well-typed under greater or equal contexts.
Lemma extends_wf_regexp : forall t (r:regexp t) c c',
ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r.
Proof.
induction r ; simpl ; s ; firstorder. eapply extends_wf_fn ; eauto. econstructor ; eauto.
Qed.
ctxt_leq c c' -> wf_regexp c r -> wf_regexp c' r.
Proof.
induction r ; simpl ; s ; firstorder. eapply extends_wf_fn ; eauto. econstructor ; eauto.
Qed.
Tactic for remembering a translated sub-term is well-formed.
Ltac p2rext :=
match goal with
| [ H : (?r,?s2) = par2reg ?p ?s |- _ ] =>
generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H
end.
Lemma FnResult : forall s p r,
fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p.
Proof.
induction s ; mysimp ; firstorder.
Qed.
Hint Resolve FnResult.
match goal with
| [ H : (?r,?s2) = par2reg ?p ?s |- _ ] =>
generalize (p2r_extends p s) ; rewrite <- H ; simpl ; intro ; clear H
end.
Lemma FnResult : forall s p r,
fn_result (s ++ existT fn_result_m p r :: nil) (length s) = Some p.
Proof.
induction s ; mysimp ; firstorder.
Qed.
Hint Resolve FnResult.
The translation results in a well-formed regexp.
Lemma p2r_wf : forall t (p:parser t) c, wf_regexp (snd (par2reg p c)) (fst (par2reg p c)).
Proof.
induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ;
try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith.
Qed.
Proof.
induction p ; mysimp ; repeat unfold_par2reg ; repeat p2rsimp ; mysimp ; p2rext ;
try (eapply (@extends_wf_regexp _ _ s); eauto) ; try econstructor ; eauto with arith.
Qed.
Applying the same function in an extended environment yields the same result.
Lemma extend_apply : forall x t1 t2 (f:fn t1 t2) s (H:wf_fn s f) v1 H',
apply_fn (s ++ x) f H' v1 = apply_fn s f H v1.
Proof.
destruct f ; auto. simpl.
mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e.
induction s ; destruct f ; mysimp ; firstorder ;
unfold fn_result, fn_result' in * |- ; simpl in * ;
try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto.
Qed.
Hint Resolve extend_apply : dfa.
apply_fn (s ++ x) f H' v1 = apply_fn s f H v1.
Proof.
destruct f ; auto. simpl.
mysimp. generalize f l0 e0 l e ; clear f l0 e0 l e.
induction s ; destruct f ; mysimp ; firstorder ;
unfold fn_result, fn_result' in * |- ; simpl in * ;
try congruence. destruct a. rewrite (proof_irrelevance e0 e) in *. auto.
Qed.
Hint Resolve extend_apply : dfa.
in_regexp respects weakening on states.
Lemma in_leq : forall t (r:regexp t) s cs v,
in_regexp s r cs v ->
forall s', ctxt_leq s s' -> in_regexp s' r cs v.
Proof.
induction 1 ; repeat (s ; firstorder) ;
try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)).
apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa.
Qed.
in_regexp s r cs v ->
forall s', ctxt_leq s s' -> in_regexp s' r cs v.
Proof.
induction 1 ; repeat (s ; firstorder) ;
try ((constructor ; auto with dfa ; fail) || (apply Alt_right_i ; auto with dfa)).
apply (Map_i(v1:=v1) f (@extends_wf_fn t1 t2 f _ _ (ctxt_ext s x) H)) ; auto with dfa.
Qed.
Newly-allocated functions are well-formed with respect to the resulting context.
Lemma new_fn_wf : forall t1 t2 f s, wf_fn (s ++ existT fn_result_m (t1,t2) f :: nil)
(Fn_name (length s) t1 t2).
Proof.
induction s; mysimp ; firstorder.
Qed.
Hint Resolve new_fn_wf : dfa.
(Fn_name (length s) t1 t2).
Proof.
induction s; mysimp ; firstorder.
Qed.
Hint Resolve new_fn_wf : dfa.
If (cs,v) is in [p] then (cs,v) is in [r] under the output context s.
Lemma p2r_ok : forall t (p:parser t) cs v,
in_parser p cs v ->
forall s,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v.
Proof.
induction 1 ; simpl ; intro s ; try (constructor ; fail) ;
repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ;
try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) ||
(apply Alt_right_i ; auto).
apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)).
generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ;
unfold fn_result, fn_result' in * ;
simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto.
generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto.
eapply in_leq ; eauto. auto with dfa.
Qed.
in_parser p cs v ->
forall s,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v.
Proof.
induction 1 ; simpl ; intro s ; try (constructor ; fail) ;
repeat unfold_par2reg ; simpl in * ; repeat p2rsimp ; repeat p2rext ; intros ;
try (econstructor ; eauto ; eapply in_leq ; eauto ; fail) ||
(apply Alt_right_i ; auto).
apply (Map_i(v1:=v) (Fn_name (length s0) t1 t2) (new_fn_wf f s0)).
generalize s0 (new_fn_wf f s0). induction s1 ; mysimp ;
unfold fn_result, fn_result' in * ;
simpl in *. rewrite (proof_irrelevance e (eq_refl _)). auto.
generalize (IHs1 (conj (lt_S_n _ _ l) e)). clear IHs1. auto.
eapply in_leq ; eauto. auto with dfa.
Qed.
If (cs,v) is in [r] under context s2, and r is well-formed with respect to
context s1, and s1 <= s2, then (cs,v) is in [r] under context s1.
Lemma extends_in :
forall t (r:regexp t) cs v s1 s2,
in_regexp s2 r cs v ->
wf_regexp s1 r ->
ctxt_leq s1 s2 ->
in_regexp s1 r cs v.
Proof.
induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) ||
(eapply Alt_right_i ; eauto) ||
(apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s.
destruct H3. subst. symmetry. eapply extend_apply.
Qed.
Hint Resolve extends_in : dfa.
forall t (r:regexp t) cs v s1 s2,
in_regexp s2 r cs v ->
wf_regexp s1 r ->
ctxt_leq s1 s2 ->
in_regexp s1 r cs v.
Proof.
induction 1 ; simpl ; intros ; mysimp ; (econstructor ; eauto ; fail) ||
(eapply Alt_right_i ; eauto) ||
(apply (@Map_i s1 t1 t2 f r cs v1 v2 H4)) ; auto ; s.
destruct H3. subst. symmetry. eapply extend_apply.
Qed.
Hint Resolve extends_in : dfa.
Used for induction over proofs of (cs,v) in Star r.
Fixpoint in_unwind s t (r:regexp t) n cs (v:result_m (list_t t)) : Prop :=
match n with
| 0 => cs = nil /\ v = nil
| S m =>
exists cs1, exists cs2,
exists v1, exists v2,
cs = cs1 ++ cs2 /\ v = (v1::v2) /\
cs1 <> nil /\
in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2
end.
Definition coerce_reg t1 t2 (r:regexp t1) : t1 = t2 -> regexp t2.
intros. rewrite H in r. apply r.
Defined.
Definition coerce_val t1 t2 (v:result_m t1) : t1 = t2 -> result_m t2.
intros. rewrite H in v. apply v.
Defined.
match n with
| 0 => cs = nil /\ v = nil
| S m =>
exists cs1, exists cs2,
exists v1, exists v2,
cs = cs1 ++ cs2 /\ v = (v1::v2) /\
cs1 <> nil /\
in_regexp s r cs1 v1 /\ in_unwind s r m cs2 v2
end.
Definition coerce_reg t1 t2 (r:regexp t1) : t1 = t2 -> regexp t2.
intros. rewrite H in r. apply r.
Defined.
Definition coerce_val t1 t2 (v:result_m t1) : t1 = t2 -> result_m t2.
intros. rewrite H in v. apply v.
Defined.
If (cs,v) is in Star r then there exists some n such that
(cs,v) is in the nth unwinding of r.
This gives us an easy inner induction.
Lemma star_rep : forall s t (r:regexp t) cs (v:result_m t),
in_regexp s r cs v ->
forall t1 (r1 : regexp t1) (H: t = list_t t1),
coerce_reg r H = Star r1 ->
exists n, in_unwind s r1 n cs (coerce_val v H).
Proof.
unfold coerce_reg, coerce_val; induction 1 ; mysimp ; s ; try discriminate ; repeat
match goal with
| [ H : list_t _ = list_t _ |- _] => injection H ; intros ; subst ;
rewrite (proof_irrelevance H (eq_refl _)) in * ; simpl in *
| [ H : Star _ = Star _ |- _ ] => injection H ; mysimp
| _ => idtac
end ; [ exists 0 ; mysimp | idtac].
destruct (IHin_regexp2 t1 r1 (eq_refl _) (eq_refl _)) ; exists (S x) ;
repeat econstructor ; mysimp.
Qed.
in_regexp s r cs v ->
forall t1 (r1 : regexp t1) (H: t = list_t t1),
coerce_reg r H = Star r1 ->
exists n, in_unwind s r1 n cs (coerce_val v H).
Proof.
unfold coerce_reg, coerce_val; induction 1 ; mysimp ; s ; try discriminate ; repeat
match goal with
| [ H : list_t _ = list_t _ |- _] => injection H ; intros ; subst ;
rewrite (proof_irrelevance H (eq_refl _)) in * ; simpl in *
| [ H : Star _ = Star _ |- _ ] => injection H ; mysimp
| _ => idtac
end ; [ exists 0 ; mysimp | idtac].
destruct (IHin_regexp2 t1 r1 (eq_refl _) (eq_refl _)) ; exists (S x) ;
repeat econstructor ; mysimp.
Qed.
If (cs,v) is in r under context s, where (r,s) = par2reg p, then
(cs,v) is in p.
Lemma r2p_ok : forall t (p:parser t) s cs v,
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v ->
in_parser p cs v.
Proof.
induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ;
simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) ||
generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) ||
generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ;
try (econstructor ; eauto ; fail) ;
repeat match goal with
| [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _,
H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ;
rewrite <- H ; simpl ; clear IH ; intros
end ;
try contradiction ||
(econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa).
generalize (star_rep H0 (eq_refl _) (eq_refl _)). mysimp. clear H0. subst.
generalize cs v H4. clear cs v H4.
induction x ; s ; constructor ; auto.
match goal with
| [ |- in_parser (Map_p _ r _) _ (?f x) ] => assert (f = r)
end.
generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp.
rewrite (proof_irrelevance e0 (eq_refl _)). auto.
rewrite H5. apply (Map_pi _ r). apply H4.
apply (@extends_in _ r0 cs x s0 (snd (extend_state s0 r)) H1 H2). unfold extend_state.
simpl. auto with dfa.
Qed.
in_regexp (snd (par2reg p s)) (fst (par2reg p s)) cs v ->
in_parser p cs v.
Proof.
induction p ; simpl ; intros s cs v ; try (repeat unfold_par2reg) ;
simpl in * ; intros ; (generalize (AnyInv H) || generalize (CharInv H) ||
generalize (EpsInv H) || generalize (CatInv H1) || generalize (AltInv H1) ||
generalize (MapInv H0) || generalize (ZeroInv H) || idtac) ; s ;
try (econstructor ; eauto ; fail) ;
repeat match goal with
| [ IH : forall _ _ _, in_regexp (snd (par2reg ?p _)) _ _ _ -> _,
H : _ = par2reg ?p ?s |- _ ] =>
generalize (IH s) ; generalize (p2r_extends p s) ; generalize (p2r_wf p s) ;
rewrite <- H ; simpl ; clear IH ; intros
end ;
try contradiction ||
(econstructor ; eauto with dfa ; fail) || (eapply Alt_right_pi ; eauto with dfa).
generalize (star_rep H0 (eq_refl _) (eq_refl _)). mysimp. clear H0. subst.
generalize cs v H4. clear cs v H4.
induction x ; s ; constructor ; auto.
match goal with
| [ |- in_parser (Map_p _ r _) _ (?f x) ] => assert (f = r)
end.
generalize s0 e l. induction s1 ; unfold value, error, fn_result, fn_result' ; mysimp.
rewrite (proof_irrelevance e0 (eq_refl _)). auto.
rewrite H5. apply (Map_pi _ r). apply H4.
apply (@extends_in _ r0 cs x s0 (snd (extend_state s0 r)) H1 H2). unfold extend_state.
simpl. auto with dfa.
Qed.
The translation preserves meaning
Theorem parser2regexp_equiv :
forall t (p:parser t) cs v,
in_parser p cs v <->
in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v.
Proof.
unfold parser2regexp ; mysimp ; firstorder ; [eapply p2r_ok | eapply r2p_ok] ; eauto.
Qed.
forall t (p:parser t) cs v,
in_parser p cs v <->
in_regexp (snd (parser2regexp p)) (fst (parser2regexp p)) cs v.
Proof.
unfold parser2regexp ; mysimp ; firstorder ; [eapply p2r_ok | eapply r2p_ok] ; eauto.
Qed.
Finally -- convert the parser to a regexp and then run the
derivative-based parser on this.
Definition parse t (p:parser t) : list char_p -> list (result_m t) :=
deriv_parse (snd (parser2regexp p))
(fst (parser2regexp p)) (p2r_wf p _).
Theorem parse_correct : forall t (p:parser t) cs v,
in_parser p cs v <-> In v (parse p cs).
Proof.
intros t p cs v ;
generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p))
(p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ;
generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto.
Qed.
End PARSER.
deriv_parse (snd (parser2regexp p))
(fst (parser2regexp p)) (p2r_wf p _).
Theorem parse_correct : forall t (p:parser t) cs v,
in_parser p cs v <-> In v (parse p cs).
Proof.
intros t p cs v ;
generalize (DerivParse_is_parser (snd (parser2regexp p)) (fst (parser2regexp p))
(p2r_wf p initial_ctxt) cs v) ; intro H ; destruct H ;
generalize (parser2regexp_equiv p cs v) ; intro H1 ; destruct H1 ; split ; auto.
Qed.
End PARSER.
This page has been generated by coqdoc