theory Reg_demo_sol
imports Main
begin

(* 
The aim of this demo is to:
(1) define a datatype representing regular expressions
(2) define the language expressed by a regular expression
(3) define the reverse of a regular expression
(4) prove that the language of the reverse of an 
    expression e is equal to the reverse of the language of e.
*)

(*-----------------------------------------------------*)
(* Defining the datatype of regular expressions *)

(*
Define a datatype representing regular expressions,
for instance the regular expression \<not> ( (a.b|c)* )

Then define a few regular expressions, like the one above

Remember the syntax: 
   datatype ’a list = Nil | Cons 'a "'a list"

Note: you can define notation for your constructors;
      for example  Trans 'a 'b (infixr "[\<longrightarrow>]" 60)
      means that (Trans a b) can be noted (a [\<longrightarrow>] b)
      where the operator is infix, associates to the 
      right, and is of priority 60.
*)

datatype regexp =
  Atom char ("<_>")
 |Alt regexp regexp (infixl "\<bar>" 50)
 |Conc regexp regexp (infixl "\<cdot>"  60)
 |Star regexp ("_ \<star>" [79] 80)
 |Neg regexp ("!_" [70] 70)
   
primrec all_Alt :: "nat \<Rightarrow> regexp" where
  "all_Alt 0 = Atom (char_of_nat 0)"
| "all_Alt (Suc n) = Alt (Atom (char_of_nat (Suc n))) (all_Alt n)"
definition Dot :: regexp where "Dot = all_Alt 255"

term "!(<CHR ''a''>\<cdot><CHR ''b''>)"
term "(!(<CHR ''a''>))\<cdot><CHR ''b''>"
term "!((<CHR ''a''>\<cdot><CHR ''b''>\<bar><CHR ''c''>)\<star>)"

definition mye0::regexp
where
  "mye0=Alt (Atom CHR ''a'') (Conc (Atom CHR ''b'')(Atom CHR ''c''))"
  
thm mye0_def

definition mye1::regexp
where
  "mye1=Conc (Atom CHR ''a'') (Alt (Atom CHR ''b'')(Atom CHR ''c''))"

thm mye1_def
  

(*-----------------------------------------------------*)
(* Defining the language of a regular expression *)
 
type_synonym word = "char list"

(* 
Define a recursive function computing the language of a
given regular expression:
   lang:: "regexp \<Rightarrow> word set"
  
Try it on small examples.

Hint: You might need to define an inductive set. 
      Recall the notation: 
         inductive_set Fin :: "'a set set"
         where
         emptyI:  "{} \<in> Fin" |
         insertI: "A \<in> Fin \<Longrightarrow> insert a A \<in> Fin"
*)

inductive_set star for L
where 
  star_empty[simp,intro!]: "[] \<in> star L"
 |star_app: "\<lbrakk>u\<in>L; v\<in>star L\<rbrakk> \<Longrightarrow> u@v \<in> star L"
 
print_theorems
 

definition conc :: "word set \<Rightarrow> word set \<Rightarrow> word set"
  where
  "conc  l1 l2 \<equiv>{ w1@w2  |w1 w2. w1 \<in>  l1 \<and> w2 \<in>  l2}"
  
primrec 
  lang :: "regexp \<Rightarrow> word set"
where
  "lang (Atom c) = {[c]}"
 |"lang (Alt e1 e2) = lang e1 \<union> lang e2"
 |"lang (Conc e1 e2) = conc (lang e1) (lang e2)"
 |"lang (Star e) = star (lang e)"
 |"lang (Neg e) = - (lang e)"
 

value "lang mye0" 
 
value "lang mye1" 
 
lemma lang_all_Alt: "lang (all_Alt n) = (\<lambda>x. [char_of_nat x]) ` {..n}"
  apply(induct n)
   apply auto[1]
  apply clarsimp
  apply(rule equalityI)
   apply clarsimp
  apply(clarsimp simp add: atMost_Suc)
  done
    
lemma lang_Dot: "lang Dot = (\<lambda>x. [x]) ` UNIV"
  unfolding Dot_def
  apply(simp add: lang_all_Alt)
  apply(rule equalityI)
   apply clarsimp
  apply (clarsimp simp add: image_iff)
  apply(rule_tac x="nat_of_char xa" in bexI)
   apply clarsimp
  apply clarsimp
  apply(simp add: less_Suc_eq_le[symmetric])
  done
    
lemmas star_sing = star_app[where u = "[x]" for x, simplified]

lemma star_lang_Dot: "x \<in> star (lang Dot)"
  apply(induct x)
   apply simp
  apply(rule star_sing)
   apply(clarsimp simp add: lang_Dot)
  apply assumption
  done
      
lemma lang_Dot_Star: "lang (Star Dot) = UNIV"
  apply(simp)
  apply(rule equalityI)
   apply simp
  apply(rule subsetI)
  apply simp
  apply(rule star_lang_Dot)
  done
definition Empty where "Empty = Neg(Star Dot)"
  
lemma lang_Empty:"lang Empty = {}"
  unfolding Empty_def
  apply (simp only: lang.simps(5) lang_Dot_Star)
  apply simp
  done
    
(*-----------------------------------------------------*)
(* Defining the reverse of a regular expression *)

primrec rev_regexp :: "regexp \<Rightarrow> regexp"
where 
  "rev_regexp (Atom c) = Atom c"
 |"rev_regexp (Alt e1 e2) = Alt (rev_regexp e1) (rev_regexp e2)"
 |"rev_regexp (Conc e1 e2) = Conc (rev_regexp e2) (rev_regexp e1)"
 |"rev_regexp (Star e) = Star (rev_regexp e)"
 |"rev_regexp (Neg e) = Neg (rev_regexp e)"


(*-----------------------------------------------------*)
(* Proof that the language of the reverse of an 
    expression e is equal to the reverse of the language of e
     "lang (rev_regexp e) = rev ` (lang e)"
    *)

lemma star_app_aux:
  "xa \<in> star l \<Longrightarrow> x \<in> l \<Longrightarrow>  xa @ x \<in> star l"
  apply (erule star.induct)
   apply clarsimp
   apply (drule_tac v="[]" in star_app, simp_all)[1]
  apply clarsimp
  apply (rule star_app, simp_all)
  done
  
lemma star_rev_aux1:
  " x \<in> star (rev ` l) \<Longrightarrow> x \<in> rev ` star l"
  thm star.induct
  apply (induct rule: star.induct) 
   apply simp
  apply clarsimp
  apply (rename_tac x y)
  apply (rule_tac x="y@x" in image_eqI, simp)
  apply (clarsimp simp: star_app_aux)
  done

lemma star_rev_aux2:
  " xa \<in> star l \<Longrightarrow> rev xa \<in> star (rev ` l)"
  apply (induct rule: star.induct)
   apply simp
  apply clarsimp
  apply (erule star_app_aux)
  apply simp
  done
  
  
lemma star_rev:
  "star (rev ` l) = rev ` star l"
  apply (rule equalityI)
   apply (clarsimp simp: star_rev_aux1)
  apply (clarsimp simp: star_rev_aux2)
  done
  

lemma conc_rev:
  "conc (rev ` l2) (rev ` l1) = rev ` conc l1 l2"
  apply (simp add: conc_def)
  apply (rule equalityI)
   apply clarsimp
   apply (rename_tac u v)
   apply (rule_tac x="v@u" in image_eqI, auto)[1]
  apply clarsimp
  apply auto
  done
  
lemma neg_rev:
  "- rev ` l = rev ` (- l)"
  apply (rule bij_image_Compl_eq[symmetric])
  apply (rule bijI)
   apply (rule injI)
   apply simp
  apply (rule_tac f=rev in surjI)
  apply auto[1]
  done
  
lemma "lang (rev_regexp e) = rev ` (lang e)"
  apply (induct e) 
      -- "Atom"
      apply clarsimp
     -- "Alt"
     apply auto[1]  (* or image_Un *)
    -- "Conc"
    apply (clarsimp simp: conc_rev)
   -- "Star" 
   apply (clarsimp simp add: star_rev)
  -- "Neg"
  apply (clarsimp simp: neg_rev)
  done


end