theory s2
  imports Main
begin


section "1. Lists as multisets"


text \<open>
This question explores lists as multisets. A list @{term "[0,0,0,2,1,3,2]"}
can be seen as a multiset containing 0,1,2,3, with multiplicity 3,1,2,1, respectively.

Isabelle has a built-in function @{term count_list} that gives multiplicity
\<close>
value "count_list [0::nat,0,0,2,1,3,2] (0::nat)"
text \<open>
In the following, instead of using this count_list,
 we define multiplicity as a finite map @{term m_count} and
 prove some properties about it.

In working on these questions, the @{text ext} lemma can be useful.
Also, it is often useful (and necessary) to fine control the simplifier
 by using "simp only:", "simp (no_asm_simp)", etc.
\<close>


subsection "1.1 Multiplicity function for lists as multisets"

text \<open> We define {@term m_count}, as below. It converts a list as a multiset
 to a function that returns @{term "nat option"}:
 @{term "m_count ls x"} returns @{term None}
if @{term x} does not appear in @{term ls} and returns @{term "Some n"}
 if @{term x} appears @{term "n+1"} times in @{term ls}.
 The @{term nat} value is offset by 1 because we start from
@{term "Some 0"} when @{term x} appears once in @{term ls}.
\<close>

primrec m_count :: "'a list \<Rightarrow> ('a, nat) map" where
  "m_count [] = Map.empty"
|  "m_count (a#as) =
    (case (m_count as) a of
   None \<Rightarrow> (m_count as)(a\<mapsto>0)
| Some n \<Rightarrow> (m_count as)(a\<mapsto>Suc n))"
print_theorems

value "m_count [0::nat,0,0,2,1,3,2] 0"


text \<open>
1-(a) State the correspondence between Isabelle's @{term count_list} and
 @{term m_count} as logical equality and prove it.
\<close>
lemma "count_list ls x =
  (case (m_count ls x) of None \<Rightarrow> 0 | Some n \<Rightarrow> Suc n)"
  by (induct ls; clarsimp split: option.splits)


text \<open>
1-(b) Prove that @{term "m_count ls x = None"} is equivalent to
 @{term x} not being a member of @{term ls}.\<close>

lemma m_notin: "m_count ls x = None \<longleftrightarrow> x \<notin> set ls"
  by (induct ls; clarsimp split: option.splits)


subsection "1.2 Ordering on multisets"

text \<open>
We can define an ordering on multisets: @{term "ls1 \<le> ls2"} holds if,
 for any @{term "x::'a"}, the multiplicity of @{term x} is higher in
 @{term ls2} than in @{term ls1}. This ordering is defined
in terms of @{term m_count} as below:
\<close>

definition le :: "('a, nat) map \<Rightarrow> ('a, nat) map \<Rightarrow> bool" where
  "le m1 m2 \<equiv>
      \<forall>x. case (m1 x, m2 x) of
            (Some a, Some b) \<Rightarrow> a \<le> b
          | (Some a, None) \<Rightarrow> False
          | (None, _) \<Rightarrow> True"


text \<open>
1-(c) Give two examples of multisets @{term my_ls1}, @{term my_ls2} such that
 @{term "le (m_count my_ls1) (m_count my_ls2)"} holds.
 Then show that @{term le} is defined correctly by proving it.
\<close>

(* replace the two undefined below with your multisets *)
definition "my_ls1 = [1,2,3,2::nat]"
definition "my_ls2 = [1,2,3,2,3::nat]"

value "m_count [1,2,3,2::nat]"
value "m_count [1,2,3,2,3::nat]"

term "le (m_count [1,2,3,2::nat]) (m_count [1,2,3,2,3::nat])"

lemma le_example: "le (m_count my_ls1) (m_count my_ls2)"
  unfolding my_ls1_def my_ls2_def le_def
  by auto


text \<open>
1-(d) Prove that adding an element to a list by @{term Cons} always gives a list
  greater than the original list.\<close>

lemma le_m_count: "le (m_count ls) (m_count (x#ls))" 
  by (fastforce simp: le_def split: option.splits)

text \<open>
1-(e) Prove that @{term m_count} is preserved
  by changing the order of the elements in the list.
\<close>

lemma m_count_swap: "m_count (a#b#ls) = m_count (b#a#ls)"
  by (fastforce simp: le_def split: option.splits)

lemma m_count_perm:
  "ls = xs @ x # ys  \<Longrightarrow> m_count ls = m_count (x # xs @ ys)"
  apply (induct ls arbitrary: xs x ys)
   apply simp
  apply (case_tac xs)
   apply simp
  apply (simp only: append_Cons)
  apply (subst m_count_swap)
  by (fastforce split: option.split)


subsection "1.3 Addition of multisets"

text \<open>Next, we consider addition of two multisets. For lists as multisets,
 this corresponds to @{term append}. We can also consider addition of
 multiplicity functions, which is defined for
@{term m_count} as below: \<close>

definition m_add :: "('a, nat) map \<Rightarrow> ('a, nat) map \<Rightarrow> ('a,nat) map" where
  "m_add m1 m2 \<equiv>
      \<lambda>x. case (m1 x, m2 x) of
           (Some a, Some b) \<Rightarrow> Some (a + b + 1)
          | (Some a, None) \<Rightarrow> m1 x
          | (None, _) \<Rightarrow> m2 x"


text \<open>
1-(f) Prove that the empty map is an identity for addition of maps
 (both for left and right).
\<close>

lemma m_add_empty[simp]: "m_add Map.empty m = m"
  apply (simp add: m_add_def)
  done

lemma m_add_empty2[simp]: "m_add m Map.empty = m"
  apply (rule ext)
  apply (simp add: m_add_def split: option.splits)
  done


text \<open>
1-(g) Prove that @{term m_add} correctly returns the multiplicity of 
added multisets, i.e., two lists appended by Isabelle @{term append}.\<close>

value "m_add (m_count (1#[1,2,3,2::nat])) (m_count (2#[1,2,3,2,3::nat])) 2"

lemma m_cons_swap:
  "m_add (m_count (a#ls1)) (m_count ls2) =
       m_add (m_count ls1) (m_count (a#ls2))"
  by (fastforce simp add: m_add_def split: option.splits)

lemma m_add_append: "m_add (m_count ls1) (m_count ls2) = m_count (append ls1 ls2)"
  apply (induct ls1 arbitrary: ls2)
   apply (simp add: m_add_def)
  apply (simp only: m_cons_swap)
  apply (simp only: append_Cons)
  apply (simp add: m_count_perm)
  done


subsection "1.4 Sorting of multisets"

text \<open>
Consider the simple sorting function defined as below:
\<close>

primrec insort' :: "'a::ord \<Rightarrow> 'a list \<Rightarrow> 'a list" where
  "insort' x [] = [x]" |
  "insort' x (y#ys) =
  (if x \<le> y then (x#y#ys) else y#(insort' x ys))"

definition sort' :: "('a::ord) list \<Rightarrow> 'a list" where
  "sort' xs = foldr insort' xs []"


text \<open>
1-(h) With respect to the above sorting function, prove that the sorting of a list
does not change the multiset it represents; in other words, that @{term sort'}
preserves multiplicity.
\<close>

lemma m_count_insort':
  "m_count (insort' a ls) = m_count (a#ls)"
  apply (induct ls arbitrary: a)
   apply simp
  apply (simp only: insort'.simps split: if_split)
  apply safe
  apply (simp only: m_count_swap)
  apply (drule_tac x=aa in meta_spec)
  apply (simp only: m_count.simps(2))
  done

lemma m_count_sort':
  "m_count (sort' ls) = m_count ls"
  unfolding sort'_def
  apply (induct ls; simp)
  apply (simp only: m_count_insort')
  apply (simp only: m_count.simps(2))
  done


subsection "1.5 Union of multisets"

text \<open>
We now consider a union of two multisets, whose multiplicity can be
given by the following function:
\<close>
definition m_union :: "('a, nat) map \<Rightarrow> ('a, nat) map \<Rightarrow> ('a,nat) map" where
  "m_union m1 m2 \<equiv>
      \<lambda>x. case (m1 x, m2 x) of
            (Some a, Some b) \<Rightarrow> Some (max a b)
          | (Some a, None) \<Rightarrow> m1 x
          | (None, _) \<Rightarrow> m2 x"

text \<open>
The union @{text m_Un} of two lists as multisets can be defined as follows:
\<close>

primrec m_Un' :: "'a list \<Rightarrow> 'a list \<Rightarrow> 'a list \<Rightarrow> 'a list" where
  "m_Un' [] ys ac = ys @ ac"
| "m_Un' (x#xs) ys ac =
     (if x \<in> set ys then m_Un' xs (remove1 x ys) (x#ac)
      else m_Un' xs ys (x#ac))"

definition m_Un where "m_Un l1 l2 = m_Un' l1 l2 []"

text \<open>
1-(i) Prove the following lemma about @{term m_Un'} and @{term append}.
\<close>

lemma ac_append:
  "m_Un' l1 l2 (ac @ ls) = (m_Un' l1 l2 ac) @ ls"
  apply (induct l1 arbitrary: l2 ac; simp)
  apply safe
   apply (drule_tac x="remove1 a l2" and y="a#ac" in meta_spec2)
   apply simp
   apply (drule_tac x=l2 and y="a#ac" in meta_spec2)
  apply simp
  done

text \<open>
1-(j) Prove the following lemma about of @{term remove1} and @{term m_count}.
\<close>

lemma m_count_remove1:
  "m_count (remove1 a ls) =
    (case m_count ls a of
      None \<Rightarrow> m_count ls
    | Some n \<Rightarrow> if n = 0 then (m_count ls)(a := None)
                else (m_count ls)(a:= map_option (\<lambda>x. x - 1) (m_count ls a)))"
  by (induct ls; fastforce split: option.split)

text \<open>
1-(k) Prove the correctness of @{term m_Un} with respect to @{term m_union}.
\<close>

lemma m_union_empty[simp]:
  "m_union Map.empty m = m"
  apply (simp add: m_union_def)
  done

lemma ac1: "m_Un' l1 l2 [a] = (m_Un' l1 l2 []) @ [a]"
  by (simp add: ac_append[symmetric])

lemma m_count_union':
  "m_count (m_Un' l1 l2 []) = m_union (m_count l1) (m_count l2)"
  apply (induct l1 arbitrary: l2)
   apply simp
  apply (rule ext)
  apply (simp split del: if_split)
  apply (case_tac "a \<in> set l2")
  apply (simp add: ac1 m_add_append[symmetric])
  apply (simp add: m_count_remove1)
  apply (subgoal_tac "\<exists>n. m_count l2 a = Some n")
   prefer 2
   apply (rule ccontr; simp add: m_notin)
  apply (fastforce simp: m_add_def m_union_def split: option.split)
  apply (simp add: ac1 m_add_append[symmetric])
   apply (simp add: m_add_def m_notin[symmetric] split: option.split)
  apply (fastforce simp: m_add_def m_union_def split: option.split)
  done


lemma m_count_union:
  "m_count (m_Un l1 l2) = m_union (m_count l1) (m_count l2)"
  unfolding m_Un_def
  using m_count_union'
  by auto

\<comment> \<open> Alternative proof for m_count_union:

lemma filter_not_mem: "x \<notin> set xs \<Longrightarrow> filter ((=) x) xs = []"
  by(subst filter_False) auto

lemma length_filter_m_Un':
  shows "length (filter ((=) x) (m_Un' l1 l2 l3))
         = length (filter ((=) x) l3) + max (length (filter ((=) x) l1)) (length (filter ((=) x) l2))"
  by(induct l1 arbitrary: l2 l3) (auto simp add: filter_remove1 length_remove1 filter_not_mem)

lemma length_filter_m_Un: "length (filter ((=) x) (m_Un l1 l2))
       = max (length (filter ((=) x) l1)) (length (filter ((=) x) l2))"
  by(simp add: m_Un_def length_filter_m_Un')

lemma m_count_alt:
  "m_count xs x = (case length(filter ((=) x) xs) of 0 \<Rightarrow> None | Suc n \<Rightarrow> Some n)"
  by(induct xs) (auto split: option.split option.split_asm nat.split nat.split_asm)

lemma m_count_union:
  "m_count (m_Un l1 l2) = m_union (m_count l1) (m_count l2)"
  apply(rule ext)
  by(auto simp add: m_count_alt m_union_def length_filter_m_Un
               split: option.split option.split_asm nat.split_asm nat.split)
\<close>

(*---------------------------------------------------------*)


section "2. Compiler for arithmetic expressions"


(* Syntax of the language *)
type_synonym vname = string
type_synonym val = int

datatype aexp = N val | V vname | Plus aexp aexp

(* Evaluation function *)
type_synonym vstate = "vname \<Rightarrow> val"

primrec eval :: "aexp \<Rightarrow> vstate \<Rightarrow> val" where
  "eval (N n) s = n"
| "eval (V x) s = s x"
| "eval (Plus e1 e2) s = eval e1 s + eval e2 s"

(* Programs for the register machine *)
type_synonym reg = nat
datatype prog =
    LoadI reg val
  | Load reg vname
  | Add reg reg
  | Seq prog prog ("_ ;; _" 100)
  | Skip

(* Compiler from expression to register programs *)
primrec compile :: "aexp \<Rightarrow> reg \<Rightarrow> prog \<times> reg"
where
  "compile (N n) r = (LoadI r n, r + 1)" |
  "compile (V x) r = (Load r x, r + 1)" |
  "compile (Plus e1 e2) r1 =
    (let (p1, r2) = compile e1 r1;
         (p2, r3) = compile e2 r2
      in ((Seq p1 (Seq p2 (Add r1 r2))), r3))"


(* ---------- *)

text\<open>
2-(a): Give an example of an arithmetic expression which evaluates to 7 and uses @{term Plus}
    twice or more. Prove that your example actually evaluates to 7.
\<close>

lemma "eval (Plus (N 1) ((Plus (N 4) (N 2)))) s = 7"
  by simp


text\<open>
2-(b): Prove that the compiler always returns a register identifier strictly higher
  than the one given to it.
\<close>

lemma compile_reg_increase:
  "compile e r = (p, r') \<Longrightarrow> r' > r"
  by (induct e arbitrary:r r' p; fastforce split:prod.splits)

(* ---------- *)

subsection "2.1 Target machine big-step semantics and compiler correctness"

(* Big step operational semantics to programs for the register machine *)
type_synonym rstate = "reg \<Rightarrow> val"
type_synonym mstate = "rstate \<times> vstate \<times> prog"

inductive sem :: "mstate \<Rightarrow> rstate \<Rightarrow> bool"
  ("_ \<Down> _" [0,60] 61) where
  sem_LoadI: "(rs, \<sigma>, LoadI r n) \<Down> rs(r := n)"
| sem_Load: "(rs, \<sigma>, Load r v) \<Down> rs(r := \<sigma> v)"
| sem_Add: "(rs, \<sigma>, Add r1 r2) \<Down> rs(r1 := rs r1 + rs r2)"
| sem_Seq: "\<lbrakk>(rs, \<sigma>, p) \<Down> rs'; (rs', \<sigma>, q) \<Down> rs''\<rbrakk> \<Longrightarrow> (rs, \<sigma>, p ;; q) \<Down> rs''"

(* ---------- *)


text\<open>
2-(c): Prove that the register machine semantics is deterministic.
\<close>

lemma sem_det: "\<lbrakk>(rs, \<sigma>, e) \<Down> rs'; (rs, \<sigma>, e) \<Down> rs''\<rbrakk> \<Longrightarrow> rs' = rs''"
  apply (induct e arbitrary: rs \<sigma> rs' rs'')
      apply (fastforce elim: sem.cases)
     apply (fastforce elim: sem.cases)
    apply (fastforce elim: sem.cases)
   defer
   apply (fastforce elim: sem.cases)
  apply (erule sem.cases; simp)
  apply (rename_tac rs0 rs1 rs2)
  apply (erule sem.cases; simp)
  apply (rename_tac rs0' rs1' rs2')
  apply (drule_tac x=rs0' and y=\<sigma> in meta_spec2)
  apply (drule_tac x=rs1 and y=rs1' in meta_spec2)
  apply simp
  done


text\<open>
2-(d): Prove that the compiler produces programs that do not modify any registers
  of identifier lower than the register identifier given to it.
\<close>

lemma compile_no_modify_lower_reg:
  "compile e r = (p, r') \<Longrightarrow>
   (rs, \<sigma>, p) \<Down> rs' \<Longrightarrow>
   r'' < r \<Longrightarrow>
   rs' r'' = rs r''"
  apply (induct e arbitrary: r p r' rs rs'; simp)
    apply (erule sem.cases; simp)
   apply (erule sem.cases; simp)
  apply (case_tac "compile e1 r"; simp)
  apply (case_tac "compile e2 b"; simp)
  apply clarsimp
  apply (erule sem.cases; simp)
  apply (drule_tac x=r and y=a in meta_spec2)
  apply (drule_tac x=b and y=rsa in meta_spec2)
  apply (drule_tac x=rs'a in meta_spec)
  apply simp
  apply (rotate_tac -2)  
  apply (erule sem.cases; simp)
  apply (drule_tac x=b and y=aa in meta_spec2)
  apply (drule_tac x=r' and y=rsb in meta_spec2)
  apply (drule_tac x=rs'b in meta_spec)
  apply simp
  apply (rotate_tac -2)
  apply (erule sem.cases; simp)
  apply clarsimp
  apply (erule meta_mp)
  apply (drule compile_reg_increase)
  apply (drule compile_reg_increase)
  apply auto
  done


text\<open>
2-(e): Prove that the compiler produces programs that, when executed, yield the value
  of the expression in the register *provided* as the argument to @{term compile} in
  the final @{term rstate} according to the program's big-step semantics.
\<close>

lemma compile_correct:
  "compile e r = (p, r') \<Longrightarrow>
   (rs, \<sigma>, p) \<Down> rs' \<Longrightarrow>
   rs' r = eval e \<sigma>"
  apply (induct e arbitrary: r p r' rs rs')
    apply (erule sem.cases; simp)
   apply (erule sem.cases; simp)
  apply simp
  apply (case_tac "compile e1 r"; simp)
  apply (case_tac "compile e2 b"; simp)
  apply clarsimp
  apply (erule sem.cases; simp)
  apply (drule_tac x=r and y=a in meta_spec2)
  apply (drule_tac x=b and y=rsa in meta_spec2)
  apply (drule_tac x=rs'a in meta_spec)
  apply simp
  apply (rotate_tac -2)  
  apply (erule sem.cases; simp)
  apply (drule_tac x=b and y=aa in meta_spec2)
  apply (drule_tac x=r' and y=rsb in meta_spec2)
  apply (drule_tac x=rs'b in meta_spec)
  apply simp
  apply (rotate_tac -2)
  apply (erule sem.cases; simp)
  apply clarsimp
  apply (frule compile_reg_increase)
  apply (frule compile_no_modify_lower_reg[rotated], simp, simp)
  apply (frule_tac e=e2 in compile_no_modify_lower_reg; simp)
  done


(* ---------- *)


(*  Small-step semantics *)
inductive s_sem :: "mstate \<Rightarrow> mstate \<Rightarrow> bool" ("_ \<leadsto> _" 100)
  where
  "(rs, \<sigma>, LoadI r n) \<leadsto> (rs(r := n), \<sigma>, Skip)"
| "(rs, \<sigma>, Load r v) \<leadsto> (rs(r := \<sigma> v), \<sigma>, Skip)"
| "(rs, \<sigma>, Add r1 r2) \<leadsto> (rs(r1 := rs r1 + rs r2), \<sigma>, Skip)"
| "(rs, \<sigma>, p) \<leadsto> (rs', \<sigma>, p') \<Longrightarrow> (rs, \<sigma>, p ;; q) \<leadsto> (rs', \<sigma>, p' ;; q)"
| "(rs, \<sigma>, Skip ;; p) \<leadsto> (rs, \<sigma>, p)"

primrec term_with_n_Suc :: "nat \<Rightarrow> aexp" where
  "term_with_n_Suc 0 = N 0"
| "term_with_n_Suc (Suc n) = (Plus (term_with_n_Suc n) (N 0))"


(* ---------- *)


text\<open>
2-(f): Define a function s_sem_n:: nat \<Rightarrow> mstate \<Rightarrow> mstate \<Rightarrow> bool
that executes n steps of the small-step semantics.
\<close>

primrec s_sem_n :: "nat \<Rightarrow> mstate \<Rightarrow> mstate \<Rightarrow> bool" where
  "s_sem_n 0 ms ms' = (ms = ms')"
| "s_sem_n (Suc n) ms ms' = (\<exists>ms''. ms \<leadsto> ms'' \<and> s_sem_n n ms'' ms')"


text\<open>
2-(g): Prove that two executions of resp. n and m steps according to
   s_sem_n compose into a single execution of (n+m) steps if their
   resp. final and initial machine state match.
\<close>

lemma s_sem_n_add:
  "s_sem_n n ms ms' \<Longrightarrow> s_sem_n m ms' ms'' \<Longrightarrow> s_sem_n (n+m) ms ms''"
  apply(induct n arbitrary: ms)
   apply simp
  apply(subst add_Suc)
  apply clarsimp
  apply force
  done


text\<open>
2-(h): Prove that if p executes to p' in n steps according to s_sem_n, then
   p ;; q will execute to p' ;; q in n steps with all other parts of the
   state being the same as in the original execution.
\<close>

lemma s_sem_n_Seq:
  "s_sem_n n (rs, \<sigma>, p) (rs', \<sigma>, p') \<Longrightarrow>
     s_sem_n n (rs, \<sigma>, p;;q) (rs', \<sigma>, p';;q)"
  apply (induct n arbitrary: rs p rs'; simp)
  apply clarsimp
  apply (erule s_sem.cases; fastforce intro: s_sem.intros)
  done

text\<open>
2-(i): Prove that if a program executes in the big-step semantics to a 
   resulting rstate rs', then it executes in the small-step semantics
   to the same resulting rstate and the resulting program Skip with
   no changes to the vstate.
\<close>

lemma s_sem_n_correct:
  "ms \<Down> rs' \<Longrightarrow> \<exists>n. s_sem_n n ms (rs', fst (snd ms), Skip)"
  apply (induct ms rs' rule: sem.induct; simp)
     apply (rule_tac x=1 in exI)
     apply simp
     apply (fold fun_upd_def)
     apply (rule s_sem.intros)
    apply (rule_tac x=1 in exI)
    apply simp
    apply (rule s_sem.intros)
    apply (rule_tac x=1 in exI)
    apply simp
   apply (rule s_sem.intros)
  apply clarsimp
  apply (rename_tac n m)
  apply (rule_tac x="n+(m+1)" in exI)
  apply (rule s_sem_n_add)
   apply (erule s_sem_n_Seq)
  apply simp
  apply (fastforce intro: s_sem.intros)
  done


text\<open>
2-(j): Prove that compiling a "term_with_n_Suc h" will use a number of
   registers that is indeed strictly lower bounded by h.
\<close>

lemma compile_term_with_n_Suc_lower_bound_n:
   "h < snd (compile (term_with_n_Suc h) r) - r"
  apply (induct h)
   apply simp_all
  apply (case_tac "compile (term_with_n_Suc h) r")
  apply simp
  done


text\<open>
2-(k): Using this fact, prove that there is no universal bound on the number
   of registers used for any compiled program.
\<close>

lemma compile_has_no_universal_register_bound:
  "\<not> (\<exists>h. (\<forall>p. h \<ge> snd (compile p r) - r))"
  apply simp
  apply (rule allI)
  apply (rule_tac x="term_with_n_Suc h" in exI)
  apply (simp add: not_le)
  apply (rule compile_term_with_n_Suc_lower_bound_n)
  done


(*---------------------------------------------------------*)

end