theory a3 imports "autocorres-1.4/autocorres/AutoCorres" begin (* To run this file you need the AutoCorres tool used in the lecture. 1. Download AutoCorres from \url{http://www.cse.unsw.edu.au/~cs4161/autocorres-1.4.tar.gz} 2. Unpack the .tar.gz file, which will create the directory autocorres-1.4 tar -xzf autocorres-1.4.tar.gz 3. Build the AutoCorres heap L4V_ARCH=X64 isabelle build -v -b -d autocorres-1.4 AutoCorres 4. Load this file using the AutoCorres heap L4V_ARCH=X64 isabelle jedit -d autocorres-1.4 -l AutoCorres a3.thy *) section "Question 1: Regular Expression Matching" (* Negation is too hard with this one, so we add Null and One instead *) datatype regexp = Null | One | Atom char | Alt regexp regexp | Conc regexp regexp (infixl "\" 60) | Star regexp (* Same definitions for regular languages as in the lecture, but with Null and One *) inductive_set star :: "string set \ string set" for L where star_empty[simp,intro!]: "[] \ star L" | star_app[elim]: "\ u \ L; v \ star L \ \ u@v \ star L" definition conc :: "string set \ string set \ string set" where "conc A B \ {xs@ys |xs ys. xs \ A \ ys \ B}" primrec lang :: "regexp \ string set" where "lang Null = {}" | "lang One = {[]}" | "lang (Atom c) = {[c]}" | "lang (Alt e1 e2) = lang e1 \ lang e2" | "lang (Conc e1 e2) = conc (lang e1) (lang e2)" | "lang (Star e) = star (lang e)" (* For examples/testing. *) primrec string :: "char list \ regexp" where "string [] = One" | "string (x#xs) = Atom x \ string xs" (* Automatically coerce type "char list" into "regexp" using the function "string" *) declare [[coercion string]] (* Enable automatic type coercion *) declare [[coercion_enabled]] (* Can now write: *) term "Star ''abc''" (* This definition is taken from https://www.schoolofhaskell.com/school/to-infinity-and-beyond/pick-of-the-week/a-regular-expression-matcher *) function matches :: "regexp \ string \ (string \ bool) \ bool" where "matches Null cs k = False" | "matches One cs k = k cs" | "matches (Atom c) cs k = (cs \ [] \ c = hd cs \ k (tl cs))" | "matches (Alt r1 r2) cs k = (matches r1 cs k \ matches r2 cs k)" | "matches (Conc r1 r2) cs k = matches r1 cs (\cs'. matches r2 cs' k)" | "matches (Star r) cs k = (k cs \ matches r cs (\cs'. if cs' \ cs then matches (Star r) cs' k else False))" by pat_completeness auto (* Either change the cs' = cs condition to something more obviously terminating so that property "matches_correct" below still holds (easier) or prove termination of the version above directly (harder). *) termination matches sorry (* TODO *) value "matches ''xy'' ''xy'' (op = [])" value "matches ''xyz'' ''xy'' (op = [])" value "matches (Star ''xy'') ''xyxy'' (op = [])" lemma concD[dest!]: "xs \ conc A B \ \as bs. as \ A \ bs \ B \ xs = as@bs" by (auto simp: conc_def) lemma star_cases: "xs \ star A \ xs = [] \ (\u v. xs = u@v \ u \ A \ v \ star A \ u \ [])" oops (* TODO *) lemma matches_correct: "matches r cs (op = []) = (cs \ lang r)" oops (* TODO *) (*-------------------------------------------------*) section "Question 2: Binary Search" (* Hints: - remember to try the @{text arith} proof method for arithmetic goals on integers or natural numbers. - use find_theorems to find Isabelle library theorems about existing concepts. - the lemma @{thm sorted_equals_nth_mono} might be useful - you are allowed to use sledgehammer and other automation - if you can't prove one of the lemmas below, you can still assume it in the rest of the proof - the function @{const int} converts an Isabelle nat to an int - the function @{const nat} converts an Isabelle int to a nat *) thm sorted_equals_nth_mono term int term nat install_C_file "binsearch.c" autocorres [unsigned_word_abs=binary_search] "binsearch.c" (*******************************************************************************) context binsearch begin (* The monadic definition that autocorres produces for the C code: *) thm binary_search'_def (* Abbreviation for signed machine integers *) type_synonym s_int = "32 signed word" (* The heap only stores unsigned machine integers; they have the same representation as signed ones and can be converted to each other *) type_synonym u_int = "32 word" (*******************************************************************************) (* A few lemmas to help improve automation: *) (* Pointer arithmetic on pointers to signed and unsigned words is the same *) lemma ptr_coerce_add_signed_unsigned [simp]: "(ptr_coerce ((a :: s_int ptr) +\<^sub>p x) :: u_int ptr) = ptr_coerce a +\<^sub>p x" by (cases a) (simp add: ptr_add_def) (* Pointer arithmetic distributivity law *) lemma ptr_add_add [simp]: "p +\<^sub>p (x + y) = p +\<^sub>p x +\<^sub>p y" by (simp add: ptr_add_def distrib_left mult.commute) (* C division is the same as Isabelle division for non-negative numbers: *) lemma sdiv [simp]: "0 \ a \ a sdiv 2 = a div (2::int)" by (auto simp: sdiv_int_def sgn_if) (* Some useful facts about INT_MIN and INT_MAX to improve automation: *) lemma INT_MIN_neg [simp]: "INT_MIN < 0" by (simp add: INT_MIN_def) lemma INT_MAX_gr [simp]: "- 1 < INT_MAX" "-1 \ INT_MAX" "1 \ INT_MAX" by (auto simp add: INT_MAX_def) (*******************************************************************************) (* This function enumerates the addresses of the entries of an signed int array: *) fun array_addrs :: "s_int ptr \ nat \ s_int ptr list" where "array_addrs p 0 = []" | "array_addrs p (Suc len) = p # array_addrs (p +\<^sub>p 1) len" text \ Prove the following lemma: \ lemma length_array_addrs [simp]: "length (array_addrs a len) = len" oops (* TODO *) text \ Prove the following lemma: \ lemma array_addrs_nth [simp]: "\ 0 \ x; nat x < len \ \ array_addrs a len ! nat x = a +\<^sub>p x" oops (* TODO *) (*******************************************************************************) text \ fill in the array_list definition \ definition array_list :: "(u_int ptr \ u_int) \ s_int ptr \ int \ int list" where "array_list h p len = TODO" (*******************************************************************************) text \ Prove the following lemma: \ lemma ptr_array: "\ 0 \ x; x < len \ \ uint (heap_w32 s (ptr_coerce (a :: s_int ptr) +\<^sub>p x)) = array_list (heap_w32 s) a len ! nat x" oops (*TODO*) (*******************************************************************************) text \ fill in the valid_array definition. It might make sense to do this in two parts and look at the invariant and proof obligations first. \ definition valid_array :: "(u_int ptr \ bool) \ (u_int ptr \ u_int) \ s_int ptr \ int \ bool" where "valid_array vld h p len = TODO" (* OR you can also define: definition valid_array1 :: "(u_int ptr \ bool) \ s_int ptr \ int \ bool" where "valid_array1 vld p len = TODO" definition valid_array2 :: "(u_int ptr \ bool) \ (u_int ptr \ u_int) \ s_int ptr \ int \ bool" where "valid_array2 vld h p len = TODO" definition valid_array :: "(u_int ptr \ bool) \ (u_int ptr \ u_int) \ s_int ptr \ int \ bool" where "valid_array vld h p len = (valid_array1 vld p len \ valid_array2 vld h p len)" *) (*******************************************************************************) text \ Prove the following lemma: \ lemma key_lt: "\ key < xs ! nat mid; mid - 1 < x; sorted xs; 0 \ mid; x < int (length xs) \ \ key < xs ! nat x" oops (*TODO*) text \ Prove the following lemma: \ lemma key_gt: "\ xs ! nat mid < key; 0 \ x; x \ mid; sorted xs; mid < int (length xs) \ \ xs ! nat x < key" oops (*TODO*) (*******************************************************************************) lemma binary_search_correct: notes ptr_array [where len=len, simp] shows "\\s. sorted (array_list (heap_w32 s) a len) \ valid_array (is_valid_w32 s) (heap_w32 s) a len \ TODO \ binary_search' a len key \ \r s. (r < 0 \ key \ set (array_list (heap_w32 s) a len)) \ (0 \ r \ r < len \ (array_list (heap_w32 s) a len ! nat r) = key)\!" unfolding binary_search'_def apply(subst whileLoopE_add_inv[where I="\(high,low) s. valid_array (is_valid_w32 s) (heap_w32 s) a len \ sorted (array_list (heap_w32 s) a len) \ TODO1" and M="\((high,low),_). TODO2"]) apply wp oops end end