-- -- The Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Contributed by Don Stewart -- Uses a port of the simple hashtable from the Clean entry -- -- !!!!!! kill kill import GHC.Exts import GHC.IOBase import Foreign import Char import List import Maybe import Text.Printf import Data.ByteString.Base import qualified Data.ByteString.Char8 as S import Data.Array.Base import qualified Data.Array.IO as A main = do (PS fp o l) <- get (S.pack ">TH") withForeignPtr fp $ \p -> do let sec = p `plusPtr` o mapM_ (writeFreqs l sec) [1,2] mapM_ (writeFrame l sec) =<< mapM toseq strs strs = ["GGT","GGTA","GGTATT","GGTATTTTAATT","GGTATTTTAATTTATAGT"] get p = do s <- S.getContents let Just n = S.findSubstring p s return $! S.map toUpper -- array fusion! . S.filter ((/=) '\n') . S.dropWhile ((/=) '\n') . S.copy . S.drop n $ s writeFreqs size p n = do h <- htNew n size htInsert size p n h let vs = htNodes h mapM_ draw (sortBy kf vs) putChar '\n' where draw (Node p f) = printf "%s %.3f\n" (ppr n p) pct where pct = (100 * (fromIntegral f) / total) :: Double total = fromIntegral (size - n + 1) kf (Node k x) (Node j y) = case compare y x of EQ -> compare (ppr n k) (ppr n j); x -> x writeFrame size p (n,k) = do h <- htNew n size htInsert size p n h Node k v <- htFind k h putStrLn $ (show v) ++ ('\t' : ppr n k) ppr n p = inlinePerformIO (map w2c `fmap` peekArray n p) toseq s = fmap ((,) (length s)) (newArray0 0 (map c2w s)) ------------------------------------------------------------------------ -- -- An implementation of simpl_hash.c in Haskell -- data Hash = HT !Int !Int !(A.IOArray Int Buckets) data Buckets = Empty | Bucket !(Ptr Word8) !Int | Buckets [Node] data Node = Node !(Ptr Word8) !Int htNew :: Int -> Int -> IO Hash htNew fl sz | fl `seq` sz `seq` False = undefined htNew fl sz = HT fl nprime `fmap` A.newArray (0,nprime-1) Empty where n = htSize fl sz nprime = head (dropWhile (< n) primes) htSize :: Int -> Int -> Int htSize fl buflen | fl `seq` buflen `seq` False = undefined htSize fl buflen = min lim (go (fl-1) 4) where lim = (buflen - fl) `div` 1024 go n m | n `seq` m `seq` False = undefined | n > 0 && m < lim = go (n-1) (m*4) | otherwise = m htInsert :: Int -> Ptr Word8 -> Int -> Hash -> IO () htInsert s p n h | s `seq` p `seq` n `seq` h `seq` False = undefined htInsert s p n h = mapM_ (htInc h . plusPtr p) [0..s-n] htInc :: Hash -> Ptr Word8 -> IO () htInc ht@(HT n size arr) k = case htHash size n k of i -> i `seq` do b <- unsafeRead arr i unsafeWrite arr i $! inc b where equal = eq n inc :: Buckets -> Buckets inc (Bucket k' v) | k' `seq` v `seq` False = undefined | k' `equal` k = Bucket k' (v+1) | otherwise = Buckets $ Node k' v : [Node k 1] inc (Buckets b) = Buckets $ incL b inc Empty = Bucket k 1 incL :: [Node] -> [Node] incL (i@(Node k' v):ls) | i `seq` False = undefined | k' `equal` k = Node k' (v+1) : ls | otherwise = i : incL ls incL [] = [Node k 1] htNodes :: Hash -> [Node] htNodes ht@(HT n size arr) = items 0 where read i = inlinePerformIO $! unsafeRead arr i items i |i `seq` False = undefined | i >= size = [] | otherwise = items_bucket (read i) (i+1) items_bucket b i | i `seq` False = undefined items_bucket (Bucket k' v) i = k' `seq` v `seq` Node k' v : items i items_bucket (Buckets b) i = b `seq` items_list b i items_bucket Empty i = items i items_list _ i | i `seq` False = undefined items_list (e:l) i = e `seq` e : items_list l i items_list [] i = items i htFind :: Ptr Word8 -> Hash -> IO Node htFind k h@(HT n size arr) = k `seq` h `seq` do let i = htHash size n k v <- unsafeRead arr $! i return $! find v where equal = eq n find (Bucket k' v) | k' `equal` k = Node k' v | otherwise = Node k 0 find (Buckets l) = find' l find Empty = Node k 0 find' (i@(Node k' v):ls) | k' `equal` k = i | otherwise = find' ls find' [] = Node k 0 -- not portable! htHash :: Int -> Int -> Ptr Word8 -> Int htHash (I# max) (I# size) ptr@(Ptr p) = abs . inlinePerformIO . IO $ go p 0# where lim = p `plusAddr#` size go p acc s | p `geAddr#` lim = (# s, I# (acc `remInt#` max) #) | otherwise = case readInt8OffAddr# p 0# s of (# s, i #) -> go (p `plusAddr#` 1#) (5# *# acc +# i) s -- A fast Ptr comparison for Hash keys eq n p q = n `seq` inlinePerformIO $ do a <- peek p :: IO Word8 b <- peek q :: IO Word8 if a /= b then return False else go n p q where go n p q | n `seq` p `seq` q `seq` False = undefined | n == 0 = return True | otherwise = do a <- peek p :: IO Word8 b <- peek q :: IO Word8 if a /= b then return False else go (n-1) (p `plusPtr` 1) (q `plusPtr` 1) primes = [ 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 93241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457 ]