[Reading of stanza files Manuel M T Chakravarty **20060423192318] { hunk ./LambdaFeed.hs 13 --- The feed source is a directory containing channel descriptor, which are +-- The feed source is a directory containing channel descriptors, which are hunk ./LambdaFeed.hs 23 +-- Text files in Stanza format +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- This is the format used for channel descriptors and channel items. A +-- stanza file is comprised of a list of stanzas which a separated by one or +-- more empty lines (ie, lines containing nothing but white space). Each +-- such stanza is an association list of tags and values. Tags start in the +-- leftmost column and are terminated by a colon or the end of line. There +-- can be no white space within a tag, but there can be trailing white space, +-- which is discarded. The value associated with a tag is the string +-- following the tag-terminating colon. Such a value string extends to the +-- start of the next tag or end of the current stanza, whichever comes first. +-- Hence, value strings can span multiple lines, but any line after the +-- initial one, ie the one containing the tag, must have white space in the +-- leftmost column. If a tag is terminated by the end of line, instead of a +-- colon, its associated value string is empty. +-- +-- A line starting with `--' is a comment line, and hence, ignored, +-- +-- Channel descriptors and channel items +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- The tags used in channel descriptors and channel items are the same as +-- those in the RSS 2.0 specification, but we permit arbitrary use of upper +-- and lowercase letters and have the following exceptions: +-- +-- `lastBuildDate': ignored, as it is added by lambdaFeed +-- `generator' : ignored, as it is added by lambdaFeed +-- hunk ./LambdaFeed.hs 56 --- * Command line option to specify the name of the directory containing the --- feed source. --- --- * Multiple channels. +-- * Read channels in RSS format (probably with HaXml). They'd just be .xml +-- files in the feed directory. hunk ./LambdaFeed.hs 69 - isDigit, digitToInt) + isDigit, digitToInt, isSpace) hunk ./LambdaFeed.hs 73 +import System.Directory ( + getDirectoryContents) hunk ./LambdaFeed.hs 82 - Month(..), ClockTime, CalendarTime(..), toUTCTime, toClockTime, + Month(..), ClockTime, CalendarTime(..), getClockTime, toUTCTime, toClockTime, hunk ./LambdaFeed.hs 92 -date = "22 Apr 2006" +date = "23 Apr 2006" hunk ./LambdaFeed.hs 112 - feedDirCfg :: String, + feedDirCfg :: FilePath, hunk ./LambdaFeed.hs 125 -data Option = FeedDirOpt String +data Option = FeedDirOpt FilePath hunk ./LambdaFeed.hs 169 --- Print help information if `Help' requested and remove any `Help' options. +-- Print help information if `Help' requested and terminate successfully. hunk ./LambdaFeed.hs 176 - when (Help `elem` opts) $ + when (Help `elem` opts) $ do hunk ./LambdaFeed.hs 178 - return $ filter (/= Help) opts + exitWith ExitSuccess + return opts hunk ./LambdaFeed.hs 188 + processOneOption config Verbose = + return $ config {verboseCfg = True} hunk ./LambdaFeed.hs 223 +-- * Cf +-- hunk ./LambdaFeed.hs 368 --- Feed data structure (modeled after RSS 2.0.1 (rv 6)) +-- Feed data structure (following after RSS 2.0.1 (rv 6)) hunk ./LambdaFeed.hs 377 -data Feed = Feed [Chan] +data Feed = Feed [Channel] hunk ./LambdaFeed.hs 381 -data Chan = Chan { - titleChan :: String, - linkChan :: URL, - descriptionChan :: String, - languageChan :: Maybe String, - copyrightChan :: Maybe String, - managingEditorChan :: Maybe String, - webMasterChan :: Maybe String, - pubDateChan :: Maybe Date, - lastBuildDateChan :: Maybe Date, - categoryChan :: [Category], -- may be empty - generatorChan :: Maybe String, - docsChan :: Maybe URL, - cloudChan :: (), - ttlChan :: Maybe Int, -- in minutes - imageChan :: Maybe Image, - ratingChan :: (), - textInputChan :: (), - skipHoursChan :: (), - skipDaysChan :: (), - itemsChan :: [Item] - } +data Channel = Channel { + titleChan :: String, + linkChan :: URL, + descriptionChan :: String, + languageChan :: Maybe String, + copyrightChan :: Maybe String, + managingEditorChan :: Maybe String, + webMasterChan :: Maybe String, + pubDateChan :: Maybe Date, + lastBuildDateChan :: Maybe Date, + categoryChan :: [Category], -- may be empty + generatorChan :: Maybe String, + docsChan :: Maybe URL, + cloudChan :: (), + ttlChan :: Maybe Int, -- in minutes + imageChan :: Maybe Image, + ratingChan :: (), + textInputChan :: (), + skipHoursChan :: (), + skipDaysChan :: (), + itemsChan :: [Item] + } hunk ./LambdaFeed.hs 474 - return $ undefined + let feedDir = feedDirCfg config + say config $ "Getting channels from feed directory `" ++ feedDir ++ "'" + -- + fnames <- getDirectoryContents feedDir + let chanFNames = [ feedDir fname + | fname <- fnames, suffix fname == chanSuffix] + -- + say config $ "Reading " ++ show (length chanFNames) ++ + " channel(s)" + -- + chanDescs <- mapM readChanDesc chanFNames + liftM Feed $ mapM readChan chanDescs + +-- Read a channel description from the file of the given name and modify its +-- build date to be the current time. +-- +readChanDesc :: FilePath -> IO (Channel, FilePath) +readChanDesc fname = + do + text <- readStanzas fname + print $ show text + currTime <- getClockTime + let (channel, fname) = parseChanDesc text + return $ (channel {lastBuildDateChan = Just $ Date currTime}, fname) + +-- Parse a channel description, returning our internal channel representation +-- (without any items) and the name of the directory containing the channels +-- items. +-- +parseChanDesc :: [Stanza] -> (Channel, FilePath) +parseChanDesc = undefined + +readChan :: (Channel, FilePath) -> IO Channel +readChan = undefined hunk ./LambdaFeed.hs 524 +-- Filename operations +-- ------------------- + +-- Split the suffix of a filename. +-- +suffix :: FilePath -> String +suffix = reverse . takeWhile (/= '.') . reverse + +-- Join a directory and file path. +-- +() :: FilePath -> FilePath -> FilePath +dir fname | null dir = fname + | last dir == '/' = dir ++ fname + | otherwise = dir ++ "/" ++ fname + + +-- File operations +-- --------------- + +-- A stanza is a list of associations, where each association comprises +-- +-- (1) a tag string (without the terminating colon of the textual +-- representation), +-- (2) a range of source file lines (containing the association in the textual +-- representation), and +-- (3) a value string. +-- +type Stanza = [(String, (Int, Int), String)] + +-- Read a text file in stanza format (including line numbers for better error +-- messages). +-- +-- * See top of file for details of the format. +-- +readStanzas :: FilePath -> IO [Stanza] +readStanzas fname = + do + contents <- readFile fname + let numbered = zip [1..] (filter notComment . lines $ contents) + stanzas = splitIntoStanzas numbered + return $ map splitIntoAssocs stanzas + where + splitIntoStanzas [] = [] + splitIntoStanzas lines = + let + lines0 = dropWhile isEmptyLine lines + (stanza, lines') = break isEmptyLine lines0 + stanzas = splitIntoStanzas lines' + in + stanza:stanzas + -- + splitIntoAssocs [] = [] + splitIntoAssocs ((start, line):lines) = + let + (values, lines') = break isTagLine lines + assocs = splitIntoAssocs lines' + -- + (tag, value1) = break (== ':') line + allValues = stripColonNClean value1 : map (clean . snd) values + -- + end | null values = start + | otherwise = (fst . last) values + in + case extractTag tag of + Just rawTag -> + (rawTag, (start, end), unwords' allValues):assocs + Nothing -> malformedTagErr fname start tag + -- + extractTag "" = Nothing + extractTag tag | isSpace (head tag) = Nothing + | any isSpace cleanedTag = Nothing + | otherwise = Just cleanedTag + where cleanedTag = cleanTrailing tag + -- + notComment ('-':'-':_) = False + notComment _ = True + -- + stripColonNClean (':':str) = clean str -- strip leading colon & clean w/s + stripColonNClean str = clean str + -- + isEmptyLine = all isSpace . snd + isTagLine = not . isSpace . (!!0) . snd + -- + unwords' = dropWhile isSpace . unwords + +-- Complain about a malformed tag and abort +-- +malformedTagErr :: FilePath -> Int -> String -> a +malformedTagErr fname line tag = + error $ fname ++ ":" ++ show line ++ ": `" ++ cleanTrailing tag ++ + "' is not a valid tag" + +-- Lose leading and trailing whitespace +-- +clean :: String -> String +clean = reverse . dropWhile isSpace . reverse . dropWhile isSpace + +-- Lose trailing whitespace +-- +cleanTrailing :: String -> String +cleanTrailing = reverse . dropWhile isSpace . reverse + + +-- Logging +-- ------- + +-- Information messages (if in verbose mode) +-- +say :: Config -> String -> IO () +say config str | verboseCfg config = putStrLn str + | otherwise = return () + + hunk ./LambdaFeed.hs 645 - readFeed config >>= writeFeed config >>= printSummary config + (readFeed config >>= writeFeed config >>= printSummary config) + `catch` \e -> abort (show e) hunk ./LambdaFeed.hs 648 + where + abort err = + do + name <- getProgName + putStrLn $ name ++ ": " ++ err hunk ./lambdaFeed.sh 3 - -ignore-dot-ghci \ + -ignore-dot-ghci -w \ }