[Complete channel descriptor parsing Manuel M T Chakravarty **20060425193055] { hunk ./LambdaFeed.hs 56 --- `location' : required element giving the filename of a directory with +-- `items' : required element giving the filename of a directory with hunk ./LambdaFeed.hs 65 +-- * Validate the format of some fields; eg, URLs should really look like +-- URLs. +-- hunk ./LambdaFeed.hs 81 - isDigit, digitToInt, isSpace) - + isDigit, digitToInt, isSpace, toLower) hunk ./LambdaFeed.hs 84 +import Data.Maybe ( + fromJust) hunk ./LambdaFeed.hs 95 +import System.IO ( + stderr) hunk ./LambdaFeed.hs 110 -date = "23 Apr 2006" +date = "25 Apr 2006" hunk ./LambdaFeed.hs 130 + debugCfg :: Bool, hunk ./LambdaFeed.hs 132 - verboseCfg :: Bool + verboseCfg :: Bool, + quietCfg :: Bool hunk ./LambdaFeed.hs 139 + debugCfg = False, hunk ./LambdaFeed.hs 141 - verboseCfg = False + verboseCfg = False, + quietCfg = False hunk ./LambdaFeed.hs 147 -data Option = FeedDirOpt FilePath +data Option = Debug + | FeedDirOpt FilePath hunk ./LambdaFeed.hs 152 + | Quiet hunk ./LambdaFeed.hs 158 -options = [ Option ['f'] ["feed"] (ReqArg FeedDirOpt "DIR") +options = [ Option ['d'] ["debug"] (NoArg Debug) + "produces extra diagnostic output" + , Option ['f'] ["feed"] (ReqArg FeedDirOpt "DIR") hunk ./LambdaFeed.hs 168 + , Option ['q'] ["quiet"] (NoArg Quiet) + "suppress all output, including parser warnings" hunk ./LambdaFeed.hs 214 + processOneOption config Debug = + return $ config {debugCfg = True} hunk ./LambdaFeed.hs 220 + processOneOption config Quiet = + return $ config {quietCfg = True} hunk ./LambdaFeed.hs 252 --- Parse an RFC822 date string (but allowing four digits for years and UTC, in --- addition to UT) +-- Parse an RFC822 date string (but allowing four digits for years and also +-- UTC, in addition to UT) hunk ./LambdaFeed.hs 409 -data Feed = Feed [Channel] +data Feed items = Feed [Channel items] + deriving Show hunk ./LambdaFeed.hs 414 -data Channel = Channel { - titleChan :: String, - linkChan :: URL, - descriptionChan :: String, - languageChan :: Maybe String, - copyrightChan :: Maybe String, - managingEditorChan :: Maybe String, - webMasterChan :: Maybe String, - pubDateChan :: Maybe Date, - lastBuildDateChan :: Maybe Date, - categoryChan :: [Category], -- may be empty - generatorChan :: Maybe String, - docsChan :: Maybe URL, - cloudChan :: (), - ttlChan :: Maybe Int, -- in minutes - imageChan :: Maybe Image, - ratingChan :: (), - textInputChan :: (), - skipHoursChan :: (), - skipDaysChan :: (), - itemsChan :: [Item] - } +data Channel items = + Channel { + titleChan :: String, + linkChan :: URL, + descriptionChan :: String, + languageChan :: Maybe String, + copyrightChan :: Maybe String, + managingEditorChan :: Maybe String, + webMasterChan :: Maybe String, + pubDateChan :: Maybe Date, + lastBuildDateChan :: Maybe Date, + categoryChan :: [Category], -- may be empty + generatorChan :: Maybe String, + docsChan :: Maybe URL, + cloudChan :: (), + ttlChan :: Maybe Int, -- in minutes + imageChan :: Maybe Image, + ratingChan :: (), + textInputChan :: (), + skipHoursChan :: (), + skipDaysChan :: (), + itemsChan :: items + } + deriving Show hunk ./LambdaFeed.hs 449 + deriving Show hunk ./LambdaFeed.hs 454 - domainCategory :: String, - categoryCategory :: String + categoryCategory :: String, + domainCategory :: Maybe String hunk ./LambdaFeed.hs 457 + deriving Show hunk ./LambdaFeed.hs 475 + deriving Show hunk ./LambdaFeed.hs 484 + deriving Show hunk ./LambdaFeed.hs 492 + deriving Show hunk ./LambdaFeed.hs 500 + deriving Show + +-- Copies all components of a channel structure except `itemsChan'. +-- +copyChannelInfo :: Channel a -> Channel b +copyChannelInfo chan = + Channel { + titleChan = titleChan chan, + linkChan = linkChan chan, + descriptionChan = descriptionChan chan, + languageChan = languageChan chan, + copyrightChan = copyrightChan chan, + managingEditorChan = managingEditorChan chan, + webMasterChan = webMasterChan chan, + pubDateChan = pubDateChan chan, + lastBuildDateChan = lastBuildDateChan chan, + categoryChan = categoryChan chan, + generatorChan = generatorChan chan, + docsChan = docsChan chan, + cloudChan = cloudChan chan, + ttlChan = ttlChan chan, + imageChan = imageChan chan, + ratingChan = ratingChan chan, + textInputChan = textInputChan chan, + skipHoursChan = skipHoursChan chan, + skipDaysChan = skipDaysChan chan, + itemsChan = error "channel missing items (after copy)" + } + +-- Channel value with default components. +-- +defaultChannel :: Channel items +defaultChannel = Channel { + titleChan = error "channel missing title", + linkChan = error "channel missing link", + descriptionChan = error "channel missing description", + languageChan = Nothing, + copyrightChan = Nothing, + managingEditorChan = Nothing, + webMasterChan = Nothing, + pubDateChan = Nothing, + lastBuildDateChan = Nothing, + categoryChan = [], + generatorChan = Nothing, + docsChan = Nothing, + cloudChan = (), + ttlChan = Nothing, + imageChan = Nothing, + ratingChan = (), + textInputChan = (), + skipHoursChan = (), + skipDaysChan = (), + itemsChan = error "channel missing items" + } + +-- Image value with default components. +-- +defaultImage :: Image +defaultImage = Image { + urlImage = error "image missing url", + titleImage = error "image missing title", + linkImage = error "image missing link", + widthImage = Nothing, + heightImage = Nothing, + descriptionImage = Nothing + } + +-- Category value with default components.. +-- +defaultCategory :: Category +defaultCategory = Category { + categoryCategory = error "category missing category", + domainCategory = Nothing + } + +-- Items value with default components. +-- +defaultItem :: Item +defaultItem = Item { + titleItem = Nothing, + linkItem = Nothing, + descriptionItem = Nothing, + authorItem = Nothing, + categoryItem = [], + commentsItem = Nothing, + enclosureItem = Nothing, + guidItem = Nothing, + pubDateItem = Nothing, + sourceItem = Nothing + } hunk ./LambdaFeed.hs 601 -readFeed :: Config -> IO Feed +readFeed :: Config -> IO (Feed [Item]) hunk ./LambdaFeed.hs 605 - say config $ "Getting channels from feed directory `" ++ feedDir ++ "'" + sayV config $ "Getting channels from feed directory `" ++ feedDir ++ "'" hunk ./LambdaFeed.hs 611 - say config $ "Reading " ++ show (length chanFNames) ++ - " channel(s)" + sayV config $ "Reading " ++ show (length chanFNames) ++ " channel(s)" + -- + chanDescs <- mapM (readChanDesc config) chanFNames + -- + sayD config $ show chanDescs hunk ./LambdaFeed.hs 617 - chanDescs <- mapM readChanDesc chanFNames - liftM Feed $ mapM readChan chanDescs + liftM Feed $ mapM (readChan config) chanDescs hunk ./LambdaFeed.hs 622 -readChanDesc :: FilePath -> IO (Channel, FilePath) -readChanDesc fname = +readChanDesc :: Config -> FilePath -> IO (Channel FilePath) +readChanDesc config fname = hunk ./LambdaFeed.hs 626 - print $ show text hunk ./LambdaFeed.hs 627 - let (channel, fname) = parseChanDesc text - return $ (channel {lastBuildDateChan = Just $ Date currTime}, fname) + let (chan, errs) = parseChanDesc fname text + mapM (say config) errs + return $ chan {lastBuildDateChan = Just $ Date currTime} hunk ./LambdaFeed.hs 631 --- Parse a channel description, returning our internal channel representation --- (without any items) and the name of the directory containing the channels --- items. +-- Read the items from the items directrory specified by the channel into the +-- channel data structure. hunk ./LambdaFeed.hs 634 -parseChanDesc :: [Stanza] -> (Channel, FilePath) -parseChanDesc = undefined +readChan :: Config -> Channel FilePath -> IO (Channel [Item]) +readChan config chan = + do + let itemsDir = itemsChan chan + sayV config $ "Getting items for " ++ titleChan chan ++ " from `" ++ + itemsDir ++ "'" + -- + fnames <- getDirectoryContents itemsDir + let itemFNames = [ itemsDir fname + | fname <- fnames, suffix fname == itemSuffix] + -- + sayV config $ "Reading " ++ show (length itemFNames) ++ " items(s)" + -- + items <- mapM (readItem config) itemFNames + return $ (copyChannelInfo chan) {itemsChan = items} hunk ./LambdaFeed.hs 650 -readChan :: (Channel, FilePath) -> IO Channel -readChan = undefined +-- Read an item from the file of the given name. +-- +readItem :: Config -> FilePath -> IO Item +readItem = undefined hunk ./LambdaFeed.hs 657 -writeFeed :: Config -> Feed -> IO Summary +writeFeed :: Config -> Feed [Item] -> IO Summary hunk ./LambdaFeed.hs 670 --- Filename operations --- ------------------- +-- Stanza parsing +-- -------------- hunk ./LambdaFeed.hs 673 --- Split the suffix of a filename. +-- Parse a channel description, returning our internal channel representation +-- (without any items) and the name of the directory containing the channels +-- items. hunk ./LambdaFeed.hs 677 -suffix :: FilePath -> String -suffix = reverse . takeWhile (/= '.') . reverse +-- * The first argument is to identify the source in error messages. +-- +-- * Missing tags in a channel description lead to a fatal error (i.e., no +-- channel description is returned, only some errors). Otherwise, errors +-- are not fatal (parsing will still have produced a valid channel +-- descriptor, although may be not exactly what the user intended). +-- +parseChanDesc :: String -> [Stanza] -> (Channel FilePath, [String]) +parseChanDesc sname stanzas = + case parseStanzas chanDescProc sname stanzas defaultChannel of + (Nothing , errs) -> abortWith errs + (Just chan, errs) -> (chan, errs) + where + chanDescProc = + -- required elements + [ ("title", Proc True $ Simple $ + cantFail $ \v c -> c {titleChan = v} + ) + , ("link", Proc True $ Simple $ + cantFail $ \v c -> c {linkChan = v} + ) + , ("description", Proc True $ Simple $ + cantFail $ \v c -> c {descriptionChan = v} + ) + , ("items", Proc True $ Simple $ + cantFail $ \v c -> c {itemsChan = v} + ) + -- optional elements + , ("language", Proc False $ Simple $ + cantFail $ \v c -> c {languageChan = Just v} + ) + , ("copyright", Proc False $ Simple $ + cantFail $ \v c -> c {copyrightChan = Just v} + ) + , ("managingeditor", Proc False $ Simple $ + cantFail $ \v c -> c {managingEditorChan = Just v} + ) + , ("webmaster", Proc False $ Simple $ + cantFail $ \v c -> c {webMasterChan = Just v} + ) + , ("pubdate", Proc False $ Simple $ + mayFail parseDate "bad date format (see RSS 2.0 spec)" $ + \v c -> c {pubDateChan = Just v} + ) + , ("lastbuilddate", Proc False $ Simple $ + mayFail parseDate "bad date format (see RSS 2.0 spec)" $ + \v c -> c {lastBuildDateChan = Just v} + ) + , ("category", Proc False $ Compound $ + cantFailStanza parseCategory $ + \v c -> c {categoryChan = categoryChan c ++ [v]} + ) + , ("generator", Proc False $ Simple $ + cantFail $ \v c -> c {generatorChan = Just v} + ) + , ("docs", Proc False $ Simple $ + cantFail $ \v c -> c {docsChan = Just v} + ) + , ("cloud", Proc False $ Simple $ + cantFail $ \v c -> c {cloudChan = ()} -- ignore + ) + , ("ttl", Proc False $ Simple $ + mayFail parseNat "positive integer expected" $ + \v c -> c {ttlChan = Just v} + ) + , ("image", Proc False $ Compound $ + mayFailStanza parseImage $ \v c -> c {imageChan = v} + ) + , ("rating", Proc False $ Simple $ + cantFail $ \v c -> c {ratingChan = ()} -- ignore + ) + , ("textinput", Proc False $ Simple $ + cantFail $ \v c -> c {textInputChan = ()} -- ignore + ) + , ("skiphours", Proc False $ Simple $ + cantFail $ \v c -> c {skipDaysChan = ()} -- ignore + ) + , ("skipdays", Proc False $ Simple $ + cantFail $ \v c -> c {skipDaysChan = ()} -- ignore + ) + ] + -- + mayFail = mayFailS sname + mayFailStanza = mayFailStanzaS sname hunk ./LambdaFeed.hs 762 --- Join a directory and file path. +-- Parse an image element. hunk ./LambdaFeed.hs 764 -() :: FilePath -> FilePath -> FilePath -dir fname | null dir = fname - | last dir == '/' = dir ++ fname - | otherwise = dir ++ "/" ++ fname +parseImage :: String -> Stanza -> (Maybe Image, [String]) +parseImage sname stanza@((_, (start, end), _):_) = + parseStanzas imageProc extendedSName [stanza] defaultImage + where + imageProc = + -- required elements + [ ("image", Proc True $ Simple $ + mayFail parseNull "image tag can have no value" $ + \v i -> i -- image tag itself has no data + ) + , ("url", Proc True $ Simple $ + cantFail $ \v i -> i {urlImage = v} + ) + , ("title", Proc True $ Simple $ + cantFail $ \v i -> i {titleImage = v} + ) + , ("link", Proc True $ Simple $ + cantFail $ \v i -> i {linkImage = v} + ) + -- optional elements + , ("width", Proc False $ Simple $ + mayFail parseNat "positive integer expected" $ + \v c -> c {widthImage = Just v} + ) + , ("height", Proc False $ Simple $ + mayFail parseNat "positive integer expected" $ + \v c -> c {heightImage = Just v} + ) + , ("description", Proc False $ Simple $ + cantFail $ \v i -> i {descriptionImage = Just v} + ) + ] + -- + mayFail = mayFailS sname + -- + extendedSName = sname ++ ":" ++ show start ++ "-" ++ show end ++ + ": image" + +-- Parse a category element (can't fail) +-- +parseCategory :: Stanza -> Category +parseCategory stanza = + (fromJust . fst) (parseStanzas categoryProc "" [stanza] defaultCategory) + where + categoryProc = + -- required elements + [ ("category", Proc True $ Simple $ + cantFail $ \v i -> i {categoryCategory = v} + ) + -- optional elements + , ("domain", Proc False $ Simple $ + cantFail $ \v i -> i {domainCategory = Just v} + ) + ] + +-- Parse an integer value. +-- +parseNat :: String -> Maybe Int +parseNat str | all isDigit str = Just $ read str + | otherwise = Nothing + +-- Parser that simply asserts that it is being passed an empty string. +-- +parseNull :: String -> Maybe () +parseNull s | null s = Just () + | otherwise = Nothing + +-- Simple action without any special parsing. +-- +cantFail :: (String -> r -> r) -- record updater + -> (StanzaAssoc -> r -> (r, [String])) +cantFail upd = \(_, _, v) c -> (upd v c, []) + +-- Compound action without any special parsing. +-- +cantFailStanza :: (Stanza -> a) -- stanza parser + -> (a -> r -> r) -- record updater + -> (Stanza -> r -> (r, [String])) +cantFailStanza parse upd = \stanza rec -> (upd (parse stanza) rec, []) + +-- Action with parsing that may fail. +-- +mayFailS :: String -- source name + -> (String -> Maybe a) -- value parser + -> String -- error message + -> (a -> r -> r) -- record updater + -> (StanzaAssoc -> r -> (r, [String])) +mayFailS sname parse err upd = \(_, range, v) rec -> + case parse v of + Just x -> (upd x rec, []) + Nothing -> (rec , [elementErrStr sname range err]) + +-- Action with parsing that may fail. +-- +mayFailStanzaS :: String -- source name + -> (String -> Stanza -> (a, [String])) -- stanza parser + -> (a -> r -> r) -- record updater + -> (Stanza -> r -> (r, [String])) +mayFailStanzaS sname parse upd = \stanza rec -> + let (v, errs) = parse sname stanza + in + (upd v rec, errs) hunk ./LambdaFeed.hs 960 --- some structure `a'. +-- some structure `a' (and may also return a list of error messages). +-- +-- * We ignore case when comparing tag names. hunk ./LambdaFeed.hs 979 -data Action a = Simple (StanzaAssoc -> (a -> a)) - | Compound (Stanza -> (a -> a)) +data Action a = Simple (StanzaAssoc -> (a -> (a, [String]))) + | Compound (Stanza -> (a -> (a, [String]))) hunk ./LambdaFeed.hs 984 --- * The string indicates the source of the stanza list and is used in error --- messages. +-- * The second argument indicates the source of the stanza list and is used +-- in error messages. +-- +-- * If required tags are missing, we inject an the error message into the +-- errors produced by the transformer. As this error is fatal in some +-- context, we indicate the error also by producing a transformer that +-- returns constant `Nothing' in its first component. +-- +parseStanzas :: StanzaProcessor a -- stanza processor for parsing + -> String -- name of stanza source + -> [Stanza] -- stanzas to parse + -> (a -> (Maybe a, [String])) +-- +-- The following is a two step process: hunk ./LambdaFeed.hs 999 -parseStanzas :: String -> StanzaProcessor a -> [Stanza] -> (a -> a) -parseStanzas sname stanzaProc stanzas = - case processStanzas required stanzas of - (transformer, [] ) -> transformer - (_ , req) -> - error $ sname ++ ": required tags missing: " ++ show req +-- (1) The stanza processor computes from the input stanza a *function* that +-- when applied to a value of type `a' (usually a record) adds the +-- information from the stanza to that value. +-- (2) We apply the function from Step (1) to a value of type `a' (usually a +-- record with default values in all components) to get the final result +-- of parsing. +-- +parseStanzas stanzaProc sname stanzas = \rec -> + let + (transformer, req) = processStanzas required stanzas + (results, errs) = transformer rec + in + if null req + then -- a required tags were found + (Just results, errs) + else -- some required tags were missing => no result + (Nothing, [sname ++ ": required tags missing: " ++ show req, + sname ++ ": invalidates the whole element" ] ++ errs) hunk ./LambdaFeed.hs 1020 - processStanzas reqd [] = (id, reqd) + --processStanzas :: [String] -> [Stanza] -> (a -> (a, [String]), [String]) + processStanzas reqd [] = (idNil, reqd) hunk ./LambdaFeed.hs 1025 - case lookup tag stanzaProc of + case lookup (map toLower tag) stanzaProc of hunk ./LambdaFeed.hs 1027 - Just (Proc _ (Compound act)) -> (act stanza, reqd \\ [tag]) - Nothing -> unknownTag range tag + Just (Proc _ (Compound act)) -> (act stanza, reqd \| tag) + Nothing -> (addError (unknownTag range tag), + reqd) hunk ./LambdaFeed.hs 1032 - (trans2 . trans1, reqd2) + (trans2 .++ trans1, reqd2) hunk ./LambdaFeed.hs 1034 - processAssocs reqd [] = (id, reqd) + --processAssocs :: [String] -> Stanza -> (a -> (a, [String]), [String]) + processAssocs reqd [] = (idNil, reqd) hunk ./LambdaFeed.hs 1039 - case lookup tag stanzaProc of - Just (Proc _ (Simple act)) -> (act assoc , reqd \\ [tag]) - Just (Proc _ (Compound act)) -> (act [assoc], reqd \\ [tag]) - Nothing -> unknownTag range tag + case lookup (map toLower tag) stanzaProc of + Just (Proc _ (Simple act)) -> (act assoc , reqd \| tag) + Just (Proc _ (Compound act)) -> (act [assoc], reqd \| tag) + Nothing -> (addError (unknownTag range tag), + reqd) hunk ./LambdaFeed.hs 1046 - (trans2 . trans1, reqd2) + (trans2 .++ trans1, reqd2) + -- + idNil = \a -> (a, []) + f1 .++ f2 = \a -> + let (a1, e1) = f1 a; (a2, e2) = f2 a1 in (a2, e1 ++ e2) + -- + l \| tag = l \\ [map toLower tag] hunk ./LambdaFeed.hs 1054 - unknownTag range tag = elementErr sname range $ - "unknown tag `" ++ tag ++ "'" + addError err = \a -> (a, [err]) + -- + unknownTag range tag = elementErrStr sname range $ + "unknown tag `" ++ tag ++ "' (ignoring)" hunk ./LambdaFeed.hs 1061 -elementErr :: String -> (Int, Int) -> String -> a -elementErr sname (start, end) err = - error $ sname ++ ":" ++ show start ++ "-" ++ show end ++": " ++ err +elementErrStr :: String -> (Int, Int) -> String -> String +elementErrStr sname (start, end) err = + sname ++ ":" ++ show start ++ "-" ++ show end ++": " ++ err + + +-- Filename operations +-- ------------------- + +-- Split the suffix of a filename. +-- +suffix :: FilePath -> String +suffix = reverse . takeWhile (/= '.') . reverse + +-- Join a directory and file path. +-- +() :: FilePath -> FilePath -> FilePath +dir fname | null dir = fname + | last dir == '/' = dir ++ fname + | otherwise = dir ++ "/" ++ fname hunk ./LambdaFeed.hs 1085 --- Information messages (if in verbose mode) +-- Warnings if not quiet. hunk ./LambdaFeed.hs 1088 -say config str | verboseCfg config = putStrLn str - | otherwise = return () +say config str | not (quietCfg config) = putStrLn str + | otherwise = return () + +-- Information messages if in verbose mode. +-- +sayV :: Config -> String -> IO () +sayV config str | verboseCfg config = putStrLn str + | otherwise = return () + +-- Diagnostic messages if in debug mode. +-- +sayD :: Config -> String -> IO () +sayD config str | debugCfg config = putStrLn str + | otherwise = return () + + +-- Error handling +-- -------------- + +-- Terminate with a set of errors (in pure code). +-- +abortWith :: [String] -> a +abortWith = error . ("FATAL ERROR" ++) . concat . map ('\n':) }