[Most infrastructure is in place Manuel M T Chakravarty **20060423031406] { addfile ./LambdaFeed.hs addfile ./lambdaFeed.sh hunk ./LambdaFeed.hs 1 +-- |A simple feed generator +-- +-- Copyright (c) 2006 Manuel M T Chakravarty +-- +-- License: +-- +--- Description --------------------------------------------------------------- +-- +-- Language: Haskell 98 +-- +-- Feed source +-- ~~~~~~~~~~~ +-- The feed source is a directory containing channel descriptor, which are +-- files with suffix `.lfc'. Each channel descriptor specifies a channel +-- directory, which contains the feed for the channel. These items are +-- individual files in those sub-directories with the suffix `.lfi'. +-- +-- Timestamps +-- ~~~~~~~~~~ +-- Complete feed items include a timestamp. If they don't, the timestamp of +-- the file containing an item is appended to the item contents. +-- +-- Docs +-- ~~~~ +-- RSS 2.0 spec: +-- +--- Todo ---------------------------------------------------------------------- +-- +-- * Command line option to specify the name of the directory containing the +-- feed source. +-- +-- * Multiple channels. +-- +-- * It might be nice to generate atom feeds, too. +-- + +module Main (main) +where + +import Control.Monad ( + when, foldM, liftM) + +import Data.Char ( + isDigit, digitToInt) + +import System.Console.GetOpt ( + ArgOrder(..), OptDescr(..), ArgDescr(..), getOpt, usageInfo) +import System.Environment ( + getArgs, getProgName) +import System.Exit ( + ExitCode(..), exitWith, exitFailure) +import System.Locale ( + TimeLocale(..), defaultTimeLocale, rfc822DateFormat) +import System.Time ( + Month(..), ClockTime, CalendarTime(..), toUTCTime, toClockTime, + formatCalendarTime) + + +-- Version information +-- ------------------- + +name = "lambdaFeed" +versnum = "0.1.0" +versnick = "\"Lambdas for all!\"" +date = "22 Apr 2006" +version = name ++ ", version " ++ versnum ++ " " ++ versnick ++ ", " ++ date +copyright = "Copyright (c) 2006 Manuel M T Chakravarty" +disclaimer = "This software is distributed under the \ + \terms of the GNU Public Licence.\n\ + \NO WARRANTY WHATSOEVER IS PROVIDED. \ + \See ." + + +-- Configuration +-- ------------- + +-- Constants +-- +chanSuffix = "lfc" +itemSuffix = "lfi" + +-- Configuration +-- +data Config = Config { + feedDirCfg :: String, + verboseCfg :: Bool + } + +-- Default configuration +-- +dftConfig = Config { + feedDirCfg = "feed", + verboseCfg = False + } + +-- Available options +-- +data Option = FeedDirOpt String + | Help + | Verbose + | Version + deriving Eq + +-- Option description +-- +options :: [OptDescr Option] +options = [ Option ['f'] ["feed"] (ReqArg FeedDirOpt "DIR") + "feed directory with channel descriptions (default: feed/)" + , Option ['h', '?'] ["help"] (NoArg Help) + "this help message" + , Option ['v'] ["verbose"] (NoArg Verbose) + "print summary information (default: off)" + , Option ['V'] ["version"] (NoArg Version) + "version information" + ] + +-- Option processing +-- +processOptions :: [String] -> IO Config +processOptions args = + case getOpt RequireOrder options args of + (opts, [] , [] ) -> processVersion opts >>= + processHelp >>= + processConfig dftConfig + (opts, args, [] ) -> abortWithOptionsErrors [unrecErr ++ unwords args] + (_ , _ , errs) -> abortWithOptionsErrors errs + where + unrecErr = "Unrecognised arguments: " + +-- Print version information if `Help' or `Version' requested and remove any +-- `Version' options. +-- +processVersion :: [Option] -> IO [Option] +processVersion opts = + do + when (Version `elem` opts || Help `elem` opts) $ do + putStrLn version + putStrLn copyright + putStrLn disclaimer + return $ filter (/= Version) opts + +-- Print help information if `Help' requested and remove any `Help' options. +-- +processHelp :: [Option] -> IO [Option] +processHelp opts = + do + name <- getProgName + let header = "\nUsage: " ++ name ++ " [ option... ]\n" + when (Help `elem` opts) $ + putStrLn $ usageInfo header options + return $ filter (/= Help) opts + +-- Process configuration options. +-- +processConfig :: Config -> [Option] -> IO Config +processConfig = foldM processOneOption + where + processOneOption config (FeedDirOpt dir) = + return $ config {feedDirCfg = dir} + +-- Error during parsing command line options +-- +abortWithOptionsErrors :: [String] -> IO a +abortWithOptionsErrors errs = + do + putStrLn (unlines errs) + putStrLn "Try the option `--help' on its own for more information." + exitFailure + + +-- Date information +-- ---------------- + +-- We convert all dates into an internal representation for storage and +-- comparison. All I/O uses the RFC822 format as required by the RSS 2.0 +-- specification. +-- +newtype Date = Date ClockTime + +instance Eq Date where + Date d1 == Date d2 = d1 == d2 + +instance Ord Date where + compare (Date d1) (Date d2) = compare d1 d2 + +instance Show Date where + show (Date d) = + formatCalendarTime defaultTimeLocale rfc822DateFormat (toUTCTime d) + +-- Parse an RFC822 date string (but allowing four digits for years and UTC, in +-- addition to UT) +-- +parseDate :: String -> Maybe Date +parseDate = liftM Date . parseDay + where + parseDay (_:_:_:',':' ':noWDay) = parseDay noWDay -- drop weekday + parseDay (d1 :' ':monthEtc) -- 1 digit day + | isDigit d1 = parseMonth (convDay '0' d1 ) monthEtc + parseDay (d1:d2:' ':monthEtc) -- 2 digit day + | isDigit d1 && isDigit d2 = parseMonth (convDay d1 d2) monthEtc + parseDay _ = Nothing + -- + parseMonth day (m1:m2:m3:' ':yearEtc) = + parseYear day (parseMonth' (m1:m2:m3:[])) yearEtc + -- + parseMonth' "Jan" = Just January + parseMonth' "Feb" = Just February + parseMonth' "Mar" = Just March + parseMonth' "Apr" = Just April + parseMonth' "May" = Just May + parseMonth' "Jun" = Just June + parseMonth' "Jul" = Just July + parseMonth' "Aug" = Just August + parseMonth' "Sep" = Just September + parseMonth' "Oct" = Just October + parseMonth' "Nov" = Just November + parseMonth' "Dec" = Just December + parseMonth' _ = Nothing + -- + parseYear day mons (y1:y2 :' ':timeStr) -- 2 digit year + | isDigit y1 && isDigit y2 = + convert day mons (convYear Nothing y1 y2) (parseTime timeStr) + parseYear day mons (y1:y2:y3:y4:' ':timeStr) -- 4 digit year + | isDigit y1 && isDigit y2 && isDigit y3 && isDigit y4 = + convert day mons (convYear (Just (y1, y2)) y3 y4) (parseTime timeStr) + parseYear _ _ _ = Nothing + -- + parseTime (h1:h2:':':m1:m2:':':s1:s2:' ':zoneStr) -- w/ seconds + | isDigit h1 && isDigit h2 && isDigit m1 && isDigit m2 && + isDigit s1 && isDigit s2 = + do + hour <- convHour h1 h2 + mins <- convMins m1 m2 + secs <- convSecs s1 s2 + zone <- parseZone zoneStr + return $ (hour, mins, secs, zone) + parseTime (h1:h2:':':m1:m2 :' ':zoneStr) -- w/o seconds + | isDigit h1 && isDigit h2 && isDigit m1 && isDigit m2 = + do + hour <- convHour h1 h2 + mins <- convMins m1 m2 + let secs = 0 + zone <- parseZone zoneStr + return (hour, mins, secs, zone) + parseTime _ = Nothing + -- + convYear Nothing y1 y2 | 70 <= year = Just $ 1900 + year + | otherwise = Just $ 2000 + year + where year = digitToInt y1 * 10 + + digitToInt y2 + convYear (Just (y1, y2)) y3 y4 = Just $ digitToInt y1 * 1000 + + digitToInt y2 * 100 + + digitToInt y3 * 10 + + digitToInt y4 + convDay d1 d2 | 1 <= day && day <= 31 = Just day + | otherwise = Nothing + where day = digitToInt d1 * 10 + digitToInt d2 + convHour h1 h2 | 0 <= hour && hour <= 23 = Just hour + | otherwise = Nothing + where hour = digitToInt h1 * 10 + digitToInt h2 + convMins m1 m2 | 0 <= mins && mins <= 59 = Just mins + | otherwise = Nothing + where mins = digitToInt m1 * 10 + digitToInt m2 + convSecs s1 s2 | 0 <= secs && secs <= 59 = Just secs + | otherwise = Nothing + where secs = digitToInt s1 * 10 + digitToInt s2 + -- + parseZone :: String -> Maybe Int + parseZone "UT" = Just 0 + parseZone "UTC" = Just 0 + parseZone "GMT" = Just 0 + parseZone "EST" = Just (hourToSecs (-5)) + parseZone "EDT" = Just (hourToSecs (-4)) + parseZone "CST" = Just (hourToSecs (-6)) + parseZone "CDT" = Just (hourToSecs (-5)) + parseZone "MST" = Just (hourToSecs (-7)) + parseZone "MDT" = Just (hourToSecs (-6)) + parseZone "PST" = Just (hourToSecs (-8)) + parseZone "PDT" = Just (hourToSecs (-7)) + parseZone "Z" = Just 0 + parseZone "A" = Just (hourToSecs (-1)) + parseZone "B" = Just (hourToSecs (-2)) + parseZone "C" = Just (hourToSecs (-3)) + parseZone "D" = Just (hourToSecs (-4)) + parseZone "E" = Just (hourToSecs (-5)) + parseZone "F" = Just (hourToSecs (-6)) + parseZone "G" = Just (hourToSecs (-7)) + parseZone "H" = Just (hourToSecs (-8)) + parseZone "I" = Just (hourToSecs (-9)) + parseZone "K" = Just (hourToSecs (-10)) + parseZone "L" = Just (hourToSecs (-11)) + parseZone "M" = Just (hourToSecs (-12)) + parseZone "N" = Just (hourToSecs 1) + parseZone "O" = Just (hourToSecs 2) + parseZone "P" = Just (hourToSecs 3) + parseZone "Q" = Just (hourToSecs 4) + parseZone "R" = Just (hourToSecs 5) + parseZone "S" = Just (hourToSecs 6) + parseZone "T" = Just (hourToSecs 7) + parseZone "U" = Just (hourToSecs 8) + parseZone "V" = Just (hourToSecs 9) + parseZone "W" = Just (hourToSecs 10) + parseZone "X" = Just (hourToSecs 11) + parseZone "Y" = Just (hourToSecs 12) + parseZone ('+':hhmm) = parseDiff 1 hhmm + parseZone ('-':hhmm) = parseDiff (-1) hhmm + parseZone _ = Nothing + -- + parseDiff :: Int -> String -> Maybe Int + parseDiff sign (h1:h2:m1:m2:[]) = + do + hour <- convHour h1 h2 + mins <- convMins m1 m2 + return $ sign * (hour * 60 + mins) * 60 + -- + hourToSecs hour = hour * 60 * 60 + -- + convert day mons year time = + do + day' <- day + mons' <- mons + year' <- year + (hour, mins, secs, zone) <- time + return $ toClockTime $ CalendarTime { + ctYear = year', + ctMonth = mons', + ctDay = day' , + ctHour = hour , + ctMin = mins , + ctSec = secs , + ctPicosec = 0 , + ctTZ = zone + } + + +-- Feed data structure (modeled after RSS 2.0.1 (rv 6)) +-- ------------------- + +-- URL data type +-- +type URL = String + +-- A feed consists of a set of channels. +-- +data Feed = Feed [Chan] + +-- Channel description +-- +data Chan = Chan { + titleChan :: String, + linkChan :: URL, + descriptionChan :: String, + languageChan :: Maybe String, + copyrightChan :: Maybe String, + managingEditorChan :: Maybe String, + webMasterChan :: Maybe String, + pubDateChan :: Maybe Date, + lastBuildDateChan :: Maybe Date, + categoryChan :: [Category], -- may be empty + generatorChan :: Maybe String, + docsChan :: Maybe URL, + cloudChan :: (), + ttlChan :: Maybe Int, -- in minutes + imageChan :: Maybe Image, + ratingChan :: (), + textInputChan :: (), + skipHoursChan :: (), + skipDaysChan :: (), + itemsChan :: [Item] + } + +-- Inline image +-- +data Image = Image { + urlImage :: URL, + titleImage :: String, + linkImage :: URL, + widthImage :: Maybe Int, + heightImage :: Maybe Int, + descriptionImage :: Maybe String + } + +-- Generic categorisation information +-- +data Category = Category { + domainCategory :: String, + categoryCategory :: String + } + +-- Feed item +-- +-- * Either `titleItem' or `descriptionItem' must be present. +-- +data Item = Item { + titleItem :: Maybe String, + linkItem :: Maybe URL, + descriptionItem :: Maybe String, + authorItem :: Maybe String, -- email of author + categoryItem :: [Category], -- may be empty + commentsItem :: Maybe URL, + enclosureItem :: Maybe Enclosure, + guidItem :: Maybe GUID, + pubDateItem :: Maybe Date, + sourceItem :: Maybe Source + } + +-- Inline media content +-- +data Enclosure = Enclosure { + urlEnclosure :: URL, + lengthEnclosure :: Int, -- in bytes + typeEnclosure :: String + } + +-- Global identification +-- +data GUID = GUID { + isPermaLink :: Bool, + guidGUID :: String + } + +-- Item source +-- +data Source = Source { + urlSource :: URL, + sourceSource :: String + } + + +-- Feed processing +-- --------------- + +-- Summary information +-- +data Summary = Summary + +-- Read all feed data into our internal feed structure. +-- +readFeed :: Config -> IO Feed +readFeed config = + do + return $ undefined + +-- Write feed data to RSS files. +-- +writeFeed :: Config -> Feed -> IO Summary +writeFeed config feed = + do + return $ undefined + +-- Print summary of feed data. +-- +printSummary :: Config -> Summary -> IO () +printSummary config summary = + do + return () + + +-- Main +-- ---- + +main :: IO () +main = + do + args <- getArgs + config <- processOptions args + readFeed config >>= writeFeed config >>= printSummary config + exitWith ExitSuccess hunk ./lambdaFeed.sh 1 +#!/bin/sh +exec ghc \ + -ignore-dot-ghci \ + -e "System.Environment.withArgs (words \"$*\") Main.main" \ + LambdaFeed.hs +# This would be nicer, but it sets the wrong progName (which I believe is a +# mis-feature of runhaskell aka runghc) +#exec runhaskell LambdaFeed.hs $* }