~alcinnz/rhapsode

ref: 2d6a43f4853f32149d5e751dc7af7cbc6c2e7122 rhapsode/src/Input.hs -rw-r--r-- 4.0 KiB
2d6a43f4 — Adrian Cochrane Fix CSS charset detection. 4 years ago
                                                                                
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
36f30081 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
{-# LANGUAGE OverloadedStrings #-}
module Input(parseArgs, ProgramCtl(..)) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import qualified Data.Map as M

import Control.Concurrent.Async
import System.IO
import System.Environment
import System.Directory
import Data.List
import Data.Default.Class

--- Commandline arguments
data ProgramCtl = ProgramCtl {
    docs :: [(URI, XML.Document)],
    outSSML :: Maybe Handle,
    outLinks :: Maybe Handle
}
parseArgs :: Session -> IO ProgramCtl
parseArgs http = do
    args <- getArgs
    let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args
    cwd <- getCurrentDirectory
    let base = URI {uriScheme = "file:", uriPath = cwd,
        uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
    let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
    docs <- forConcurrently inputs' $ evalInput http

    outSSML <- parseSSMLout outputs
    outLinks <- parseLinksOut outputs
    return $ ProgramCtl {
        docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
        outSSML = outSSML,
        outLinks = outLinks
    }

preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
    (flag, Just url) : preparseArgs args
preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs [flag@('+':_)] = [(flag, Nothing)]
preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
preparseArgs (val:args) | Just url <- parseURIReference val =
    ("-h", Just url) : preparseArgs args
preparseArgs [] = []
preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)

parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+s", Nothing):_) = return $ Just stdout
parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout
parseSSMLout (_:args) = parseSSMLout args
parseSSMLout [] = return Nothing

parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+l", Nothing):_) = return $ Just stdout
parseLinksOut (("+links", Nothing):_) = return $ Just stdout
parseLinksOut (_:args) = parseLinksOut args
parseLinksOut [] = return Nothing


evalInput http ("-h", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-html", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument
parseDocument ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument ("text/plain", Left text) = return $ docForText text
parseDocument ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument (_, Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
parseDocument (_, Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
parseDocument (mime, _) = return $ docForText $ Txt.concat ["Unsupported MIMEtype ", Txt.pack mime]

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }