~alcinnz/rhapsode

ref: afbc3c0556a0b1d45834091a6c9a3ddfc3ebe4f8 rhapsode/src/Main.hs -rw-r--r-- 2.3 KiB
afbc3c05 — Adrian Cochrane Fix for parsing HTTP responses, test for speech media. 5 years ago
                                                                                
aea2a653 Adrian Cochrane
7523fb56 Adrian Cochrane
18bd686d Adrian Cochrane
aea2a653 Adrian Cochrane
18bd686d Adrian Cochrane
aea2a653 Adrian Cochrane
18bd686d Adrian Cochrane
aea2a653 Adrian Cochrane
18bd686d Adrian Cochrane
7523fb56 Adrian Cochrane
18bd686d Adrian Cochrane
aea2a653 Adrian Cochrane
afbc3c05 Adrian Cochrane
aea2a653 Adrian Cochrane
afbc3c05 Adrian Cochrane
aea2a653 Adrian Cochrane
afbc3c05 Adrian Cochrane
aea2a653 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
{-# LANGUAGE OverloadedStrings #-}
module Main where

import System.Environment

import qualified Network.HTTP.Client as HTTP
import           Network.HTTP.Client.Internal
import qualified Network.HTTP.Client.TLS as TLS
import           Network.URI
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Data.Text (unpack)
import qualified Data.Map as M
import qualified Data.ByteString.Lazy.Char8 as C8

import DefaultCSS

main :: IO ()
main = do
    url:_ <- getArgs
    -- TODO support more URI schemes, and do nonblocking networking. This could be it's own module.
    request <- HTTP.parseRequest url
    manager <- HTTP.newManager TLS.tlsManagerSettings
    response <- HTTP.httpLbs request manager
    let html = XML.documentRoot $ HTML.parseLBS $ HTTP.responseBody response
    style <- retreiveStyles html manager request
    putStrLn $ unlines style

retreiveStyles html manager base = do
    css <- externalStyles html manager base
    return $ userAgentCSS : css ++ internalStyles html

externalStyles html manager base = go $ linkedStyles html
    where -- TODO parallelise loads
        go (link:links) = do
            request <- setUriRelative base link
            response <- HTTP.httpLbs request manager
            rest <- go links
            return (C8.unpack (HTTP.responseBody response) : rest)
        go [] = return []

linkedStyles (XML.Element "link" attrs _)
    | Just link <- "href" `M.lookup` attrs,
        Just "stylesheet" <- "rel" `M.lookup` attrs,
        testMedia attrs,
        Just uri <- parseURIReference $ unpack link = [uri]
linkedStyles (XML.Element _ _ children) =
    concat [linkedStyles el | XML.NodeElement el <- children]

internalStyles (XML.Element "style" attrs children)
    | testMedia attrs = [strContent children]
internalStyles (XML.Element _ _ children) =
    concat [internalStyles el | XML.NodeElement el <- children]

testMedia attrs = media == Nothing || media == Just "speech"
    where media = "media" `M.lookup` attrs


strContent (XML.NodeContent text : rest) = unpack text ++ strContent rest
-- We do want to read in comments for CSS, just not for display.
strContent (XML.NodeComment text : rest) = unpack text ++ strContent rest
strContent (XML.NodeElement (XML.Element _ _ children):rest) =
    strContent children ++ strContent rest
strContent (_:rest) = strContent rest
strContent [] = ""