{-# LANGUAGE OverloadedStrings #-} module Input(fetchDocument, docForText) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt import Data.Text.Encoding import qualified Data.ByteString.Lazy as B import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI import Network.URI.Fetch import qualified Data.Map as M import System.IO import System.IO.Temp import Data.Default.Class -- For C API import Types import Data.HTML2CSS (html2css) import Data.Maybe (fromMaybe) import Foreign.StablePtr import Foreign.C.String -- FIXME segfaults when exposed to C. --fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do -- let u = relativeTo url refererURL -- let charsets' = map Txt.unpack charsets -- resp <- fetchURL session mime u -- (a, b) <- case resp of -- ("text/css", bytes) -> let -- cssParser = CSS.parseForURL (conditionalStyles u "document") u -- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes) -- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL) -- return (u, a, b) --applyCSScharset (charset:charsets) (Right bytes) -- | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text -- | otherwise = applyCSScharset charsets $ Right bytes -- where -- text = convertCharset charset $ B.toStrict bytes --applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes --applyCSScharset _ (Left text) = text --cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, -- (CSSTok.String charset:_) <- skipCSSspace toks' = charset -- | otherwise = "" --skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks --skipCSSspace toks = toks fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes parseDocument _ ("text/plain", Left text) = return $ docForText text parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument session (mime, download) = do localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download result <- dispatchByMIME session mime localURI case result of Just text -> return $ docForText $ Txt.pack text Nothing -> parseDocument session ("application/xhtml+xml", download) writeDownloadToFile (Left text) file handle = do hPutStr handle $ Txt.unpack text return $ URI "file:" Nothing file "" "" writeDownloadToFile (Right bytes) file handle = do B.hPut handle bytes return $ URI "file:" Nothing file "" "" docForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] } -- C API foreign export ccall c_newSession :: IO (StablePtr Session) foreign export ccall c_freeSession :: StablePtr Session -> IO () c_newSession = newSession >>= newStablePtr c_freeSession = freeStablePtr foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page) c_fetchURL c_session c_mimes c_referer c_uri = do session <- deRefStablePtr c_session mimes <- peekCString c_mimes referer <- deRefStablePtr c_referer uri <- peekCString c_uri let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer doc <- fetchDocument session (words mimes) uri' newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}