{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText, writeDownloadToFile) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Network.URI
import Network.URI.Fetch
import qualified Data.Map as M
import System.IO
import System.IO.Temp
import Data.Default.Class
-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String
-- FIXME segfaults when exposed to C.
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
-- let u = relativeTo url refererURL
-- let charsets' = map Txt.unpack charsets
-- resp <- fetchURL session mime u
-- (a, b) <- case resp of
-- ("text/css", bytes) -> let
-- cssParser = CSS.parseForURL (conditionalStyles u "document") u
-- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
-- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
-- return (u, a, b)
--applyCSScharset (charset:charsets) (Right bytes)
-- | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
-- | otherwise = applyCSScharset charsets $ Right bytes
-- where
-- text = convertCharset charset $ B.toStrict bytes
--applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
--applyCSScharset _ (Left text) = text
--cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
-- (CSSTok.String charset:_) <- skipCSSspace toks' = charset
-- | otherwise = ""
--skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
--skipCSSspace toks = toks
fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
localURI <- writeDownloadToFile download
result <- dispatchByMIME session mime localURI
-- I'm not sure when I can delete this file.
case result of
Just text -> return $ docForText $ Txt.pack text
Nothing -> parseDocument session ("application/xhtml+xml", download)
writeDownloadToFile (Left text) = do
path <- writeSystemTempFile "rhapsode-download" $ Txt.unpack text
return $ URI "file:" (Just $ URIAuth "" "" "") path "" ""
writeDownloadToFile (Right bytes) = do
temp <- getCanonicalTemporaryDirectory
(path, handle) <- openBinaryTempFile temp "rhapsode-download"
B.hPut handle bytes
return $ URI "file:" (Just $ URIAuth "" "" "") path "" ""
docForText txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}
--------
---- C API
--------
foreign export ccall c_newSession :: IO (StablePtr Session)
foreign export ccall c_freeSession :: StablePtr Session -> IO ()
c_newSession = newSession >>= newStablePtr
c_freeSession = freeStablePtr
foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)
c_fetchURL c_session c_mimes c_referer c_uri = do
session <- deRefStablePtr c_session
mimes <- peekCString c_mimes
referer <- deRefStablePtr c_referer
uri <- peekCString c_uri
let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
doc <- fetchDocument session (words mimes) uri'
newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}