{-# LANGUAGE OverloadedStrings #-}
module Links(extractLinks, linkToText, Link(..), c_extractLinks) where

import Text.XML
import qualified Data.Map as M
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import qualified Data.Text.Foreign as FTxt
import Data.Maybe
import Types

import Foreign.StablePtr
import Foreign.C.String
import Foreign.Marshal.Array
import Control.Monad (forM)

import Control.Exception (catch)
import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
import System.IO (hPrint, stderr) -- For error reporting

-- For suggestions.gmni
import Data.Trie.Text (Trie)
import qualified Data.Trie.Text as Trie
import Data.List (nub, intercalate)
import Control.Concurrent.MVar (readMVar)
import Control.Concurrent (forkIO)

data Link = Link {
    label :: Text,
    title :: Text,
    href :: URI
}

linkToText :: Link -> Text
linkToText (Link label' title' href') =
    rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')

rmWs :: Text -> Text
rmWs text = strip $ replace "\t" " " $ replace "\n" " " text

-- | Extract links from the entire document: processing instructions in the
-- prologue and epilogue as well as the element tree itself.
extractLinks :: Document -> [Link]
extractLinks (Document prologue root misc) =
    extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
    extractEl [] root ++ extractMisc misc

extractMisc :: [Miscellaneous] -> [Link]
extractMisc (MiscInstruction (Instruction target dat):misc)
    | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
extractMisc (_:misc) = extractMisc misc
extractMisc [] = []

-- `path` records the child indices leading to the current element, so that
-- <details> elements can be addressed via a synthesized "#.0.1.2"-style fragment.
extractEl :: [Int] -> Element -> [Link]
extractEl path (Element (Name "details" _ _) _ childs) =
    [Link (nodesText summary' $ nodesText childs "") "+" nullURI {
        uriFragment = '#':'.':intercalate "." (map show $ reverse path)
    } | NodeElement (Element (Name "summary" _ _) _ summary') <- childs]
extractEl path el@(Element _ _ children) =
    extractElAttr el "href" ++ extractElAttr el "longdesc" ++ extractElAttr el "src" ++
    extractNodes (0:path) children

extractElAttr :: Element -> Text -> [Link]
extractElAttr (Element _ attrs children) attr
    | Just val <- attr `M.lookup` attrs',
      Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
    | otherwise = []
  where
    label' = nodesText children $ M.findWithDefault "" "rel" attrs'
    title' = fromMaybe "" $ M.lookup "title" attrs'
    attrs' = M.mapKeys nameLocalName attrs

extractNodes :: [Int] -> [Node] -> [Link]
extractNodes p@(n:path) (NodeElement el:nodes) =
    extractEl p el ++ extractNodes (succ n:path) nodes
extractNodes path (NodeInstruction instruct:nodes) =
    extractMisc [MiscInstruction instruct] ++ extractNodes path nodes
extractNodes path (_:nodes) = extractNodes path nodes
extractNodes _ [] = []

(+++) :: Text -> Text -> Text
(+++) = append

nodesText :: [Node] -> Text -> Text
nodesText (NodeElement (Element _ _ children):nodes) def =
    nodesText children def +++ nodesText nodes def
nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def
nodesText (_:nodes) def = nodesText nodes def
nodesText [] def = def
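-- A usage sketch (illustrative only; `printLinks` and "page.xhtml" are
-- hypothetical names, not part of this module):
--
-- > printLinks :: FilePath -> IO ()
-- > printLinks path = do
-- >     doc <- Text.XML.readFile def path
-- >     mapM_ (putStrLn . unpack . linkToText) $ extractLinks doc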
-- | The links exposed by a page: reload actions (under several localized
-- labels), history navigation, installed apps, and the links extracted
-- from the page's own HTML.
linksFromPage :: Page -> [Link]
linksFromPage Page {
        url = url', pageTitle = title', html = html', apps = apps',
        backStack = back', forwardStack = forward'
    } =
    -- TODO internationalize!
    link' "reload" title' url' :
    link' "aggiorna" title' url' : -- Italian
    link' "ladda om" title' url' : -- Swedish (Svenska)
    link' "last på nytt" title' url' : -- Norwegian Bokmål
    link' "reload without cache" "Fetch again from server without checking for a local copy" uncached :
    link' "aggiorna senza cache" "" uncached : -- Italian
    link' "ladda om utan cache" "hämta från servern igen utan att kolla efter en lokal kopia" uncached : -- Swedish (Svenska)
    link' "last på nytt uten mellomlager" "Last siden på nytt uten å bruke lokal kopi" uncached : -- Norwegian Bokmål
    [link' l t u | (t, u) <- head' back', l <- backLabels] ++
    [link' l t u | (t, u) <- head' forward', l <- forwardLabels] ++
    [link' n desc $ URI "app:" Nothing id "" "" | Application n _ desc id <- apps'] ++
    extractLinks html'
  where
    uncached = url' { uriScheme = "nocache+" ++ uriScheme url' }
    backLabels = ["back", {- Italian -} "indietro", {- Swedish -} "tillbaka",
        {- Norwegian Bokmål -} "tilbake"]
    forwardLabels = ["forward", {- Italian -} "avanti", {- Swedish -} "framåt",
        {- Norwegian Bokmål -} "forover", "videre"]
    head' (a:_) = [a]
    head' [] = []
    link' l t h = Link (pack l) (pack t) h

-- | Read the surfer's saved bookmarks from links.xml in the XDG data
-- directory, falling back to an empty document.
readBookmarks :: IO Document
readBookmarks = do
    dir <- getXdgDirectory XdgData "rhapsode"
    let file = dir </> "links.xml"
    exists <- doesFileExist file
    if exists then Text.XML.readFile def file `catch` handleInvalid else nodoc
  where
    handleInvalid err@(InvalidXMLFile _ _) = hPrint stderr err >> nodoc
    nodoc = return $ Document (Prologue [] Nothing []) (Element "empty" M.empty []) []

-- | Write out a file of the most frequently encountered unvisited links.
-- Hopefully this'll help surfers rely less on YouTube et al.'s heuristics.
updateSuggestions :: Page -> IO ()
updateSuggestions page = do
    hist <- readMVar $ visitedURLs page
    let links = extractLinks $ html page
    let domain = maybe "" show $ uriAuthority $ url page

    dir <- getXdgDirectory XdgData "rhapsode"
    let path = dir </> "suggestions.gmni"

    -- Re-read the existing suggestions, dropping any link since visited.
    exists <- doesFileExist path
    suggestions <- if not exists then return [] else do
        file <- Prelude.readFile path
        return [line' | line <- lines file, line'@(_:uri':_) <- [words line],
            not (pack uri' `Trie.member` hist)]

    let suggestions' = suggestions ++ nub [["=>", uri', domain]
            | link <- links, let uri' = uriToString id (href link) "",
              not (pack uri' `Trie.member` hist)]
    createDirectoryIfMissing True dir
    Prelude.writeFile path $ unlines $ map unwords suggestions'

-- C API

foreign export ccall c_extractLinks :: StablePtr Page -> IO (CArray CString)

c_extractLinks :: StablePtr Page -> IO (CArray CString)
c_extractLinks c_page = do
    page <- deRefStablePtr c_page
    forkIO $ updateSuggestions page -- Background task: keeps the navigation aid fresh.
    bookmarks <- readBookmarks
    ret <- forM (linksFromPage page ++ extractLinks bookmarks) $ \link -> do
        c_label <- text2cstring $ strip $ label link
        c_title <- text2cstring $ strip $ title link
        c_href <- newCString $ uriToString id (href link) ""
        return [c_label, c_title, c_href]
    nil <- newCString " "
    newArray0 nil $ concat ret

text2cstring :: Text -> IO CString
text2cstring txt = FTxt.withCStringLen txt $ \s -> peekCStringLen s >>= newCString
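-- Layout of the array returned by c_extractLinks: each link contributes
-- three consecutive CStrings (label, title, href), and the flattened array
-- is terminated by a sentinel string holding a single space rather than a
-- NULL pointer. So, assuming no genuine field is ever exactly " " (labels
-- and titles are stripped above, so they can't be), a C consumer should
-- read strings three at a time until it encounters " ".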