{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} module Links(extractLinks, linkToText, Link(..), c_extractLinks) where import Text.XML import qualified Data.Map as M import Network.MIME.Info as MIME import Network.URI import Data.Text (Text, unpack, append, pack, replace, strip) import qualified Data.Text.Lazy as LTxt import qualified Data.Text.Foreign as FTxt import Data.Maybe import Text.Read (readMaybe) -- FIXME: Expose this API from HURL XML... import Network.URI.Fetch.XML.Table (splitTable) import Types import Foreign.StablePtr import Foreign.C.String import Foreign.Marshal.Array import Control.Monad (forM) import Control.Exception (catch, IOException) import System.Directory -- For locating links.xml, suggestions.gmni import System.FilePath import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json -- For suggestions.gmni import qualified Data.Set as Set import Data.List (nub, intercalate) import Control.Concurrent (forkIO) -- For Voice2Json import Data.Char import System.Process import Data.Aeson import qualified Data.HashMap.Strict as HM import qualified Data.ByteString.Lazy as LBS data Link = Link { label :: Text, title :: Text, href :: URI } linkToText :: Link -> Text linkToText (Link label' title' href') = rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href') rmWs text = strip $ replace "\t" " " $ replace "\n" " " text extractLinks :: Document -> [Link] extractLinks (Document prologue root misc) = extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++ extractEl [] root ++ extractMisc misc extractMisc :: [Miscellaneous] -> [Link] extractMisc (MiscInstruction (Instruction target dat):misc) | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc extractMisc (_:misc) = extractMisc misc extractMisc [] = [] extractEl path el@(Element (Name "details" _ _) _ childs) = [Link (nodesText summary' $ nodesText childs "") "+" nullURI { uriFragment = '#':'.':intercalate "." (map show $ reverse path) } | NodeElement summary@(Element (Name "summary" _ _) _ summary') <- childs] ++ extractNodes (0:path) childs -- Special case for showing Appstream metadata of compatible apps. -- Fallback for incompatible package manager UIs. extractEl _ (Element "{https://specifications.freedesktop.org/metainfo/1.0}url" attrs childs) | Just label <- "{https://specifications.freedesktop.org/metainfo/1.0}type" `M.lookup` attrs, Just url <- parseAbsoluteURI $ unpack $ nodesText childs "" = [Link label "" url] extractEl _ el@(Element _ attrs []) | Just "alternate" <- "rel" `M.lookup` attrs', Just typ <- "type" `M.lookup` attrs', Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val = -- let Application name _ title _ = mimeInfo $ unpack typ -- in [Link (pack name) (pack title) uri] -- FIXME `mimeInfo` freezes... [Link typ "" uri] where attrs' = M.mapKeys nameLocalName attrs extractEl _ el@(Element (Name "link" _ _) attrs []) | Just "stylesheet" <- "rel" `M.lookup` attrs', Nothing <- "title" `M.lookup` attrs', Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val = let Application name _ title _ = mimeInfo "text/css" in [Link (pack name) (pack title) uri] where attrs' = M.mapKeys nameLocalName attrs extractEl path (Element (Name "table" _ _) _ childs) = extractTable path (splitTable childs) ++ extractNodes (0:path) childs extractEl path el@(Element _ _ children) = extractElAttr el "href" ++ extractElAttr el "longdesc" ++ extractElAttr el "src" ++ extractNodes (0:path) children extractElAttr (Element _ attrs children) attr | Just val <- attr `M.lookup` attrs', Just uri <- parseURIReference $ unpack val = [Link label' title' uri] | otherwise = [] where label' = nodesText children $ M.findWithDefault "" "rel" attrs' title' = fromMaybe "" $ M.lookup "title" attrs' attrs' = M.mapKeys nameLocalName attrs extractTable path (thead, _, _) = extractTable' path [el | NodeElement el <- thead] extractTable' path (Element (Name "thead" _ _) _ childs:_) = extractTable' path [el | NodeElement el <- childs] extractTable' path (Element (Name "tr" _ _) _ childs:_) = extractTR path 0 childs extractTable' path (_:els) = extractTable' path els extractTable' _ [] = [] extractTR path count (NodeElement (Element (Name name _ _) attrs childs):nodes) | name `elem` ["th", "td"] = extractTH path count ordering childs : extractTR path count' nodes where count' = count + fromMaybe 1 colspan colspan = readMaybe =<< unpack <$> M.lookup "colspan" attrs' ordering = M.lookup "aria-sort" attrs' attrs' = M.mapKeys nameLocalName attrs extractTR path count (_:nodes) = extractTR path count nodes extractTR _ _ [] = [] extractTH path count ordering nodes = Link { label = nodesText nodes "", title = pack $ show count, href = nullURI { uriFragment = '#':'-':'a':'r':'g':'o':'-':'%': intercalate "." [show n | n <- path] ++ o ordering : show count } } where o (Just "ascending") = '>' o _ = '<' extractNodes p@(n:path) (NodeElement el:nodes) = extractEl p el ++ extractNodes (succ n:path) nodes extractNodes path (NodeInstruction instruct:nodes) = extractMisc [MiscInstruction instruct] ++ extractNodes path nodes extractNodes path (_:nodes) = extractNodes path nodes extractNodes _ [] = [] (+++) = append nodesText :: [Node] -> Text -> Text nodesText (NodeElement (Element _ attrs children):nodes) def = nodesText children def +++ nodesText nodes def nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def nodesText (_:nodes) def = nodesText nodes def nodesText [] def = def linksFromPage :: Page RhapsodeCSS -> [Link] linksFromPage Page { pageURL = url', pageTitle = title', html = html', apps = apps', backStack = back', forwardStack = forward' } = [link' n desc $ URI "app:" Nothing id "" "" | Application n _ desc id <- apps'] ++ extractLinks html' head' (a:_) = [a] head' [] = [] link' l t h = Link (pack l) (pack t) h readBookmarks :: IO Document readBookmarks = do dir <- getXdgDirectory XdgData "rhapsode" let file = dir "links.xml" exists <- doesFileExist file if exists then return () else do -- Copy defaults into userdir dirs <- getXdgDirectoryList XdgDataDirs files' <- forM dirs $ \dir' -> do let file' = dir' "rhapsode" "links.xml" exists' <- doesFileExist file' return $ if exists' then Just file' else Nothing case catMaybes files' of [] -> return () (file':_) -> copyFileWithMetadata file' file exists' <- doesFileExist file if exists' then Text.XML.readFile def file `catch` handleInvalid else nodoc where handleInvalid err@(InvalidXMLFile _ _) = hPrint stderr err >> nodoc nodoc = return $ Document (Prologue [] Nothing []) (Element "empty" M.empty []) [] -- | Write out a file of most frequently encountered unvisited links. -- Hopefully this'll help surfers rely less on YouTube, et al's hueristics. updateSuggestions :: Page RhapsodeCSS -> IO () updateSuggestions page = do let links = extractLinks $ html page let domain = maybe "" show $ uriAuthority $ pageURL page dir <- getXdgDirectory XdgData "rhapsode" let path = dir "suggestions.gmni" exists <- doesFileExist path suggestions <- if not exists then return [] else do file <- readStrict path return [line' | line <- lines file, line'@(_:uri':_) <- [words line], not (pack uri' `Set.member` visitedURLs page)] let suggestions' = suggestions ++ nub [["=>", uri', domain] | link <- links, let uri' = uriToString id (href link) "", not (pack uri' `Set.member` visitedURLs page)] createDirectoryIfMissing True dir Prelude.writeFile path $ unlines $ map unwords suggestions' ------- --- Voice2Json language models ------- -- | Output links to a Voice2Json sentences.ini grammar. outputSentences _ "" = return () outputSentences links dir = do Prelude.writeFile (dir "sentences.ini") $ unlines sentences createProcess (proc "voice2json" ["--profile", dir, "train-profile"]){ std_err = NoStream, std_out = NoStream } return () `catch` \(_ :: IOException) -> return () -- Assume the UI has already warned Voice2Json isn't available. where sentences = "[links]" : [ unwords $ words $ map validChar line -- Enforce valid sentences.ini syntax. | line@(_:_) <- map (unpack . label) links ++ map (unpack . title) links ++ map (show . href) links ] -- | Can this character appear in a sentences.ini rule without escaping? validChar ch | not (isAscii ch) || isSpace ch || isAlphaNum ch = ch validChar _ = ' ' ------ --- C API ------ foreign export ccall c_extractLinks :: StablePtr (Page RhapsodeCSS) -> CString -> IO (CArray CString) c_extractLinks c_page c_v2jProfile = do page <- deRefStablePtr c_page v2jProfile <- peekCString c_v2jProfile forkIO $ updateSuggestions page -- background process for useful navigation aid. bookmarks <- readBookmarks let links = linksFromPage page ++ extractLinks bookmarks forkIO $ outputSentences links v2jProfile ret <- forM links $ \link -> do c_label <- text2cstring $ strip $ label link c_title <- text2cstring $ strip $ title link c_href <- newCString $ uriToString id (href link) "" return [c_label, c_title, c_href] nil <- newCString " " newArray0 nil $ concat ret text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString) ------ --- C helper functions ------ foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString c_formatLink c_label c_title c_url = do label <- ctext c_label title <- ctext c_title url <- ctext c_url sfx <- getXdgDirectory XdgCache "rhapsode" let bulletpoint_el = audio (sfx "bulletpoint.wav") let label_el = prosody [("pitch", "low")] label let title_el = prosody [("volume", "soft")] title let link_el = audio (sfx "link.wav") let url_el = style "punctuation" "all" $ style "capital_letters" "pitch" $ prosody [("rate", "fast")] url let root = el "speak" [] $ map NodeElement [bulletpoint_el, label_el, title_el, link_el, url_el] let ssml = renderText def $ Document (Prologue [] Nothing []) root [] newCString $ LTxt.unpack ssml where ctext cstr = pack <$> peekCString cstr el name attrs childs = Element name (M.fromList attrs) childs audio src = el "audio" [("src", pack src)] [] prosody attrs txt = el "prosody" attrs [NodeContent txt] style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner] foreign export ccall c_dading :: IO CString c_dading = do sfx <- getXdgDirectory XdgCache "rhapsode" let link_el = audio (sfx "link.wav") let root = el "speak" [] [NodeElement link_el] let ssml = renderText def $ Document (Prologue [] Nothing []) root [] newCString $ LTxt.unpack ssml where el name attrs childs = Element name (M.fromList attrs) childs audio src = el "audio" [("src", pack src)] [] --- For Voice2JSON foreign export ccall c_dataDir :: CString -> IO CString c_dataDir c_subdir = do subdir <- peekCString c_subdir cache <- getXdgDirectory XdgData "rhapsode" newCString (cache subdir) foreign export ccall c_recognizeIntent :: CString -> IO CString c_recognizeIntent c_profile = do profile <- peekCString c_profile (_, Just pipe, _, _) <- createProcess (proc "voice2json" [ "--profile", profile, "transcribe-stream", "-c", "1"]){std_out = CreatePipe} (_, Just out, _, _) <- createProcess (proc "voice2json" [ "--profile", profile, "recognize-intent"]){std_in = UseHandle pipe, std_out = CreatePipe} intent <- LBS.hGetContents out let transcript = case decode intent of Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt _ -> "" newCString transcript