{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Links(extractLinks, linkToText, Link(..), c_extractLinks) where

import Text.XML
import qualified Data.Map as M
import Network.MIME.Info as MIME
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import qualified Data.Text.Lazy as LTxt
import qualified Data.Text.Foreign as FTxt
import Data.Maybe
import Text.Read (readMaybe)

import Table (splitTable)
import Types

import Foreign.StablePtr
import Foreign.C.String
import Foreign.Marshal.Array
import Control.Monad (forM)
import Control.Exception (catch, IOException)

import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json

-- For suggestions.gmni
import qualified Data.Set as Set
import Data.List (nub, intercalate)
import Control.Concurrent (forkIO)

-- For Voice2Json
import Data.Char
import System.Process
import Data.Aeson
import qualified Data.HashMap.Strict as HM
import qualified Data.ByteString.Lazy as LBS

data Link = Link {
    label :: Text,
    title :: Text,
    href :: URI
}

linkToText :: Link -> Text
linkToText (Link label' title' href') =
    rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')
rmWs text = strip $ replace "\t" " " $ replace "\n" " " text

extractLinks :: Document -> [Link]
extractLinks (Document prologue root misc) =
    extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
    extractEl [] root ++ extractMisc misc

extractMisc :: [Miscellaneous] -> [Link]
extractMisc (MiscInstruction (Instruction target dat):misc)
    | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
extractMisc (_:misc) = extractMisc misc
extractMisc [] = []

extractEl path el@(Element (Name "details" _ _) _ childs) =
    [Link (nodesText summary' $ nodesText childs "") "+" nullURI {
        uriFragment = '#':'.':intercalate "." (map show $ reverse path)
    } | NodeElement summary@(Element (Name "summary" _ _) _ summary') <- childs] ++
    extractNodes (0:path) childs
-- Special case for showing Appstream metadata of compatible apps.
-- Fallback for incompatible package manager UIs.
extractEl _ (Element "{https://specifications.freedesktop.org/metainfo/1.0}url" attrs childs)
    | Just label <- "{https://specifications.freedesktop.org/metainfo/1.0}type" `M.lookup` attrs,
      Just url <- parseAbsoluteURI $ unpack $ nodesText childs "" = [Link label "" url]
extractEl _ el@(Element _ attrs [])
    | Just "alternate" <- "rel" `M.lookup` attrs',
      Just typ <- "type" `M.lookup` attrs',
      Just val <- "href" `M.lookup` attrs',
      Just uri <- parseURIReference $ unpack val =
        -- let Application name _ title _ = mimeInfo $ unpack typ
        -- in [Link (pack name) (pack title) uri] -- FIXME `mimeInfo` freezes...
        [Link typ "" uri]
  where attrs' = M.mapKeys nameLocalName attrs
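
-- Expose untitled (persistent) external stylesheets as links, labelled with
-- whatever name & description the shared MIME-info database reports for "text/css".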
[Link typ "" uri] where attrs' = M.mapKeys nameLocalName attrs extractEl _ el@(Element (Name "link" _ _) attrs []) | Just "stylesheet" <- "rel" `M.lookup` attrs', Nothing <- "title" `M.lookup` attrs', Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val = let Application name _ title _ = mimeInfo "text/css" in [Link (pack name) (pack title) uri] where attrs' = M.mapKeys nameLocalName attrs extractEl path (Element (Name "table" _ _) _ childs) = extractTable path (splitTable childs) ++ extractNodes (0:path) childs extractEl path el@(Element _ _ children) = extractElAttr el "href" ++ extractElAttr el "longdesc" ++ extractElAttr el "src" ++ extractNodes (0:path) children extractElAttr (Element _ attrs children) attr | Just val <- attr `M.lookup` attrs', Just uri <- parseURIReference $ unpack val = [Link label' title' uri] | otherwise = [] where label' = nodesText children $ M.findWithDefault "" "rel" attrs' title' = fromMaybe "" $ M.lookup "title" attrs' attrs' = M.mapKeys nameLocalName attrs extractTable path (thead, _, _) = extractTable' path [el | NodeElement el <- thead] extractTable' path (Element (Name "thead" _ _) _ childs:_) = extractTable' path [el | NodeElement el <- childs] extractTable' path (Element (Name "tr" _ _) _ childs:_) = extractTR path 0 childs extractTable' path (_:els) = extractTable' path els extractTable' _ [] = [] extractTR path count (NodeElement (Element (Name name _ _) attrs childs):nodes) | name `elem` ["th", "td"] = extractTH path count ordering childs : extractTR path count' nodes where count' = count + fromMaybe 1 colspan colspan = readMaybe =<< unpack <$> M.lookup "colspan" attrs' ordering = M.lookup "aria-sort" attrs' attrs' = M.mapKeys nameLocalName attrs extractTR path count (_:nodes) = extractTR path count nodes extractTR _ _ [] = [] extractTH path count ordering nodes = Link { label = nodesText nodes "", title = pack $ show count, href = nullURI { uriFragment = '#': intercalate "." [show n | n <- path] ++ o ordering : show count } } where o (Just "ascending") = '>' o _ = '<' extractNodes p@(n:path) (NodeElement el:nodes) = extractEl p el ++ extractNodes (succ n:path) nodes extractNodes path (NodeInstruction instruct:nodes) = extractMisc [MiscInstruction instruct] ++ extractNodes path nodes extractNodes path (_:nodes) = extractNodes path nodes extractNodes _ [] = [] (+++) = append nodesText :: [Node] -> Text -> Text nodesText (NodeElement (Element _ attrs children):nodes) def = nodesText children def +++ nodesText nodes def nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def nodesText (_:nodes) def = nodesText nodes def nodesText [] def = def linksFromPage :: Page -> [Link] linksFromPage Page { url = url', pageTitle = title', html = html', apps = apps', backStack = back', forwardStack = forward' } = -- TODO internationalize! 
link' "reload" title' url' : link' "aggiorna" title' url' : -- Italian link' "ladda om" title' url' : -- Swedish (Svenska) link' "last på nytt" title' url' : -- Norwegian Bokmål link' "reload without cache" "Fetch again from server without checking for a local copy" uncached : link' "aggiorna senza cache" "" uncached : -- Italian link' "ladda om utan cache" "hämta från servern igen utan att kolla efter en lokal kopia" uncached : -- Swedish (Svenska) link' "last på nytt uten mellomlager" "Last siden på nytt uten å bruke lokal kopi" uncached : -- Norwegian Bokmål [link' l t u | (t, u) <- head' back', l <- backLabels] ++ [link' l t u | (t, u) <- head' forward', l <- forwardLabels] ++ [link' n desc $ URI "app:" Nothing id "" "" | Application n _ desc id <- apps'] ++ extractLinks html' where uncached = url' { uriScheme = "nocache+" ++ uriScheme url' } backLabels = ["back", {- Italian -} "indietro", {- Swedish -} "tillbaka", {- Norwegian Bokmål -}"tilbake"] forwardLabels = ["forward", {- Italian -} "avanti", {- Swedish -} "framåt", {- Norwegian Bokmål -} "forover", "videre"] head' (a:_) = [a] head' [] = [] link' l t h = Link (pack l) (pack t) h readBookmarks :: IO Document readBookmarks = do dir <- getXdgDirectory XdgData "rhapsode" let file = dir "links.xml" exists <- doesFileExist file if exists then return () else do -- Copy defaults into userdir dirs <- getXdgDirectoryList XdgDataDirs files' <- forM dirs $ \dir' -> do let file' = dir' "rhapsode" "links.xml" exists' <- doesFileExist file' return $ if exists' then Just file' else Nothing case catMaybes files' of [] -> return () (file':_) -> copyFileWithMetadata file' file exists' <- doesFileExist file if exists' then Text.XML.readFile def file `catch` handleInvalid else nodoc where handleInvalid err@(InvalidXMLFile _ _) = hPrint stderr err >> nodoc nodoc = return $ Document (Prologue [] Nothing []) (Element "empty" M.empty []) [] -- | Write out a file of most frequently encountered unvisited links. -- Hopefully this'll help surfers rely less on YouTube, et al's hueristics. updateSuggestions :: Page -> IO () updateSuggestions page = do let links = extractLinks $ html page let domain = maybe "" show $ uriAuthority $ url page dir <- getXdgDirectory XdgData "rhapsode" let path = dir "suggestions.gmni" exists <- doesFileExist path suggestions <- if not exists then return [] else do file <- readStrict path return [line' | line <- lines file, line'@(_:uri':_) <- [words line], not (pack uri' `Set.member` visitedURLs page)] let suggestions' = suggestions ++ nub [["=>", uri', domain] | link <- links, let uri' = uriToString id (href link) "", not (pack uri' `Set.member` visitedURLs page)] createDirectoryIfMissing True dir Prelude.writeFile path $ unlines $ map unwords suggestions' ------- --- Voice2Json language models ------- -- | Output links to a Voice2Json sentences.ini grammar. outputSentences _ "" = return () outputSentences links dir = do Prelude.writeFile (dir "sentences.ini") $ unlines sentences createProcess (proc "voice2json" ["--profile", dir, "train-profile"]){ std_err = NoStream, std_out = NoStream } return () `catch` \(_ :: IOException) -> return () -- Assume the UI has already warned Voice2Json isn't available. where sentences = "[links]" : [ unwords $ words $ map validChar line -- Enforce valid sentences.ini syntax. | line@(_:_) <- map (unpack . label) links ++ map (unpack . title) links ++ map (show . href) links ] -- | Can this character appear in a sentences.ini rule without escaping? 
-- | Can this character appear in a sentences.ini rule without escaping?
validChar ch | not (isAscii ch) || isSpace ch || isAlphaNum ch = ch
validChar _ = ' '

------
--- C API
------

foreign export ccall c_extractLinks :: StablePtr Page -> CString -> IO (CArray CString)

c_extractLinks c_page c_v2jProfile = do
    page <- deRefStablePtr c_page
    v2jProfile <- peekCString c_v2jProfile
    forkIO $ updateSuggestions page -- Background thread for a useful navigation aid.
    bookmarks <- readBookmarks
    let links = linksFromPage page ++ extractLinks bookmarks
    forkIO $ outputSentences links v2jProfile
    ret <- forM links $ \link -> do
        c_label <- text2cstring $ strip $ label link
        c_title <- text2cstring $ strip $ title link
        c_href <- newCString $ uriToString id (href link) ""
        return [c_label, c_title, c_href]
    nil <- newCString " "
    newArray0 nil $ concat ret

text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString)

------
--- C helper functions
------

foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString

c_formatLink c_label c_title c_url = do
    label <- ctext c_label
    title <- ctext c_title
    url <- ctext c_url

    sfx <- getXdgDirectory XdgCache "rhapsode"
    let bulletpoint_el = audio (sfx </> "bulletpoint.wav")
    let label_el = prosody [("pitch", "low")] label
    let title_el = prosody [("volume", "soft")] title
    let link_el = audio (sfx </> "link.wav")
    let url_el = style "punctuation" "all" $ style "capital_letters" "pitch" $
            prosody [("rate", "fast")] url

    let root = el "speak" [] $ map NodeElement [bulletpoint_el, label_el, title_el, link_el, url_el]
    let ssml = renderText def $ Document (Prologue [] Nothing []) root []
    newCString $ LTxt.unpack ssml
  where
    ctext cstr = pack <$> peekCString cstr
    el name attrs childs = Element name (M.fromList attrs) childs
    audio src = el "audio" [("src", pack src)] []
    prosody attrs txt = el "prosody" attrs [NodeContent txt]
    style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]

foreign export ccall c_dading :: IO CString

c_dading = do
    sfx <- getXdgDirectory XdgCache "rhapsode"
    let link_el = audio (sfx </> "link.wav")
    let root = el "speak" [] [NodeElement link_el]
    let ssml = renderText def $ Document (Prologue [] Nothing []) root []
    newCString $ LTxt.unpack ssml
  where
    el name attrs childs = Element name (M.fromList attrs) childs
    audio src = el "audio" [("src", pack src)] []

--- For Voice2JSON

foreign export ccall c_dataDir :: CString -> IO CString

c_dataDir c_subdir = do
    subdir <- peekCString c_subdir
    cache <- getXdgDirectory XdgData "rhapsode"
    newCString (cache </> subdir)

foreign export ccall c_recognizeIntent :: CString -> IO CString

c_recognizeIntent c_profile = do
    profile <- peekCString c_profile
    (_, Just pipe, _, _) <- createProcess (proc "voice2json" [
            "--profile", profile, "transcribe-stream", "-c", "1"
        ]) { std_out = CreatePipe }
    (_, Just out, _, _) <- createProcess (proc "voice2json" [
            "--profile", profile, "recognize-intent"
        ]) { std_in = UseHandle pipe, std_out = CreatePipe }
    intent <- LBS.hGetContents out
    let transcript = case decode intent of
            Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt
            _ -> ""
    newCString transcript