{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Links(extractLinks, linkToText, Link(..), c_extractLinks) where
import Text.XML
import qualified Data.Map as M
import Network.MIME.Info as MIME
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import qualified Data.Text.Lazy as LTxt
import qualified Data.Text.Foreign as FTxt
import Data.Maybe
import Text.Read (readMaybe)
import Table (splitTable)
import Types
import Foreign.StablePtr
import Foreign.C.String
import Foreign.Marshal.Array
import Control.Monad (forM)
import Control.Exception (catch, IOException)
import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json
-- For suggestions.gmni
import qualified Data.Set as Set
import Data.List (nub, intercalate)
import Control.Concurrent (forkIO)
-- For Voice2Json
import Data.Char
import System.Process
import Data.Aeson
import qualified Data.HashMap.Strict as HM
import qualified Data.ByteString.Lazy as LBS
data Link = Link {
label :: Text,
title :: Text,
href :: URI
}
linkToText :: Link -> Text
linkToText (Link label' title' href') =
rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')
rmWs text = strip $ replace "\t" " " $ replace "\n" " " text
extractLinks :: Document -> [Link]
extractLinks (Document prologue root misc) =
extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
extractEl [] root ++ extractMisc misc
extractMisc :: [Miscellaneous] -> [Link]
extractMisc (MiscInstruction (Instruction target dat):misc)
| Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
extractMisc (_:misc) = extractMisc misc
extractMisc [] = []
extractEl path el@(Element (Name "details" _ _) _ childs) =
[Link (nodesText summary' $ nodesText childs "") "+" nullURI {
uriFragment = '#':'.':intercalate "." (map show $ reverse path)
} | NodeElement summary@(Element (Name "summary" _ _) _ summary') <- childs] ++
extractNodes (0:path) childs
-- Special case for showing Appstream metadata of compatible apps.
-- Fallback for incompatible package manager UIs.
extractEl _ (Element "{https://specifications.freedesktop.org/metainfo/1.0}url" attrs childs)
| Just label <- "{https://specifications.freedesktop.org/metainfo/1.0}type" `M.lookup` attrs,
Just url <- parseAbsoluteURI $ unpack $ nodesText childs "" = [Link label "" url]
extractEl _ el@(Element _ attrs [])
| Just "alternate" <- "rel" `M.lookup` attrs', Just typ <- "type" `M.lookup` attrs',
Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val =
-- let Application name _ title _ = mimeInfo $ unpack typ
-- in [Link (pack name) (pack title) uri] -- FIXME `mimeInfo` freezes...
[Link typ "" uri]
where attrs' = M.mapKeys nameLocalName attrs
extractEl _ el@(Element (Name "link" _ _) attrs [])
| Just "stylesheet" <- "rel" `M.lookup` attrs', Nothing <- "title" `M.lookup` attrs',
Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val =
let Application name _ title _ = mimeInfo "text/css"
in [Link (pack name) (pack title) uri]
where attrs' = M.mapKeys nameLocalName attrs
extractEl path (Element (Name "table" _ _) _ childs) =
extractTable path (splitTable childs) ++ extractNodes (0:path) childs
extractEl path el@(Element _ _ children) =
extractElAttr el "href" ++
extractElAttr el "longdesc" ++
extractElAttr el "src" ++
extractNodes (0:path) children
extractElAttr (Element _ attrs children) attr
| Just val <- attr `M.lookup` attrs',
Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
| otherwise = []
where
label' = nodesText children $ M.findWithDefault "" "rel" attrs'
title' = fromMaybe "" $ M.lookup "title" attrs'
attrs' = M.mapKeys nameLocalName attrs
extractTable path (thead, _, _) = extractTable' path [el | NodeElement el <- thead]
extractTable' path (Element (Name "thead" _ _) _ childs:_) =
extractTable' path [el | NodeElement el <- childs]
extractTable' path (Element (Name "tr" _ _) _ childs:_) = extractTR path 0 childs
extractTable' path (_:els) = extractTable' path els
extractTable' _ [] = []
extractTR path count (NodeElement (Element (Name name _ _) attrs childs):nodes)
| name `elem` ["th", "td"] =
extractTH path count ordering childs : extractTR path count' nodes
where
count' = count + fromMaybe 1 colspan
colspan = readMaybe =<< unpack <$> M.lookup "colspan" attrs'
ordering = M.lookup "aria-sort" attrs'
attrs' = M.mapKeys nameLocalName attrs
extractTR path count (_:nodes) = extractTR path count nodes
extractTR _ _ [] = []
extractTH path count ordering nodes = Link {
label = nodesText nodes "",
title = pack $ show count,
href = nullURI {
uriFragment = '#': intercalate "." [show n | n <- path] ++ o ordering : show count
}
}
where
o (Just "ascending") = '>'
o _ = '<'
extractNodes p@(n:path) (NodeElement el:nodes) = extractEl p el ++ extractNodes (succ n:path) nodes
extractNodes path (NodeInstruction instruct:nodes) =
extractMisc [MiscInstruction instruct] ++ extractNodes path nodes
extractNodes path (_:nodes) = extractNodes path nodes
extractNodes _ [] = []
(+++) = append
nodesText :: [Node] -> Text -> Text
nodesText (NodeElement (Element _ attrs children):nodes) def =
nodesText children def +++ nodesText nodes def
nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def
nodesText (_:nodes) def = nodesText nodes def
nodesText [] def = def
linksFromPage :: Page -> [Link]
linksFromPage Page {
url = url',
pageTitle = title',
html = html',
apps = apps',
backStack = back', forwardStack = forward'
} = -- TODO internationalize!
link' "reload" title' url' :
link' "aggiorna" title' url' : -- Italian
link' "ladda om" title' url' : -- Swedish (Svenska)
link' "last på nytt" title' url' : -- Norwegian Bokmål
link' "reload without cache" "Fetch again from server without checking for a local copy" uncached :
link' "aggiorna senza cache" "" uncached : -- Italian
link' "ladda om utan cache" "hämta från servern igen utan att kolla efter en lokal kopia" uncached : -- Swedish (Svenska)
link' "last på nytt uten mellomlager" "Last siden på nytt uten å bruke lokal kopi" uncached : -- Norwegian Bokmål
[link' l t u | (t, u) <- head' back', l <- backLabels] ++
[link' l t u | (t, u) <- head' forward', l <- forwardLabels] ++
[link' n desc $ URI "app:" Nothing id "" "" | Application n _ desc id <- apps'] ++
extractLinks html'
where
uncached = url' { uriScheme = "nocache+" ++ uriScheme url' }
backLabels = ["back", {- Italian -} "indietro", {- Swedish -} "tillbaka",
{- Norwegian Bokmål -}"tilbake"]
forwardLabels = ["forward", {- Italian -} "avanti", {- Swedish -} "framåt",
{- Norwegian Bokmål -} "forover", "videre"]
head' (a:_) = [a]
head' [] = []
link' l t h = Link (pack l) (pack t) h
readBookmarks :: IO Document
readBookmarks = do
dir <- getXdgDirectory XdgData "rhapsode"
let file = dir </> "links.xml"
exists <- doesFileExist file
if exists then return () else do
-- Copy defaults into userdir
dirs <- getXdgDirectoryList XdgDataDirs
files' <- forM dirs $ \dir' -> do
let file' = dir' </> "rhapsode" </> "links.xml"
exists' <- doesFileExist file'
return $ if exists' then Just file' else Nothing
case catMaybes files' of
[] -> return ()
(file':_) -> copyFileWithMetadata file' file
exists' <- doesFileExist file
if exists' then Text.XML.readFile def file `catch` handleInvalid else nodoc
where
handleInvalid err@(InvalidXMLFile _ _) = hPrint stderr err >> nodoc
nodoc = return $ Document (Prologue [] Nothing []) (Element "empty" M.empty []) []
-- | Write out a file of most frequently encountered unvisited links.
-- Hopefully this'll help surfers rely less on YouTube, et al's hueristics.
updateSuggestions :: Page -> IO ()
updateSuggestions page = do
let links = extractLinks $ html page
let domain = maybe "" show $ uriAuthority $ url page
dir <- getXdgDirectory XdgData "rhapsode"
let path = dir </> "suggestions.gmni"
exists <- doesFileExist path
suggestions <- if not exists then return [] else do
file <- readStrict path
return [line' | line <- lines file, line'@(_:uri':_) <- [words line], not (pack uri' `Set.member` visitedURLs page)]
let suggestions' = suggestions ++ nub [["=>", uri', domain] | link <- links,
let uri' = uriToString id (href link) "", not (pack uri' `Set.member` visitedURLs page)]
createDirectoryIfMissing True dir
Prelude.writeFile path $ unlines $ map unwords suggestions'
-------
--- Voice2Json language models
-------
-- | Output links to a Voice2Json sentences.ini grammar.
outputSentences _ "" = return ()
outputSentences links dir = do
Prelude.writeFile (dir </> "sentences.ini") $ unlines sentences
createProcess (proc "voice2json" ["--profile", dir, "train-profile"]){ std_err = NoStream, std_out = NoStream }
return ()
`catch` \(_ :: IOException) -> return () -- Assume the UI has already warned Voice2Json isn't available.
where
sentences = "[links]" : [
unwords $ words $ map validChar line -- Enforce valid sentences.ini syntax.
| line@(_:_) <- map (unpack . label) links ++ map (unpack . title) links ++ map (show . href) links
]
-- | Can this character appear in a sentences.ini rule without escaping?
validChar ch | not (isAscii ch) || isSpace ch || isAlphaNum ch = ch
validChar _ = ' '
------
--- C API
------
foreign export ccall c_extractLinks :: StablePtr Page -> CString -> IO (CArray CString)
c_extractLinks c_page c_v2jProfile = do
page <- deRefStablePtr c_page
v2jProfile <- peekCString c_v2jProfile
forkIO $ updateSuggestions page -- background process for useful navigation aid.
bookmarks <- readBookmarks
let links = linksFromPage page ++ extractLinks bookmarks
forkIO $ outputSentences links v2jProfile
ret <- forM links $ \link -> do
c_label <- text2cstring $ strip $ label link
c_title <- text2cstring $ strip $ title link
c_href <- newCString $ uriToString id (href link) ""
return [c_label, c_title, c_href]
nil <- newCString " "
newArray0 nil $ concat ret
text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString)
------
--- C helper functions
------
foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString
c_formatLink c_label c_title c_url = do
label <- ctext c_label
title <- ctext c_title
url <- ctext c_url
sfx <- getXdgDirectory XdgCache "rhapsode"
let bulletpoint_el = audio (sfx </> "bulletpoint.wav")
let label_el = prosody [("pitch", "low")] label
let title_el = prosody [("volume", "soft")] title
let link_el = audio (sfx </> "link.wav")
let url_el = style "punctuation" "all" $ style "capital_letters" "pitch" $ prosody [("rate", "fast")] url
let root = el "speak" [] $ map NodeElement [bulletpoint_el, label_el, title_el, link_el, url_el]
let ssml = renderText def $ Document (Prologue [] Nothing []) root []
newCString $ LTxt.unpack ssml
where
ctext cstr = pack <$> peekCString cstr
el name attrs childs = Element name (M.fromList attrs) childs
audio src = el "audio" [("src", pack src)] []
prosody attrs txt = el "prosody" attrs [NodeContent txt]
style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]
foreign export ccall c_dading :: IO CString
c_dading = do
sfx <- getXdgDirectory XdgCache "rhapsode"
let link_el = audio (sfx </> "link.wav")
let root = el "speak" [] [NodeElement link_el]
let ssml = renderText def $ Document (Prologue [] Nothing []) root []
newCString $ LTxt.unpack ssml
where
el name attrs childs = Element name (M.fromList attrs) childs
audio src = el "audio" [("src", pack src)] []
--- For Voice2JSON
foreign export ccall c_dataDir :: CString -> IO CString
c_dataDir c_subdir = do
subdir <- peekCString c_subdir
cache <- getXdgDirectory XdgData "rhapsode"
newCString (cache </> subdir)
foreign export ccall c_recognizeIntent :: CString -> IO CString
c_recognizeIntent c_profile = do
profile <- peekCString c_profile
(_, Just pipe, _, _) <- createProcess (proc "voice2json" [
"--profile", profile,
"transcribe-stream",
"-c", "1"]){std_out = CreatePipe}
(_, Just out, _, _) <- createProcess (proc "voice2json" [
"--profile", profile,
"recognize-intent"]){std_in = UseHandle pipe, std_out = CreatePipe}
intent <- LBS.hGetContents out
let transcript = case decode intent of
Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt
_ -> ""
newCString transcript