~alcinnz/rhapsode

ref: bfcceaac93082176ddf0c0aa31d6edb7c230d70b rhapsode/src/Links.hs -rw-r--r-- 9.2 KiB
bfcceaac — Adrian Cochrane Add dependency on CMU Sphinx. 3 years ago
                                                                                
36f30081 Adrian Cochrane
4777781c Adrian Cochrane
36f30081 Adrian Cochrane
32909aaa Adrian Cochrane
36f30081 Adrian Cochrane
ff13105b Adrian Cochrane
4777781c Adrian Cochrane
36f30081 Adrian Cochrane
4777781c Adrian Cochrane
a9c0bfc0 Adrian Cochrane
e7cfb212 Adrian Cochrane
a9c0bfc0 Adrian Cochrane
4777781c Adrian Cochrane
911f8522 Adrian Cochrane
e7cfb212 Adrian Cochrane
c539e099 Adrian Cochrane
911f8522 Adrian Cochrane
e7cfb212 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
df331116 Adrian Cochrane
0f67342e Adrian Cochrane
5d890185 Adrian Cochrane
e042770f Adrian Cochrane
12fc781e Adrian Cochrane
e042770f Adrian Cochrane
12fc781e Adrian Cochrane
e042770f Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
36f30081 Adrian Cochrane
c539e099 Adrian Cochrane
4777781c Adrian Cochrane
a9c0bfc0 Adrian Cochrane
3a0ff8db Adrian Cochrane
78836e34 Adrian Cochrane
3a0ff8db Adrian Cochrane
78836e34 Adrian Cochrane
3a0ff8db Adrian Cochrane
a9c0bfc0 Adrian Cochrane
3a0ff8db Adrian Cochrane
78836e34 Adrian Cochrane
a9c0bfc0 Adrian Cochrane
cf3d8d0d Adrian Cochrane
a9c0bfc0 Adrian Cochrane
e7cfb212 Adrian Cochrane
32909aaa Adrian Cochrane
e7cfb212 Adrian Cochrane
32909aaa Adrian Cochrane
e7cfb212 Adrian Cochrane
4777781c Adrian Cochrane
e7cfb212 Adrian Cochrane
a9c0bfc0 Adrian Cochrane
4777781c Adrian Cochrane
ff13105b Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
{-# LANGUAGE OverloadedStrings #-}
module Links(extractLinks, linkToText, Link(..), c_extractLinks) where

import Text.XML
import qualified Data.Map as M
import Network.MIME.Info as MIME
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import qualified Data.Text.Lazy as LTxt
import qualified Data.Text.Foreign as FTxt
import Data.Maybe

import Types
import Foreign.StablePtr
import Foreign.C.String
import Foreign.Marshal.Array
import Control.Monad (forM)
import Control.Exception (catch)

import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
import System.IO (hPrint, stderr) -- For error reporting

-- For suggestions.gmni
import Data.Trie.Text (Trie)
import qualified Data.Trie.Text as Trie
import Data.List (nub, intercalate)
import Control.Concurrent (forkIO)

data Link = Link {
    label :: Text,
    title :: Text,
    href :: URI
}

linkToText :: Link -> Text
linkToText (Link label' title' href') =
    rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')

rmWs text = strip $ replace "\t" " " $ replace "\n" " " text

extractLinks :: Document -> [Link]
extractLinks (Document prologue root misc) =
    extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
    extractEl [] root ++ extractMisc misc

extractMisc :: [Miscellaneous] -> [Link]
extractMisc (MiscInstruction (Instruction target dat):misc)
    | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
extractMisc (_:misc) = extractMisc misc
extractMisc [] = []

extractEl path el@(Element (Name "details" _ _) _ childs) =
    [Link (nodesText summary' $ nodesText childs "") "+" nullURI {
        uriFragment = '#':'.':intercalate "." (map show $ reverse path)
    } | NodeElement summary@(Element (Name "summary" _ _) _ summary') <- childs] ++
    extractNodes (0:path) childs
-- Special case for showing Appstream metadata of compatible apps.
-- Fallback for incompatible package manager UIs.
extractEl _ (Element "{https://specifications.freedesktop.org/metainfo/1.0}url" attrs childs)
    | Just label <- "{https://specifications.freedesktop.org/metainfo/1.0}type" `M.lookup` attrs,
      Just url <- parseAbsoluteURI $ unpack $ nodesText childs "" = [Link label "" url]
extractEl _ el@(Element _ attrs [])
    | Just "alternate" <- "rel" `M.lookup` attrs', Just typ <- "type" `M.lookup` attrs',
            Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val =
        let Application name _ title _ = mimeInfo $ unpack typ
        in [Link (pack name) (pack title) uri]
  where attrs' = M.mapKeys nameLocalName attrs
extractEl _ el@(Element (Name "link" _ _) attrs [])
    | Just "stylesheet" <- "rel" `M.lookup` attrs', Nothing <- "title" `M.lookup` attrs',
            Just val <- "href" `M.lookup` attrs', Just uri <- parseURIReference $ unpack val =
        let Application name _ title _ = mimeInfo "text/css"
        in [Link (pack name) (pack title) uri]
  where attrs' = M.mapKeys nameLocalName attrs
extractEl path el@(Element _ _ children) =
    extractElAttr el "href" ++
    extractElAttr el "longdesc" ++
    extractElAttr el "src" ++
    extractNodes (0:path) children

extractElAttr (Element _ attrs children) attr
        | Just val <- attr `M.lookup` attrs',
            Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
        | otherwise = []
    where
        label' = nodesText children $ M.findWithDefault "" "rel" attrs'
        title' = fromMaybe "" $ M.lookup "title" attrs'
        attrs' = M.mapKeys nameLocalName attrs

extractNodes p@(n:path) (NodeElement el:nodes) = extractEl p el ++ extractNodes (succ n:path) nodes
extractNodes path (NodeInstruction instruct:nodes) =
    extractMisc [MiscInstruction instruct] ++ extractNodes path nodes
extractNodes path (_:nodes) = extractNodes path nodes
extractNodes _ [] = []

(+++) = append
nodesText :: [Node] -> Text -> Text
nodesText (NodeElement (Element _ attrs children):nodes) def =
    nodesText children def +++ nodesText nodes def
nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def
nodesText (_:nodes) def = nodesText nodes def
nodesText [] def = def

linksFromPage :: Page -> [Link]
linksFromPage Page {
        url = url',
        pageTitle = title',
        html = html',
        apps = apps',
        backStack = back', forwardStack = forward'
    } = -- TODO internationalize!
        link' "reload" title' url' :
        link' "aggiorna" title' url' : -- Italian
        link' "ladda om" title' url' : -- Swedish (Svenska)
        link' "last på nytt" title' url' : -- Norwegian Bokmål
        link' "reload without cache" "Fetch again from server without checking for a local copy" uncached :
        link' "aggiorna senza cache" "" uncached : -- Italian
        link' "ladda om utan cache" "hämta från servern igen utan att kolla efter en lokal kopia" uncached : -- Swedish (Svenska)
        link' "last på nytt uten mellomlager" "Last siden på nytt uten å bruke lokal kopi" uncached : -- Norwegian Bokmål
        [link' l t u | (t, u) <- head' back', l <- backLabels] ++
        [link' l t u | (t, u) <- head' forward', l <- forwardLabels] ++
        [link' n desc $ URI "app:" Nothing id "" "" | Application n _ desc id <- apps'] ++
        extractLinks html'
    where
        uncached = url' { uriScheme = "nocache+" ++ uriScheme url' }
        backLabels = ["back", {- Italian -} "indietro", {- Swedish -} "tillbaka",
            {- Norwegian Bokmål -}"tilbake"]
        forwardLabels = ["forward", {- Italian -} "avanti", {- Swedish -} "framåt",
            {- Norwegian Bokmål -} "forover", "videre"]

head' (a:_) = [a]
head' [] = []
link' l t h = Link (pack l) (pack t) h

readBookmarks :: IO Document
readBookmarks = do
    dir <- getXdgDirectory XdgData "rhapsode"
    let file = dir </> "links.xml"
    exists <- doesFileExist file

    if exists then return () else do
        -- Copy defaults into userdir
        dirs <- getXdgDirectoryList XdgDataDirs
        files' <- forM dirs $ \dir' -> do
            let file' = dir' </> "rhapsode" </> "links.xml"
            exists' <- doesFileExist file'
            return $ if exists' then Just file' else Nothing
        case catMaybes files' of
            [] -> return ()
            (file':_) -> copyFileWithMetadata file' file

    exists' <- doesFileExist file
    if exists' then Text.XML.readFile def file `catch` handleInvalid else nodoc
  where
    handleInvalid err@(InvalidXMLFile _ _) = hPrint stderr err >> nodoc
    nodoc = return $ Document (Prologue [] Nothing []) (Element "empty" M.empty []) []

-- | Write out a file of most frequently encountered unvisited links.
-- Hopefully this'll help surfers rely less on YouTube, et al's hueristics.
updateSuggestions :: Page -> IO ()
updateSuggestions page = do
    let links = extractLinks $ html page
    let domain = maybe "" show $ uriAuthority $ url page

    dir <- getXdgDirectory XdgData "rhapsode"
    let path = dir </> "suggestions.gmni"
    exists <- doesFileExist path
    suggestions <- if not exists then return [] else do
        file <- Prelude.readFile path
        return [line' | line <- lines file, line'@(_:uri':_) <- [words line], not (pack uri' `Trie.member` visitedURLs page)]

    let suggestions' = suggestions ++ nub [["=>", uri', domain] | link <- links,
            let uri' = uriToString id (href link) "", not (pack uri' `Trie.member` visitedURLs page)]

    createDirectoryIfMissing True dir
    Prelude.writeFile path $ unlines $ map unwords suggestions'

-- C API
foreign export ccall c_extractLinks :: StablePtr Page -> IO (CArray CString)

c_extractLinks c_page = do
    page <- deRefStablePtr c_page
    forkIO $ updateSuggestions page -- background process for useful navigation aid.
    bookmarks <- readBookmarks
    ret <- forM (linksFromPage page ++ extractLinks bookmarks) $ \link -> do
        c_label <- text2cstring $ strip $ label link
        c_title <- text2cstring $ strip $ title link
        c_href <- newCString $ uriToString id (href link) ""
        return [c_label, c_title, c_href]
    nil <- newCString " "
    newArray0 nil $ concat ret

text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString)

foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString

c_formatLink c_label c_title c_url = do
    label <- ctext c_label
    title <- ctext c_title
    url <- ctext c_url

    sfx <- getXdgDirectory XdgCache "rhapsode"
    let bulletpoint_el = audio (sfx </> "bulletpoint.wav")
    let label_el = prosody [("pitch", "low")] label
    let title_el = prosody [("volume", "soft")] title
    let link_el = audio (sfx </> "link.wav")
    let url_el = style "punctuation" "all" $ style "capital_letters" "pitch" $ prosody [("rate", "fast")] url
    let root = el "speak" [] $ map NodeElement [bulletpoint_el, label_el, title_el, link_el, url_el]

    let ssml = renderText def $ Document (Prologue [] Nothing []) root []
    newCString $ LTxt.unpack ssml
  where
    ctext cstr = pack <$> peekCString cstr
    el name attrs childs = Element name (M.fromList attrs) childs
    audio src = el "audio" [("src", pack src)] []
    prosody attrs txt = el "prosody" attrs [NodeContent txt]
    style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]