~alcinnz/rhapsode

ref: 3eb42c6ad53b77528ad54e139b30a1324e415a2b rhapsode/src/Links.hs -rw-r--r-- 4.8 KiB
3eb42c6a — Adrian Cochrane Add introductory messages. 3 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
{-# LANGUAGE OverloadedStrings #-}
module Links(extractLinks, linkToText, Link(..), c_extractLinks) where

import Text.XML
import qualified Data.Map as M
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import qualified Data.Text.Foreign as FTxt
import Data.Maybe

import Types
import Foreign.StablePtr
import Foreign.C.String
import Foreign.Marshal.Array
import Control.Monad (forM)
import Control.Exception (catch)

import System.Directory -- For locating links.xml
import System.FilePath
import System.IO (hPrint, stderr) -- For error reporting

-- | A hyperlink gathered from a document or synthesised for navigation.
data Link = Link
    { label :: Text
      -- ^ Text naming the link (e.g. the text content of the linking element).
    , title :: Text
      -- ^ Supplementary description; empty when none was found.
    , href :: URI
      -- ^ The address the link points to.
    }

-- | Render a link as a single tab-separated record: label, title, URI.
--
-- The label and title are passed through 'rmWs' first, which replaces their
-- tabs and newlines with spaces. The URI is rendered with its 'Show' instance.
linkToText :: Link -> Text
linkToText link = mconcat [rmWs (label link), "\t", rmWs (title link), "\t", shownUri]
  where
    shownUri = pack $ show $ href link

-- | Replace every newline and tab with a single space, then trim leading and
-- trailing whitespace.
rmWs :: Text -> Text
rmWs = strip . replace "\t" " " . replace "\n" " "

-- | Gather every link in an XML document, in document order: instructions
-- before and after the doctype, then the root element's tree, then the
-- epilogue.
extractLinks :: Document -> [Link]
extractLinks (Document prologue root epilogue) = concat
    [ extractMisc $ prologueBefore prologue
    , extractMisc $ prologueAfter prologue
    , extractEl root
    , extractMisc epilogue
    ]

-- | Links encoded as processing instructions.
--
-- An instruction whose target parses as a URI reference becomes a link with
-- the instruction's data as its label and an empty title. Everything else
-- (including instructions with unparsable targets) is skipped.
extractMisc :: [Miscellaneous] -> [Link]
extractMisc miscs =
    [ Link dat "" uri
    | MiscInstruction (Instruction target dat) <- miscs
    , Just uri <- [parseURIReference $ unpack target]
    ]

-- | Links from an element and all of its descendants.
--
-- The element's own link-bearing attributes are checked first, in the order
-- listed in @linkAttrs@, followed by whatever its child nodes contribute.
extractEl :: Element -> [Link]
extractEl el@(Element _ _ children) =
    concatMap (extractElAttr el) linkAttrs ++ extractNodes children
  where
    linkAttrs = ["{http://www.w3.org/1999/xlink}href", "href", "longdesc", "src"]

-- | Zero or one link taken from a single attribute of an element.
--
-- Yields a link only when the attribute is present and its value parses as a
-- URI reference. The label is the text content of the element's children and
-- the title is the element's @title@ attribute (empty if absent).
extractElAttr :: Element -> Name -> [Link]
extractElAttr (Element _ attrs children) attr =
    case M.lookup attr attrs >>= parseURIReference . unpack of
        Just uri -> [Link (nodesText children) linkTitle uri]
        Nothing -> []
  where
    linkTitle = M.findWithDefault "" "title" attrs

-- | Links contributed by a list of child nodes: elements are searched
-- recursively, processing instructions are handled by 'extractMisc', and all
-- other node kinds contribute nothing.
extractNodes :: [Node] -> [Link]
extractNodes = concatMap fromNode
  where
    fromNode (NodeElement el) = extractEl el
    fromNode (NodeInstruction instruct) = extractMisc [MiscInstruction instruct]
    fromNode _ = []

-- | Infix shorthand for 'append' on 'Text'.
(+++) :: Text -> Text -> Text
lhs +++ rhs = append lhs rhs
-- | Concatenated text content of a list of nodes, descending into elements.
--
-- Only content nodes contribute text; comments and processing instructions
-- are ignored. The fragments are collected first and joined with a single
-- 'mconcat', rather than appended pairwise: strict 'Text' append copies both
-- operands, so a right-nested chain of appends re-copies the accumulated
-- suffix once per node.
nodesText :: [Node] -> Text
nodesText = mconcat . concatMap fragments
  where
    -- Text fragments of one node, in document order.
    fragments (NodeElement (Element _ _ children)) = concatMap fragments children
    fragments (NodeContent text) = [text]
    fragments _ = []

-- | Every link offered for a loaded page, in this order:
--
-- 1. "reload" commands (one per supported language) pointing at the page's
--    own URL and titled with the page title;
-- 2. "reload without cache" commands pointing at the same URL with
--    @nocache+@ prepended to its scheme;
-- 3. "back" / "forward" commands for the first entry (if any) of the
--    respective history stack, titled with that entry's title;
-- 4. one @app:@ link per entry in the page's application list;
-- 5. the links extracted from the page's document.
linksFromPage :: Page -> [Link]
linksFromPage Page {
        url = here,
        pageTitle = heading,
        html = doc,
        apps = installed,
        backStack = older, forwardStack = newer
    } = concat -- TODO internationalize!
        [ [link' l heading here | l <- reloadLabels]
        , [link' l t uncached | (l, t) <- uncachedLabels]
        , [link' l t u | (t, u) <- take 1 older, l <- backLabels]
        , [link' l t u | (t, u) <- take 1 newer, l <- forwardLabels]
        , [ link' appName desc $ URI "app:" Nothing appId "" ""
          | Application appName _ desc appId <- installed ]
        , extractLinks doc
        ]
    where
        -- Same address, but with the scheme marked to bypass the cache.
        uncached = here { uriScheme = "nocache+" ++ uriScheme here }

        reloadLabels, backLabels, forwardLabels :: [String]
        reloadLabels =
            [ "reload"
            , "aggiorna" -- Italian
            , "ladda om" -- Swedish (Svenska)
            , "last på nytt" -- Norwegian Bokmål
            ]
        backLabels =
            [ "back"
            , "indietro" -- Italian
            , "tillbaka" -- Swedish
            , "tilbake" -- Norwegian Bokmål
            ]
        forwardLabels =
            [ "forward"
            , "avanti" -- Italian
            , "framåt" -- Swedish
            , "forover", "videre" -- Norwegian Bokmål
            ]

        -- (label, title) pairs for the cache-bypassing reload command.
        uncachedLabels :: [(String, String)]
        uncachedLabels =
            [ ("reload without cache", "Fetch again from server without checking for a local copy")
            , ("aggiorna senza cache", "") -- Italian
            , ("ladda om utan cache", "hämta från servern igen utan att kolla efter en lokal kopia") -- Swedish (Svenska)
            , ("last på nytt uten mellomlager", "Last siden på nytt uten å bruke lokal kopi") -- Norwegian Bokmål
            ]

-- | Total variant of 'head': the first element as a singleton list, or the
-- empty list when there is none.
head' :: [a] -> [a]
head' = take 1
-- | Build a 'Link' from a 'String' label and title plus a target URI.
link' :: String -> String -> URI -> Link
link' l t = Link (pack l) (pack t)

-- | Load the bookmarks document, @links.xml@, from the @rhapsode@ directory
-- under the XDG data location.
--
-- A missing file yields an empty placeholder document. A file that fails to
-- load as XML does the same, after the error is printed to stderr.
readBookmarks :: IO Document
readBookmarks = do
    dataDir <- getXdgDirectory XdgData "rhapsode"
    let bookmarksFile = dataDir </> "links.xml"
    present <- doesFileExist bookmarksFile
    if present
        then Text.XML.readFile def bookmarksFile `catch` reportInvalid
        else pure emptyDoc
  where
    -- Report the failure, then fall back to the placeholder.
    reportInvalid err@(InvalidXMLFile _ _) = do
        hPrint stderr err
        pure emptyDoc
    -- A document containing only an attribute-less, childless root element.
    emptyDoc = Document (Prologue [] Nothing []) (Element "empty" M.empty []) []

-- C API
foreign export ccall c_extractLinks :: StablePtr Page -> IO (CArray CString)

-- | Export a page's links (followed by the bookmarks' links) as a flat array
-- of C strings.
--
-- Each link occupies three consecutive entries: label, title, href. The label
-- and title are stripped of surrounding whitespace. The array is terminated
-- by a pointer to the string @" "@.
--
-- NOTE(review): nothing in this function frees the strings or the array;
-- presumably the caller owns them -- confirm against the C side.
c_extractLinks c_page = do
    page <- deRefStablePtr c_page
    bookmarks <- readBookmarks
    cells <- mapM marshal $ linksFromPage page ++ extractLinks bookmarks
    terminator <- newCString " "
    newArray0 terminator $ concat cells
  where
    -- The three C strings for one link, allocated in label/title/href order.
    marshal link = sequence
        [ text2cstring $ strip $ label link
        , text2cstring $ strip $ title link
        , newCString $ uriToString id (href link) ""
        ]

-- | Copy a 'Text' into a newly allocated, NUL-terminated C string.
--
-- The text is marshalled into a temporary buffer, read back as a 'String',
-- and then re-allocated with 'newCString' so the result outlives the buffer.
text2cstring :: Text -> IO CString
text2cstring txt = FTxt.withCStringLen txt copyOut
  where
    copyOut buf = do
        str <- peekCStringLen buf
        newCString str