~alcinnz/amphiarao

ref: 5eb4b914899a44b795ab2df865781f08f6f64e2c amphiarao/src/Internal/Elements.hs -rw-r--r-- 2.3 KiB
5eb4b914 — Adrian Cochrane Add search by tag name to JSON interface. 3 years ago
                                                                                
38e35ef9 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
5eb4b914 Adrian Cochrane
b8351f83 Adrian Cochrane
8e195046 Adrian Cochrane
b8351f83 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
5eb4b914 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Internal.Elements (getTitle, Find(..), find) where

import Text.XML
import qualified Data.Map as M
import Data.Text as Txt hiding (find)
import Control.Concurrent.MVar

import Internal

import Data.Aeson
import Data.Text as Txt (Text, pack, append)
import GHC.Generics

-- Selector engines
import qualified Text.XML.Cursor as X
import qualified XML.Selectors.CSS as CSS

import Network.URI (parseURIReference)
import Data.Maybe
import Data.String (fromString)

getTitle :: Session -> IO Text
getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session

getTitle' (Element "title" _ childs) = nodesText childs
getTitle' (Element "h1" _ childs) = nodesText childs
getTitle' (Element _ _ childs)
    -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition.
    | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title
    | otherwise = ""

nodesText (NodeElement (Element _ _ childs):nodes) = nodesText childs `append` nodesText nodes
nodesText (NodeContent txt:nodes) = txt `append` nodesText nodes
nodesText (_:nodes) = nodesText nodes
nodesText [] = ""

---

data Find = Find { using :: Text, value :: String } deriving Generic
instance FromJSON Find
find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor]
find (Find "css selector" sel) root = case CSS.parsePath sel of
    Right sel' -> Right $ CSS.toAxis sel' root
    Left msg -> Left (True, msg)
find (Find "link text" sel) root = Right $ allLinks (== pack sel) root
find (Find "partial link text" sel) root = Right $ allLinks (Txt.isInfixOf $ pack sel) root
find (Find "tag name" sel) root = Right (X.descendant root >>= X.checkName (== fromString sel))
find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_)

allLinks test = X.descendant X.>=>
    -- Missing some misc. elements Rhapsode treats as links
    (X.hasAttribute "src" `union` X.hasAttribute "href") X.>=>
    X.checkElement test'
  where
    test' (Element _ attrs childs) =
        isJust (parseURIReference <$> unpack <$>
            (M.lookup "src" attrs *> M.lookup "href" attrs)) ||
        -- Emulate Rhapsode's mandatory whitespace-collapse
        test (Txt.unwords $ Txt.words $ nodesText childs)

union a b cursor = a cursor ++ b cursor