~alcinnz/amphiarao

ref: 1067e7dade93a5f4193da768bf2e3d6a771c65db amphiarao/src/Internal/Elements.hs -rw-r--r-- 3.7 KiB
1067e7da — Adrian Cochrane Integrate Haskell Stylist for CSS debugging. 2 years ago
                                                                                
38e35ef9 Adrian Cochrane
8e195046 Adrian Cochrane
2e0a2343 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
38e35ef9 Adrian Cochrane
2e0a2343 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
5eb4b914 Adrian Cochrane
b8351f83 Adrian Cochrane
8e195046 Adrian Cochrane
b8351f83 Adrian Cochrane
8e195046 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
5eb4b914 Adrian Cochrane
2e0a2343 Adrian Cochrane
38e35ef9 Adrian Cochrane
b8351f83 Adrian Cochrane
2e0a2343 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Internal.Elements (getTitle, Find(..), find) where

import Text.XML
import Text.XML.HXT.DOM.TypeDefs
import Data.Tree.NTree.TypeDefs (NTree(..))
import qualified Data.Map as M
import Data.Text as Txt hiding (find)
import Control.Concurrent.MVar

import Internal

import Data.Aeson
import Data.Text as Txt (Text, pack, append)
import GHC.Generics

-- Selector engines
import qualified Text.XML.Cursor as X
import qualified XML.Selectors.CSS as CSS
import Text.XML.HXT.XPath.XPathEval (getXPath', parseXPathExpr)

import Network.URI (parseURIReference)
import Data.Maybe
import Data.String (fromString)

getTitle :: Session -> IO Text
getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session

getTitle' (Element "title" _ childs) = nodesText childs
getTitle' (Element "h1" _ childs) = nodesText childs
getTitle' (Element _ _ childs)
    -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition.
    | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title
    | otherwise = ""

nodesText (NodeElement (Element _ _ childs):nodes) = nodesText childs `append` nodesText nodes
nodesText (NodeContent txt:nodes) = txt `append` nodesText nodes
nodesText (_:nodes) = nodesText nodes
nodesText [] = ""

---

data Find = Find { using :: Text, value :: String } deriving Generic
instance FromJSON Find
find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor]
find (Find "css selector" sel) root = case CSS.parsePath sel of
    Right sel' -> Right $ CSS.toAxis sel' root
    Left msg -> Left (True, msg)
find (Find "link text" sel) root = Right $ allLinks (== pack sel) root
find (Find "partial link text" sel) root = Right $ allLinks (Txt.isInfixOf $ pack sel) root
find (Find "tag name" sel) root = Right (X.descendant root >>= X.checkName (== fromString sel))
-- XML Conduit doesn't have an XPath implementation, but HXT does. So convert back & forth!
find (Find "xpath" sel) root = case parseXPathExpr sel of
    Right expr -> Right $ hxts2cursors root $ getXPath' expr $ conduit2hxt $ X.node root
    Left err -> Left (True, err)
find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_)

allLinks test = X.descendant X.>=>
    -- Missing some misc. elements Rhapsode treats as links
    (X.hasAttribute "src" `union` X.hasAttribute "href") X.>=>
    X.checkElement test'
  where
    test' (Element _ attrs childs) =
        isJust (parseURIReference <$> unpack <$>
            (M.lookup "src" attrs *> M.lookup "href" attrs)) ||
        -- Emulate Rhapsode's mandatory whitespace-collapse
        test (Txt.unwords $ Txt.words $ nodesText childs)

union a b cursor = a cursor ++ b cursor

--- XPath Support

map' = Prelude.map

conduit2hxt (NodeElement (Element name attrs childs)) =
    NTree (XTag (n2qn name) $ map' attr2hxt $ M.toList attrs) $ map' conduit2hxt childs
conduit2hxt (NodeInstruction (Instruction target dat)) =
    NTree (XPi (mkName $ unpack target) []) [NTree (XText $ unpack dat) []]
conduit2hxt (NodeContent txt) = NTree (XText $ unpack txt) []
conduit2hxt (NodeComment txt) = NTree (XCmt $ unpack txt) []

attr2hxt (name, value) = NTree (XAttr $ n2qn name) [NTree (XText $ unpack value) []]

n2qn (Name local (Just namespace) (Just prefix)) =
    mkQName (unpack prefix) (unpack local) (unpack namespace)
n2qn (Name local Nothing (Just prefix)) =
    mkName (unpack prefix ++ ':' : unpack local)
n2qn (Name local (Just namespace) Nothing) = mkNsName (unpack local) (unpack namespace)
n2qn (Name local Nothing Nothing) = mkName $ unpack local

hxt2cursor node hxt = lookup hxt [
    (conduit2hxt $ X.node cursor, cursor) | cursor <- X.orSelf X.descendant node]
hxts2cursors node hxts = catMaybes $ map' (hxt2cursor node) hxts