~alcinnz/amphiarao

b8351f834a84aaae9a523cb90c57a78bbc05e39e — Adrian Cochrane 3 years ago 23e9751
Support searching for links by label, both in JSON & HTML interfaces
3 files changed, 34 insertions(+), 5 deletions(-)

M src/Internal/Elements.hs
M src/Messages.hs
M src/UI/Search.hs
M src/Internal/Elements.hs => src/Internal/Elements.hs +26 -3
@@ 9,23 9,31 @@ import Control.Concurrent.MVar
import Internal

import Data.Aeson
import Data.Text (Text, pack)
import Data.Text as Txt (Text, pack, append)
import GHC.Generics

-- Selector engines
import qualified Text.XML.Cursor as X
import qualified XML.Selectors.CSS as CSS

import Network.URI (parseURIReference)
import Data.Maybe

getTitle :: Session -> IO Text
getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session

getTitle' (Element "title" _ childs) = Txt.concat [txt | NodeContent txt <- childs]
getTitle' (Element "h1" _ childs) = Txt.concat [txt | NodeContent txt <- childs]
getTitle' (Element "title" _ childs) = nodesText childs
getTitle' (Element "h1" _ childs) = nodesText childs
getTitle' (Element _ _ childs)
    -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition.
    | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title
    | otherwise = ""

nodesText (NodeElement (Element _ _ childs):nodes) = nodesText childs `append` nodesText nodes
nodesText (NodeContent txt:nodes) = txt `append` nodesText nodes
nodesText (_:nodes) = nodesText nodes
nodesText [] = ""

---

data Find = Find { using :: Text, value :: String } deriving Generic


@@ 34,4 42,19 @@ find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor]
find (Find "css selector" sel) root = case CSS.parsePath sel of
    Right sel' -> Right $ CSS.toAxis sel' root
    Left msg -> Left (True, msg)
find (Find "link text" sel) root = Right $ allLinks (== pack sel) root
find (Find "partial link text" sel) root = Right $ allLinks (Txt.isInfixOf $ pack sel) root
find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_)

allLinks test = X.descendant X.>=>
    -- Missing some misc. elements Rhapsode treats as links
    (X.hasAttribute "src" `union` X.hasAttribute "href") X.>=>
    X.checkElement test'
  where
    test' (Element _ attrs childs) =
        isJust (parseURIReference <$> unpack <$>
            (M.lookup "src" attrs *> M.lookup "href" attrs)) ||
        -- Emulate Rhapsode's mandatory whitespace-collapse
        test (Txt.unwords $ Txt.words $ nodesText childs)

union a b cursor = a cursor ++ b cursor

M src/Messages.hs => src/Messages.hs +5 -1
@@ 15,7 15,9 @@ data Message =
    ElementNotFound |
    NoResults |
    PromptPreview |
    ErrURL deriving Show
    ErrURL |
    LinkSearchExact | LinkSearch
    deriving Show

l :: [Text] -> Message -> Html
---- Begin localizations


@@ 40,6 42,8 @@ l ("en":_) PromptPreview = em "Click a search result to preview it here"
l ("en":_) ErrURL = do
    h1 "Invalid Link!"
    p "The provided URL was not absolute."
l ("en":_) LinkSearchExact = "Links (Exact)"
l ("en":_) LinkSearch = "Links"
---- End localizations
l (_:langs) msg = l langs msg
l [] msg = string $ show msg

M src/UI/Search.hs => src/UI/Search.hs +3 -1
@@ 25,7 25,9 @@ engines :: [(
  )]
engines = [
    (const "URL", offerToLoad),
    (const "CSS", queryEls "css selector")
    (const "CSS", queryEls "css selector"),
    (flip l LinkSearchExact, queryEls "link text"), -- Only useful if it promotes results higher.
    (flip l LinkSearch, queryEls "partial link text")
  ]

result href' = a ! href (stringValue href') ! target "preview"