From b8351f834a84aaae9a523cb90c57a78bbc05e39e Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Sun, 27 Jun 2021 09:56:43 +1200 Subject: [PATCH] Support searching for links by label, both in JSON & HTML interfaces --- src/Internal/Elements.hs | 29 ++++++++++++++++++++++++++--- src/Messages.hs | 6 +++++- src/UI/Search.hs | 4 +++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/Internal/Elements.hs b/src/Internal/Elements.hs index 82493c5..f82bc87 100644 --- a/src/Internal/Elements.hs +++ b/src/Internal/Elements.hs @@ -9,23 +9,31 @@ import Control.Concurrent.MVar import Internal import Data.Aeson -import Data.Text (Text, pack) +import Data.Text as Txt (Text, pack, append) import GHC.Generics -- Selector engines import qualified Text.XML.Cursor as X import qualified XML.Selectors.CSS as CSS +import Network.URI (parseURIReference) +import Data.Maybe + getTitle :: Session -> IO Text getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session -getTitle' (Element "title" _ childs) = Txt.concat [txt | NodeContent txt <- childs] -getTitle' (Element "h1" _ childs) = Txt.concat [txt | NodeContent txt <- childs] +getTitle' (Element "title" _ childs) = nodesText childs +getTitle' (Element "h1" _ childs) = nodesText childs getTitle' (Element _ _ childs) -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition. | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title | otherwise = "" +nodesText (NodeElement (Element _ _ childs):nodes) = nodesText childs `append` nodesText nodes +nodesText (NodeContent txt:nodes) = txt `append` nodesText nodes +nodesText (_:nodes) = nodesText nodes +nodesText [] = "" + --- data Find = Find { using :: Text, value :: String } deriving Generic @@ -34,4 +42,19 @@ find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor] find (Find "css selector" sel) root = case CSS.parsePath sel of Right sel' -> Right $ CSS.toAxis sel' root Left msg -> Left (True, msg) +find (Find "link text" sel) root = Right $ allLinks (== pack sel) root +find (Find "partial link text" sel) root = Right $ allLinks (Txt.isInfixOf $ pack sel) root find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_) + +allLinks test = X.descendant X.>=> + -- Missing some misc. elements Rhapsode treats as links + (X.hasAttribute "src" `union` X.hasAttribute "href") X.>=> + X.checkElement test' + where + test' (Element _ attrs childs) = + isJust (parseURIReference <$> unpack <$> + (M.lookup "src" attrs *> M.lookup "href" attrs)) || + -- Emulate Rhapsode's mandatory whitespace-collapse + test (Txt.unwords $ Txt.words $ nodesText childs) + +union a b cursor = a cursor ++ b cursor diff --git a/src/Messages.hs b/src/Messages.hs index ca37c18..bedc7c3 100644 --- a/src/Messages.hs +++ b/src/Messages.hs @@ -15,7 +15,9 @@ data Message = ElementNotFound | NoResults | PromptPreview | - ErrURL deriving Show + ErrURL | + LinkSearchExact | LinkSearch + deriving Show l :: [Text] -> Message -> Html ---- Begin localizations @@ -40,6 +42,8 @@ l ("en":_) PromptPreview = em "Click a search result to preview it here" l ("en":_) ErrURL = do h1 "Invalid Link!" p "The provided URL was not absolute." +l ("en":_) LinkSearchExact = "Links (Exact)" +l ("en":_) LinkSearch = "Links" ---- End localizations l (_:langs) msg = l langs msg l [] msg = string $ show msg diff --git a/src/UI/Search.hs b/src/UI/Search.hs index d46b34f..f3d98b7 100644 --- a/src/UI/Search.hs +++ b/src/UI/Search.hs @@ -25,7 +25,9 @@ engines :: [( )] engines = [ (const "URL", offerToLoad), - (const "CSS", queryEls "css selector") + (const "CSS", queryEls "css selector"), + (flip l LinkSearchExact, queryEls "link text"), -- Only useful if it promotes results higher. + (flip l LinkSearch, queryEls "partial link text") ] result href' = a ! href (stringValue href') ! target "preview" -- 2.30.2