~alcinnz/amphiarao

ref: 8e1950465df016493e38832bdd3f229899475877 amphiarao/src/Internal/Elements.hs -rw-r--r-- 695 bytes
8e195046 — Adrian Cochrane Parse web (or Gemini) pages & extract title. 3 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
{-# LANGUAGE OverloadedStrings #-}
module Internal.Elements (getTitle) where

import Text.XML
import qualified Data.Map as M
import Data.Text as Txt
import Control.Concurrent.MVar

import Internal

-- | Extract a title for the page currently held by the session.
--
-- Takes a snapshot of the session state with 'readMVar', then searches from
-- the root element of that state's @document@ using @getTitle'@.
getTitle :: Session -> IO Text
getTitle session = do
    state <- readMVar session
    return $ getTitle' $ documentRoot $ document state

-- | Search an element tree, depth-first and in document order, for a title.
--
-- A @title@ or @h1@ element yields the concatenation of its direct text-node
-- children (text inside nested elements is not included). Any other element
-- yields the first non-empty result found among its child elements, or @""@
-- when none of them produces one.
getTitle' :: Element -> Text
getTitle' (Element name _ childs)
    | name == "title" || name == "h1" = Txt.concat [txt | NodeContent txt <- childs]
    -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition.
    | title:_ <- nested = title
    | otherwise = ""
  where
    -- Each child's result is bound once and reused for both the emptiness
    -- check and the returned value, so no subtree is traversed twice.
    nested =
        [ title
        | NodeElement el <- childs
        , let title = getTitle' el
        , not $ Txt.null title
        ]