{-# LANGUAGE RecordWildCards, PatternGuards #-}

module Text.HTML.TagSoup.Specification(parse) where

import Text.HTML.TagSoup.Implementation
import Data.Char (isAlpha, isAlphaNum, isDigit, toLower)

-- We make some generalisations:
-- <!name is a valid tag start closed by >
-- <?name is a valid tag start closed by ?>
-- </!name> is a valid closing tag
-- </?name> is a valid closing tag
-- <a "foo"> is a valid tag attibute in ! and ?, i.e missing an attribute name
-- We also don't do lowercase conversion
-- Entities are handled without a list of known entity names
-- We don't have RCData, CData or Escape modes (only effects dat and tagOpen)


data TypeTag = TypeNormal -- <foo
             | TypeXml    -- <?foo
             | TypeDecl   -- <!foo
             | TypeScript -- <script
               deriving TypeTag -> TypeTag -> Bool
(TypeTag -> TypeTag -> Bool)
-> (TypeTag -> TypeTag -> Bool) -> Eq TypeTag
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TypeTag -> TypeTag -> Bool
$c/= :: TypeTag -> TypeTag -> Bool
== :: TypeTag -> TypeTag -> Bool
$c== :: TypeTag -> TypeTag -> Bool
Eq


-- 2.4.1 Common parser idioms
white :: Char -> Bool
white x :: Char
x = Char
x Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` " \t\n\f\r"


-- 8.2.4 Tokenization

type Parser = S -> [Out]

parse :: String -> [Out]
parse :: [Char] -> [Out]
parse = Parser
dat Parser -> ([Char] -> S) -> [Char] -> [Out]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> S
state

-- 8.2.4.1 Data state
dat :: Parser
dat :: Parser
dat S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '&' -> Parser
charReference S
tl
    '<' -> Parser
tagOpen S
tl
    _ | Bool
eof -> []
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl


-- 8.2.4.2 Character reference data state
charReference :: Parser
charReference s :: S
s = Parser -> Bool -> Maybe Char -> Parser
charRef Parser
dat Bool
False Maybe Char
forall a. Maybe a
Nothing S
s


-- 8.2.4.3 Tag open state
tagOpen :: Parser
tagOpen S{..} = case Char
hd of
    '!' -> Parser
markupDeclOpen S
tl
    '/' -> Parser
closeTagOpen S
tl
    _ | Char -> Bool
isAlpha Char
hd -> Out
Tag Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName (if S -> Bool
isScript S
s then TypeTag
TypeScript else TypeTag
TypeNormal) S
tl
    '>' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "<>" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '<' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '>' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    '?' -> Parser
neilXmlTagOpen S
tl -- NEIL
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen  "<" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '<' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s

isScript :: S -> Bool
isScript = [Char] -> S -> Bool
f "script"
    where
        f :: [Char] -> S -> Bool
f (c :: Char
c:cs :: [Char]
cs) S{..} = Char -> Char
toLower Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c Bool -> Bool -> Bool
&& [Char] -> S -> Bool
f [Char]
cs S
tl
        f [] S{..} = Char -> Bool
white Char
hd Bool -> Bool -> Bool
|| Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '/' Bool -> Bool -> Bool
|| Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '>' Bool -> Bool -> Bool
|| Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '?' Bool -> Bool -> Bool
|| Bool
eof


-- seen "<?", emitted []
neilXmlTagOpen :: Parser
neilXmlTagOpen S{..} = case Char
hd of
    _ | Char -> Bool
isAlpha Char
hd -> Out
Tag Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '?' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName TypeTag
TypeXml S
tl
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "<?" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '<' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '?' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s

-- seen "?", expecting ">"
neilXmlTagClose :: Parser
neilXmlTagClose S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '>' -> Out
TagEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "?" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
beforeAttName TypeTag
TypeXml S
s


-- just seen ">" at the end, am given tl
neilTagEnd :: TypeTag -> Parser
neilTagEnd typ :: TypeTag
typ S{..}
    | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ [Char] -> Out
forall a. Show a => a -> Out
errWant "?>" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
TagEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeScript = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ Out
TagEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
neilScriptBody S
s
    | Bool
otherwise = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ Out
TagEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s

-- Inside a <script> tag, only break on </script
neilScriptBody :: Parser
neilScriptBody o :: S
o@S{..}
    | Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '<', S{..} <- S
tl
    , Char
hd Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '/', S{..} <- S
tl
    , S -> Bool
isScript S
s
    = Parser
dat S
o
    | Bool
eof = []
    | Bool
otherwise =  [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
neilScriptBody S
tl


-- 8.2.4.4 Close tag open state
-- Deviation: We ignore the if CDATA/RCDATA bits and tag matching
-- Deviation: On </> we output </> to the text
-- Deviation: </!name> is a closing tag, not a bogus comment
closeTagOpen :: Parser
closeTagOpen S{..} = case Char
hd of
    _ | Char -> Bool
isAlpha Char
hd Bool -> Bool -> Bool
|| Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "?!" -> Out
TagShut Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName TypeTag
TypeNormal S
tl
    '>' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "</>" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '<' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '/' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '>' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ | Bool
eof -> '<' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '/' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errWant "tag name" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
bogusComment S
s


-- 8.2.4.5 Tag name state
tagName :: TypeTag -> Parser
tagName typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
beforeAttName TypeTag
typ S
tl
    '/' -> TypeTag -> Parser
selfClosingStartTag TypeTag
typ S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | Char -> Bool
isAlpha Char
hd -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName TypeTag
typ S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName TypeTag
typ S
tl


-- 8.2.4.6 Before attribute name state
beforeAttName :: TypeTag -> Parser
beforeAttName typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
beforeAttName TypeTag
typ S
tl
    '/' -> TypeTag -> Parser
selfClosingStartTag TypeTag
typ S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
/= TypeTag
TypeNormal Bool -> Bool -> Bool
&& Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\'\"" -> TypeTag -> Parser
beforeAttValue TypeTag
typ S
s -- NEIL
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\"'<=" -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
AttName Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attName TypeTag
typ S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Out
AttName Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attName TypeTag
typ S
tl


-- 8.2.4.7 Attribute name state
attName :: TypeTag -> Parser
attName typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
afterAttName TypeTag
typ S
tl
    '/' -> TypeTag -> Parser
selfClosingStartTag TypeTag
typ S
tl
    '=' -> TypeTag -> Parser
beforeAttValue TypeTag
typ S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\"'<" -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Out]
def
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Out]
def
    where def :: [Out]
def = Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attName TypeTag
typ S
tl


-- 8.2.4.8 After attribute name state
afterAttName :: TypeTag -> Parser
afterAttName typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
afterAttName TypeTag
typ S
tl
    '/' -> TypeTag -> Parser
selfClosingStartTag TypeTag
typ S
tl
    '=' -> TypeTag -> Parser
beforeAttValue TypeTag
typ S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
/= TypeTag
TypeNormal Bool -> Bool -> Bool
&& Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\"'" -> Out
AttVal Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
beforeAttValue TypeTag
typ S
s -- NEIL
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\"'<" -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Out]
def
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Out]
def
    where def :: [Out]
def = Out
AttName Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attName TypeTag
typ S
tl

-- 8.2.4.9 Before attribute value state
beforeAttValue :: TypeTag -> Parser
beforeAttValue typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
beforeAttValue TypeTag
typ S
tl
    '\"' -> Out
AttVal Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueDQuoted TypeTag
typ S
tl
    '&' -> Out
AttVal Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueUnquoted TypeTag
typ S
s
    '\'' -> Out
AttVal Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueSQuoted TypeTag
typ S
tl
    '>' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "=" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "<=" -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Out]
def
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Out]
def
    where def :: [Out]
def = Out
AttVal Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueUnquoted TypeTag
typ S
tl


-- 8.2.4.10 Attribute value (double-quoted) state
attValueDQuoted :: TypeTag -> Parser
attValueDQuoted typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '\"' -> TypeTag -> Parser
afterAttValueQuoted TypeTag
typ S
tl
    '&' -> Parser -> Maybe Char -> Parser
charRefAttValue (TypeTag -> Parser
attValueDQuoted TypeTag
typ) (Char -> Maybe Char
forall a. a -> Maybe a
Just '\"') S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "\"" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueDQuoted TypeTag
typ S
tl


-- 8.2.4.11 Attribute value (single-quoted) state
attValueSQuoted :: TypeTag -> Parser
attValueSQuoted typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '\'' -> TypeTag -> Parser
afterAttValueQuoted TypeTag
typ S
tl
    '&' -> Parser -> Maybe Char -> Parser
charRefAttValue (TypeTag -> Parser
attValueSQuoted TypeTag
typ) (Char -> Maybe Char
forall a. a -> Maybe a
Just '\'') S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "\'" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueSQuoted TypeTag
typ S
tl


-- 8.2.4.12 Attribute value (unquoted) state
attValueUnquoted :: TypeTag -> Parser
attValueUnquoted typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
beforeAttName TypeTag
typ S
tl
    '&' -> Parser -> Maybe Char -> Parser
charRefAttValue (TypeTag -> Parser
attValueUnquoted TypeTag
typ) Maybe Char
forall a. Maybe a
Nothing S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\"'<=" -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Out]
def
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant (if TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml then "?>" else ">") Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Out]
def
    where def :: [Out]
def = Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
attValueUnquoted TypeTag
typ S
tl


-- 8.2.4.13 Character reference in attribute value state
charRefAttValue :: Parser -> Maybe Char -> Parser
charRefAttValue :: Parser -> Maybe Char -> Parser
charRefAttValue resume :: Parser
resume c :: Maybe Char
c s :: S
s = Parser -> Bool -> Maybe Char -> Parser
charRef Parser
resume Bool
True Maybe Char
c S
s


-- 8.2.4.14 After attribute value (quoted) state
afterAttValueQuoted :: TypeTag -> Parser
afterAttValueQuoted typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Char -> Bool
white Char
hd -> TypeTag -> Parser
beforeAttName TypeTag
typ S
tl
    '/' -> TypeTag -> Parser
selfClosingStartTag TypeTag
typ S
tl
    '>' -> TypeTag -> Parser
neilTagEnd TypeTag
typ S
tl
    '?' | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> Parser
neilXmlTagClose S
tl
    _ | Bool
eof -> Parser
dat S
s
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen [Char
hd] Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
beforeAttName TypeTag
typ S
s


-- 8.2.4.15 Self-closing start tag state
selfClosingStartTag :: TypeTag -> Parser
selfClosingStartTag typ :: TypeTag
typ S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | TypeTag
typ TypeTag -> TypeTag -> Bool
forall a. Eq a => a -> a -> Bool
== TypeTag
TypeXml -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "/" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
beforeAttName TypeTag
typ S
s
    '>' -> Out
TagEndClose Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant ">" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "/" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
beforeAttName TypeTag
typ S
s


-- 8.2.4.16 Bogus comment state
bogusComment :: Parser
bogusComment S{..} = Out
Comment Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
bogusComment1 S
s
bogusComment1 :: Parser
bogusComment1 S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '>' -> Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ | Bool
eof -> Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
bogusComment1 S
tl


-- 8.2.4.17 Markup declaration open state
markupDeclOpen :: Parser
markupDeclOpen S{..} = case Char
hd of
    _ | Just s :: S
s <- [Char] -> Maybe S
next "--" -> Out
Comment Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentStart S
s
    _ | Char -> Bool
isAlpha Char
hd -> Out
Tag Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '!' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& TypeTag -> Parser
tagName TypeTag
TypeDecl S
tl -- NEIL
    _ | Just s :: S
s <- [Char] -> Maybe S
next "[CDATA[" -> Parser
cdataSection S
s
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errWant "tag name" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
bogusComment S
s


-- 8.2.4.18 Comment start state
commentStart :: Parser
commentStart S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '-' -> Parser
commentStartDash S
tl
    '>' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "<!-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.19 Comment start dash state
commentStartDash :: Parser
commentStartDash S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '-' -> Parser
commentEnd S
tl
    '>' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "<!--->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.20 Comment state
comment :: Parser
comment S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '-' -> Parser
commentEndDash S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.21 Comment end dash state
commentEndDash :: Parser
commentEndDash S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '-' -> Parser
commentEnd S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.22 Comment end state
commentEnd :: Parser
commentEnd S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '>' -> Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    '-' -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentEnd S
tl
    _ | Char -> Bool
white Char
hd -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "--" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentEndSpace S
tl
    '!' -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "!" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentEndBang S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "--" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.23 Comment end bang state
commentEndBang :: Parser
commentEndBang S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '>' -> Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    '-' -> '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '!' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentEndDash S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '-' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '!' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.24 Comment end space state
commentEndSpace :: Parser
commentEndSpace S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    '>' -> Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
tl
    '-' -> Parser
commentEndDash S
tl
    _ | Char -> Bool
white Char
hd -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
commentEndSpace S
tl
    _ | Bool
eof -> [Char] -> Out
forall a. Show a => a -> Out
errWant "-->" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Out
CommentEnd Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
dat S
s
    _ -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
comment S
tl


-- 8.2.4.38 CDATA section state
cdataSection :: Parser
cdataSection S{..} = [Out] -> [Out]
pos ([Out] -> [Out]) -> [Out] -> [Out]
forall a b. (a -> b) -> a -> b
$ case Char
hd of
    _ | Just s :: S
s <- [Char] -> Maybe S
next "]]>" -> Parser
dat S
s
    _ | Bool
eof -> Parser
dat S
s
    _ | Bool
otherwise -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
cdataSection S
tl


-- 8.2.4.39 Tokenizing character references
-- Change from spec: this is reponsible for writing '&' if nothing is to be written
charRef :: Parser -> Bool -> Maybe Char -> S -> [Out]
charRef :: Parser -> Bool -> Maybe Char -> Parser
charRef resume :: Parser
resume att :: Bool
att end :: Maybe Char
end S{..} = case Char
hd of
    _ | Bool
eof Bool -> Bool -> Bool
|| Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "\t\n\f <&" Bool -> Bool -> Bool
|| Bool -> (Char -> Bool) -> Maybe Char -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
hd) Maybe Char
end -> '&' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
s
    '#' -> Parser -> S -> Parser
charRefNum Parser
resume S
s S
tl
    _ -> Parser -> Bool -> Parser
charRefAlpha Parser
resume Bool
att S
s

charRefNum :: Parser -> S -> Parser
charRefNum resume :: Parser
resume o :: S
o S{..} = case Char
hd of
    _ | Char
hd Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "xX" -> Parser -> S -> Bool -> Parser
charRefNum2 Parser
resume S
o Bool
True S
tl
    _ -> Parser -> S -> Bool -> Parser
charRefNum2 Parser
resume S
o Bool
False S
s

charRefNum2 :: Parser -> S -> Bool -> Parser
charRefNum2 resume :: Parser
resume o :: S
o hex :: Bool
hex S{..} = case Char
hd of
    _ | Bool -> Char -> Bool
hexChar Bool
hex Char
hd -> (if Bool
hex then Out
EntityHex else Out
EntityNum) Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser -> Bool -> Parser
charRefNum3 Parser
resume Bool
hex S
tl
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "&" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '&' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
o

charRefNum3 :: Parser -> Bool -> Parser
charRefNum3 resume :: Parser
resume hex :: Bool
hex S{..} = case Char
hd of
    _ | Bool -> Char -> Bool
hexChar Bool
hex Char
hd -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser -> Bool -> Parser
charRefNum3 Parser
resume Bool
hex S
tl
    ';' -> Bool -> Out
EntityEnd Bool
True Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
tl
    _ -> Bool -> Out
EntityEnd Bool
False Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Char] -> Out
forall a. Show a => a -> Out
errWant ";" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
s

charRefAlpha :: Parser -> Bool -> Parser
charRefAlpha resume :: Parser
resume att :: Bool
att S{..} = case Char
hd of
    _ | Char -> Bool
isAlpha Char
hd -> Out
EntityName Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser -> Bool -> Parser
charRefAlpha2 Parser
resume Bool
att S
tl
    _ -> [Char] -> Out
forall a. Show a => a -> Out
errSeen "&" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& '&' Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
s

charRefAlpha2 :: Parser -> Bool -> Parser
charRefAlpha2 resume :: Parser
resume att :: Bool
att S{..} = case Char
hd of
    _ | Char -> Bool
alphaChar Char
hd -> Char
hd Char -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser -> Bool -> Parser
charRefAlpha2 Parser
resume Bool
att S
tl
    ';' -> Bool -> Out
EntityEnd Bool
True Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
tl
    _ | Bool
att -> Bool -> Out
EntityEnd Bool
False Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
s
    _ -> Bool -> Out
EntityEnd Bool
False Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& [Char] -> Out
forall a. Show a => a -> Out
errWant ";" Out -> [Out] -> [Out]
forall a. Outable a => a -> [Out] -> [Out]
& Parser
resume S
s


alphaChar :: Char -> Bool
alphaChar x :: Char
x = Char -> Bool
isAlphaNum Char
x Bool -> Bool -> Bool
|| Char
x Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ":-_"

hexChar :: Bool -> Char -> Bool
hexChar False x :: Char
x = Char -> Bool
isDigit Char
x
hexChar True  x :: Char
x = Char -> Bool
isDigit Char
x Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'f') Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'F')