-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.XmlRegex
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   Regular Expression Matcher working on lists of XmlTrees

   It's intended to import this module with an explicit
   import declaration for not spoiling the namespace
   with these somewhat special arrows

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.XmlRegex
    ( XmlRegex
    , mkZero
    , mkUnit
    , mkPrim
    , mkPrim'
    , mkPrimA
    , mkDot
    , mkStar
    , mkAlt
    , mkAlts
    , mkSeq
    , mkSeqs
    , mkRep
    , mkRng
    , mkOpt
    , mkPerm
    , mkPerms
    , mkMerge
    , nullable
    , delta
    , matchXmlRegex
    , splitXmlRegex
    , scanXmlRegex
    , matchRegexA
    , splitRegexA
    , scanRegexA
    )
where

import           Control.Arrow.ListArrows

import           Data.Maybe

import           Text.XML.HXT.DOM.Interface
import           Text.XML.HXT.DOM.ShowXml   (xshow)

-- ------------------------------------------------------------
-- the exported regex arrows

-- | check whether a sequence of XmlTrees match an Xml regular expression
--
-- The arrow for 'matchXmlRegex'.
--
-- The expession is build up from simple arrows acting as predicate ('mkPrimA') for
-- an XmlTree and of the usual cobinators for sequence ('mkSeq'), repetition
-- ('mkStar', mkRep', 'mkRng') and choice ('mkAlt', 'mkOpt')

matchRegexA             :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA re :: XmlRegex
re ts :: LA XmlTree XmlTree
ts       = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [XmlTrees]) -> LA XmlTree XmlTrees
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. (\ s :: XmlTrees
s -> [XmlTrees] -> (String -> [XmlTrees]) -> Maybe String -> [XmlTrees]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [XmlTrees
s] ([XmlTrees] -> String -> [XmlTrees]
forall a b. a -> b -> a
const []) (Maybe String -> [XmlTrees])
-> (XmlTrees -> Maybe String) -> XmlTrees -> [XmlTrees]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex XmlRegex
re (XmlTrees -> [XmlTrees]) -> XmlTrees -> [XmlTrees]
forall a b. (a -> b) -> a -> b
$ XmlTrees
s)

-- | split the sequence of trees computed by the filter a into
--
-- The arrow for 'splitXmlRegex'.
--
-- a first part matching the regex and a rest,
-- if a prefix of the input sequence does not match the regex, the arrow fails
-- else the pair containing the result lists is returned

splitRegexA             :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA re :: XmlRegex
re ts :: LA XmlTree XmlTree
ts       = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [(XmlTrees, XmlTrees)])
-> LA XmlTree (XmlTrees, XmlTrees)
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. (Maybe (XmlTrees, XmlTrees) -> [(XmlTrees, XmlTrees)]
forall a. Maybe a -> [a]
maybeToList (Maybe (XmlTrees, XmlTrees) -> [(XmlTrees, XmlTrees)])
-> (XmlTrees -> Maybe (XmlTrees, XmlTrees))
-> XmlTrees
-> [(XmlTrees, XmlTrees)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re)

-- | scan the input sequence with a regex and give the result as a list of lists of trees back
-- the regex must at least match one input tree, so the empty sequence should not match the regex
--
-- The arrow for 'scanXmlRegex'.

scanRegexA              :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA :: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA re :: XmlRegex
re ts :: LA XmlTree XmlTree
ts        = LA XmlTree XmlTree
ts LA XmlTree XmlTree
-> (XmlTrees -> [XmlTrees]) -> LA XmlTree XmlTrees
forall (a :: * -> * -> *) b c d.
ArrowList a =>
a b c -> ([c] -> [d]) -> a b d
>>. ([XmlTrees] -> Maybe [XmlTrees] -> [XmlTrees]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [XmlTrees] -> [XmlTrees])
-> (XmlTrees -> Maybe [XmlTrees]) -> XmlTrees -> [XmlTrees]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex XmlRegex
re)

-- ------------------------------------------------------------

data XmlRegex   = Zero String
                | Unit
                | Sym  (XmlTree -> Bool) String    -- optional external repr. of predicate
                | Dot
                | Star  XmlRegex
                | Alt   XmlRegex XmlRegex
                | Seq   XmlRegex XmlRegex
                | Rep   Int      XmlRegex          -- 1 or more repetitions
                | Rng   Int Int  XmlRegex          -- n..m repetitions
                | Perm  XmlRegex XmlRegex
                | Merge XmlRegex XmlRegex

-- ------------------------------------------------------------

{- just for documentation

class Inv a where
    inv         :: a -> Bool

instance Inv XmlRegex where
    inv (Zero _)        = True
    inv Unit            = True
    inv (Sym p _)       = p holds for some XmlTrees
    inv Dot             = True
    inv (Star e)        = inv e
    inv (Alt e1 e2)     = inv e1 &&
                          inv e2
    inv (Seq e1 e2)     = inv e1 &&
                          inv e2
    inv (Rep i e)       = i > 0 && inv e
    inv (Rng i j e)     = (i < j || (i == j && i > 1)) &&
                          inv e
    inv (Perm e1 e2)    = inv e1 &&
                          inv e2
-}
-- ------------------------------------------------------------
--
-- smart constructors

mkZero          :: String -> XmlRegex
mkZero :: String -> XmlRegex
mkZero          = String -> XmlRegex
Zero

mkUnit          :: XmlRegex
mkUnit :: XmlRegex
mkUnit          = XmlRegex
Unit

mkPrim          :: (XmlTree -> Bool) -> XmlRegex
mkPrim :: (XmlTree -> Bool) -> XmlRegex
mkPrim p :: XmlTree -> Bool
p        = (XmlTree -> Bool) -> String -> XmlRegex
Sym XmlTree -> Bool
p ""

mkPrim'         :: (XmlTree -> Bool) -> String -> XmlRegex
mkPrim' :: (XmlTree -> Bool) -> String -> XmlRegex
mkPrim'         = (XmlTree -> Bool) -> String -> XmlRegex
Sym

mkPrimA         :: LA XmlTree XmlTree -> XmlRegex
mkPrimA :: LA XmlTree XmlTree -> XmlRegex
mkPrimA a :: LA XmlTree XmlTree
a       = (XmlTree -> Bool) -> XmlRegex
mkPrim (Bool -> Bool
not (Bool -> Bool) -> (XmlTree -> Bool) -> XmlTree -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (XmlTrees -> Bool) -> (XmlTree -> XmlTrees) -> XmlTree -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. LA XmlTree XmlTree -> XmlTree -> XmlTrees
forall a b. LA a b -> a -> [b]
runLA LA XmlTree XmlTree
a)

mkDot           :: XmlRegex
mkDot :: XmlRegex
mkDot           = XmlRegex
Dot

mkStar                  :: XmlRegex -> XmlRegex
mkStar :: XmlRegex -> XmlRegex
mkStar (Zero _)         = XmlRegex
mkUnit                -- {}* == ()
mkStar e :: XmlRegex
e@XmlRegex
Unit           = XmlRegex
e                     -- ()* == ()
mkStar e :: XmlRegex
e@(Star _e1 :: XmlRegex
_e1)     = XmlRegex
e                     -- (r*)* == r*
mkStar (Rep 1 e1 :: XmlRegex
e1)       = XmlRegex -> XmlRegex
mkStar XmlRegex
e1             -- (r+)* == r*
mkStar e :: XmlRegex
e@(Alt _ _)      = XmlRegex -> XmlRegex
Star (XmlRegex -> XmlRegex
rmStar XmlRegex
e)       -- (a*|b)* == (a|b)*
mkStar e :: XmlRegex
e                = XmlRegex -> XmlRegex
Star XmlRegex
e

rmStar  :: XmlRegex -> XmlRegex
rmStar :: XmlRegex -> XmlRegex
rmStar (Alt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)      = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex
rmStar XmlRegex
e1) (XmlRegex -> XmlRegex
rmStar XmlRegex
e2)
rmStar (Star e1 :: XmlRegex
e1)        = XmlRegex -> XmlRegex
rmStar XmlRegex
e1
rmStar (Rep 1 e1 :: XmlRegex
e1)       = XmlRegex -> XmlRegex
rmStar XmlRegex
e1
rmStar e1 :: XmlRegex
e1               = XmlRegex
e1

mkAlt                                   :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt :: XmlRegex -> XmlRegex -> XmlRegex
mkAlt e1 :: XmlRegex
e1            (Zero _)            = XmlRegex
e1                            -- e1 u {} = e1
mkAlt (Zero _)      e2 :: XmlRegex
e2                  = XmlRegex
e2                            -- {} u e2 = e2
mkAlt e1 :: XmlRegex
e1@(Star Dot) _e2 :: XmlRegex
_e2                 = XmlRegex
e1                            -- A* u e1 = A*
mkAlt _e1 :: XmlRegex
_e1           e2 :: XmlRegex
e2@(Star Dot)       = XmlRegex
e2                            -- e1 u A* = A*
mkAlt (Sym p1 :: XmlTree -> Bool
p1 e1 :: String
e1)   (Sym p2 :: XmlTree -> Bool
p2 e2 :: String
e2)         = (XmlTree -> Bool) -> String -> XmlRegex
mkPrim' (\ x :: XmlTree
x -> XmlTree -> Bool
p1 XmlTree
x Bool -> Bool -> Bool
|| XmlTree -> Bool
p2 XmlTree
x)  (String -> String -> String
e String
e1 String
e2) -- melting of predicates
                                          where
                                            e :: String -> String -> String
e "" x2 :: String
x2 = String
x2
                                            e x1 :: String
x1 "" = String
x1
                                            e x1 :: String
x1 x2 :: String
x2 = String
x1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ "|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
x2
mkAlt e1 :: XmlRegex
e1            e2 :: XmlRegex
e2@(Sym _ _)        = XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e2 XmlRegex
e1                   -- symmetry: predicates always first
mkAlt e1 :: XmlRegex
e1@(Sym _ _)  (Alt e2 :: XmlRegex
e2@(Sym _ _) e3 :: XmlRegex
e3)
                                        = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e1 XmlRegex
e2) XmlRegex
e3        -- prepare melting of predicates
mkAlt (Alt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)   e3 :: XmlRegex
e3                  = XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e1 (XmlRegex -> XmlRegex -> XmlRegex
mkAlt XmlRegex
e2 XmlRegex
e3)        -- associativity
mkAlt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2                             = XmlRegex -> XmlRegex -> XmlRegex
Alt XmlRegex
e1 XmlRegex
e2

mkAlts                          :: [XmlRegex] -> XmlRegex
mkAlts :: [XmlRegex] -> XmlRegex
mkAlts                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkAlt (String -> XmlRegex
mkZero "")

mkSeq                           :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq :: XmlRegex -> XmlRegex -> XmlRegex
mkSeq e1 :: XmlRegex
e1@(Zero _) _e2 :: XmlRegex
_e2           = XmlRegex
e1
mkSeq _e1 :: XmlRegex
_e1         e2 :: XmlRegex
e2@(Zero _)   = XmlRegex
e2
mkSeq Unit        e2 :: XmlRegex
e2            = XmlRegex
e2
mkSeq e1 :: XmlRegex
e1          Unit          = XmlRegex
e1
mkSeq (Seq e1 :: XmlRegex
e1 e2 :: XmlRegex
e2) e3 :: XmlRegex
e3            = XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
e1 (XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
e2 XmlRegex
e3)
mkSeq e1 :: XmlRegex
e1 e2 :: XmlRegex
e2                     = XmlRegex -> XmlRegex -> XmlRegex
Seq XmlRegex
e1 XmlRegex
e2

mkSeqs                          :: [XmlRegex] -> XmlRegex
mkSeqs :: [XmlRegex] -> XmlRegex
mkSeqs                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkSeq XmlRegex
mkUnit

mkRep           :: Int -> XmlRegex -> XmlRegex
mkRep :: Int -> XmlRegex -> XmlRegex
mkRep 0 e :: XmlRegex
e                       = XmlRegex -> XmlRegex
mkStar XmlRegex
e
mkRep _ e :: XmlRegex
e@(Zero _)              = XmlRegex
e
mkRep _ e :: XmlRegex
e@XmlRegex
Unit                  = XmlRegex
e
mkRep i :: Int
i e :: XmlRegex
e                       = Int -> XmlRegex -> XmlRegex
Rep Int
i XmlRegex
e

mkRng   :: Int -> Int -> XmlRegex -> XmlRegex
mkRng :: Int -> Int -> XmlRegex -> XmlRegex
mkRng 0  0  _e :: XmlRegex
_e                  = XmlRegex
mkUnit
mkRng 1  1  e :: XmlRegex
e                   = XmlRegex
e
mkRng lb :: Int
lb ub :: Int
ub _e :: XmlRegex
_e
    | Int
lb Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
ub                   = String -> XmlRegex
Zero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$
                                  "illegal range " String -> String -> String
forall a. [a] -> [a] -> [a]
++
                                  Int -> String
forall a. Show a => a -> String
show Int
lb String -> String -> String
forall a. [a] -> [a] -> [a]
++ ".." String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
ub
mkRng _l :: Int
_l _u :: Int
_u e :: XmlRegex
e@(Zero _)          = XmlRegex
e
mkRng _l :: Int
_l _u :: Int
_u e :: XmlRegex
e@XmlRegex
Unit              = XmlRegex
e
mkRng lb :: Int
lb ub :: Int
ub e :: XmlRegex
e                   = Int -> Int -> XmlRegex -> XmlRegex
Rng Int
lb Int
ub XmlRegex
e

mkOpt   :: XmlRegex -> XmlRegex
mkOpt :: XmlRegex -> XmlRegex
mkOpt   = Int -> Int -> XmlRegex -> XmlRegex
mkRng 0 1

mkPerm                           :: XmlRegex -> XmlRegex -> XmlRegex
mkPerm :: XmlRegex -> XmlRegex -> XmlRegex
mkPerm e1 :: XmlRegex
e1@(Zero _) _             = XmlRegex
e1
mkPerm _           e2 :: XmlRegex
e2@(Zero _)   = XmlRegex
e2
mkPerm Unit        e2 :: XmlRegex
e2            = XmlRegex
e2
mkPerm e1 :: XmlRegex
e1          Unit          = XmlRegex
e1
mkPerm e1 :: XmlRegex
e1          e2 :: XmlRegex
e2            = XmlRegex -> XmlRegex -> XmlRegex
Perm XmlRegex
e1 XmlRegex
e2

mkPerms                          :: [XmlRegex] -> XmlRegex
mkPerms :: [XmlRegex] -> XmlRegex
mkPerms                          = (XmlRegex -> XmlRegex -> XmlRegex)
-> XmlRegex -> [XmlRegex] -> XmlRegex
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
mkUnit

mkMerge                          :: XmlRegex -> XmlRegex -> XmlRegex
mkMerge :: XmlRegex -> XmlRegex -> XmlRegex
mkMerge e1 :: XmlRegex
e1@(Zero _) _            = XmlRegex
e1
mkMerge _           e2 :: XmlRegex
e2@(Zero _)  = XmlRegex
e2
mkMerge Unit        e2 :: XmlRegex
e2           = XmlRegex
e2
mkMerge e1 :: XmlRegex
e1          Unit         = XmlRegex
e1
mkMerge e1 :: XmlRegex
e1          e2 :: XmlRegex
e2           = XmlRegex -> XmlRegex -> XmlRegex
Merge XmlRegex
e1 XmlRegex
e2

-- ------------------------------------------------------------

instance Show XmlRegex where
    show :: XmlRegex -> String
show (Zero s :: String
s)       = "{err:" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ "}"
    show Unit           = "()"
    show (Sym _p :: XmlTree -> Bool
_p "")    = "<pred>"
    show (Sym _p :: XmlTree -> Bool
_p r :: String
r )    = String
r
    show Dot            = "."
    show (Star e :: XmlRegex
e)       = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")*"
    show (Alt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)    = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ "|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")"
    show (Seq e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)    = XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2
    show (Rep 1 e :: XmlRegex
e)      = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")+"
    show (Rep i :: Int
i e :: XmlRegex
e)      = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ "){" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. [a] -> [a] -> [a]
++ ",}"
    show (Rng 0 1 e :: XmlRegex
e)    = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")?"
    show (Rng i :: Int
i j :: Int
j e :: XmlRegex
e)    = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ "){" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i String -> String -> String
forall a. [a] -> [a] -> [a]
++ "," String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
j String -> String -> String
forall a. [a] -> [a] -> [a]
++ "}"
    show (Perm e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)   = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ "|" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")"
    show (Merge e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)  = "(" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e1 String -> String -> String
forall a. [a] -> [a] -> [a]
++ "&" String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e2 String -> String -> String
forall a. [a] -> [a] -> [a]
++ ")"

-- ------------------------------------------------------------

unexpected              :: XmlTree -> String -> String
unexpected :: XmlTree -> String -> String
unexpected t :: XmlTree
t e :: String
e          = String -> String
emsg String
e String -> String -> String
forall a. [a] -> [a] -> [a]
++ (Int -> String -> String
cut 80 (String -> String) -> (XmlTrees -> String) -> XmlTrees -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> String
xshow) [XmlTree
t]
    where
      emsg :: String -> String
emsg ""           = "unexpected: "
      emsg s :: String
s            = "expected: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s String -> String -> String
forall a. [a] -> [a] -> [a]
++ ", but got: "
      cut :: Int -> String -> String
cut n :: Int
n s :: String
s
          | String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest   = String
s'
          | Bool
otherwise   = String
s' String -> String -> String
forall a. [a] -> [a] -> [a]
++ "..."
          where
            (s' :: String
s', rest :: String
rest)  = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n String
s

-- ------------------------------------------------------------

nullable        :: XmlRegex -> Bool
nullable :: XmlRegex -> Bool
nullable (Zero _)       = Bool
False
nullable Unit           = Bool
True
nullable (Sym _p :: XmlTree -> Bool
_p _)     = Bool
False         -- assumption: p holds for at least one tree
nullable Dot            = Bool
False
nullable (Star _)       = Bool
True
nullable (Alt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)    = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
||
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Seq e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)    = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Rep _i :: Int
_i e :: XmlRegex
e)     = XmlRegex -> Bool
nullable XmlRegex
e
nullable (Rng i :: Int
i _ e :: XmlRegex
e)    = Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 Bool -> Bool -> Bool
||
                          XmlRegex -> Bool
nullable XmlRegex
e
nullable (Perm e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)   = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2
nullable (Merge e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)  = XmlRegex -> Bool
nullable XmlRegex
e1 Bool -> Bool -> Bool
&&
                          XmlRegex -> Bool
nullable XmlRegex
e2

-- ------------------------------------------------------------

delta   :: XmlRegex -> XmlTree -> XmlRegex
delta :: XmlRegex -> XmlTree -> XmlRegex
delta e :: XmlRegex
e@(Zero _)   _    = XmlRegex
e
delta Unit         c :: XmlTree
c    = String -> XmlRegex
mkZero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$ XmlTree -> String -> String
unexpected XmlTree
c ""
delta (Sym p :: XmlTree -> Bool
p e :: String
e)    c :: XmlTree
c
    | XmlTree -> Bool
p XmlTree
c               = XmlRegex
mkUnit
    | Bool
otherwise         = String -> XmlRegex
mkZero (String -> XmlRegex) -> String -> XmlRegex
forall a b. (a -> b) -> a -> b
$ XmlTree -> String -> String
unexpected XmlTree
c String
e
delta Dot          _    = XmlRegex
mkUnit
delta e :: XmlRegex
e@(Star e1 :: XmlRegex
e1)  c :: XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e
delta (Alt e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)  c :: XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
delta (Seq e1 :: XmlRegex
e1 e2 :: XmlRegex
e2)  c :: XmlTree
c
    | XmlRegex -> Bool
nullable XmlRegex
e1       = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2) (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
    | Bool
otherwise         = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2
delta (Rep i :: Int
i e :: XmlRegex
e)    c :: XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e XmlTree
c) (Int -> XmlRegex -> XmlRegex
mkRep (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) XmlRegex
e)
delta (Rng i :: Int
i j :: Int
j e :: XmlRegex
e)  c :: XmlTree
c    = XmlRegex -> XmlRegex -> XmlRegex
mkSeq (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e XmlTree
c) (Int -> Int -> XmlRegex -> XmlRegex
mkRng ((Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) Int -> Int -> Int
forall a. Ord a => a -> a -> a
`max` 0) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) XmlRegex
e)
delta (Perm e1 :: XmlRegex
e1 e2 :: XmlRegex
e2) c :: XmlTree
c    = case XmlRegex
e1' of
                            (Zero _) -> XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
e1 (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c)
                            _        -> XmlRegex -> XmlRegex -> XmlRegex
mkPerm XmlRegex
e1' XmlRegex
e2
                          where
                          e1' :: XmlRegex
e1' = XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c
delta (Merge e1 :: XmlRegex
e1 e2 :: XmlRegex
e2) c :: XmlTree
c   = XmlRegex -> XmlRegex -> XmlRegex
mkAlt (XmlRegex -> XmlRegex -> XmlRegex
mkMerge (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e1 XmlTree
c) XmlRegex
e2)
                                (XmlRegex -> XmlRegex -> XmlRegex
mkMerge XmlRegex
e1 (XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
e2 XmlTree
c))

-- ------------------------------------------------------------

delta'          :: XmlRegex -> XmlTrees -> XmlRegex
delta' :: XmlRegex -> XmlTrees -> XmlRegex
delta'          = (XmlRegex -> XmlTree -> XmlRegex)
-> XmlRegex -> XmlTrees -> XmlRegex
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl XmlRegex -> XmlTree -> XmlRegex
delta

-- | match a sequence of XML trees with a regular expression over trees
--
-- If the input matches, the result is Nothing, else Just an error message is returned

matchXmlRegex           :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex :: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex e :: XmlRegex
e
    = XmlRegex -> Maybe String
res (XmlRegex -> Maybe String)
-> (XmlTrees -> XmlRegex) -> XmlTrees -> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlRegex -> XmlTrees -> XmlRegex
delta' XmlRegex
e
    where
    res :: XmlRegex -> Maybe String
res (Zero er :: String
er)       = String -> Maybe String
forall a. a -> Maybe a
Just String
er
    res re :: XmlRegex
re
        | XmlRegex -> Bool
nullable XmlRegex
re   = Maybe String
forall a. Maybe a
Nothing       -- o.k.
        | Bool
otherwise     = String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String) -> String -> Maybe String
forall a b. (a -> b) -> a -> b
$ "input does not match " String -> String -> String
forall a. [a] -> [a] -> [a]
++ XmlRegex -> String
forall a. Show a => a -> String
show XmlRegex
e

-- ------------------------------------------------------------

-- | split a sequence of XML trees into a pair of a a matching prefix and a rest
--
-- If there is no matching prefix, Nothing is returned

splitXmlRegex           :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex :: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex re :: XmlRegex
re        = XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' XmlRegex
re []

splitXmlRegex'          :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' :: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' re :: XmlRegex
re res :: XmlTrees
res []
    | XmlRegex -> Bool
nullable XmlRegex
re       = (XmlTrees, XmlTrees) -> Maybe (XmlTrees, XmlTrees)
forall a. a -> Maybe a
Just (XmlTrees -> XmlTrees
forall a. [a] -> [a]
reverse XmlTrees
res, [])
    | Bool
otherwise         = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing

splitXmlRegex' (Zero _) _ _
                        = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing

splitXmlRegex' re :: XmlRegex
re res :: XmlTrees
res xs :: XmlTrees
xs@(x :: XmlTree
x:xs' :: XmlTrees
xs')
    | Maybe (XmlTrees, XmlTrees) -> Bool
forall a. Maybe a -> Bool
isJust Maybe (XmlTrees, XmlTrees)
res'       = Maybe (XmlTrees, XmlTrees)
res'
    | XmlRegex -> Bool
nullable XmlRegex
re       = (XmlTrees, XmlTrees) -> Maybe (XmlTrees, XmlTrees)
forall a. a -> Maybe a
Just (XmlTrees -> XmlTrees
forall a. [a] -> [a]
reverse XmlTrees
res, XmlTrees
xs)
    | Bool
otherwise         = Maybe (XmlTrees, XmlTrees)
forall a. Maybe a
Nothing
    where
    re' :: XmlRegex
re'  = XmlRegex -> XmlTree -> XmlRegex
delta XmlRegex
re XmlTree
x
    res' :: Maybe (XmlTrees, XmlTrees)
res' = XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' XmlRegex
re' (XmlTree
xXmlTree -> XmlTrees -> XmlTrees
forall a. a -> [a] -> [a]
:XmlTrees
res) XmlTrees
xs'

-- ------------------------------------------------------------

-- | scan a sequence of XML trees and split it into parts matching the given regex
--
-- If the parts cannot be split because of a missing match, or because of the
-- empty sequence as match, Nothing is returned

scanXmlRegex                            :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex :: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex re :: XmlRegex
re ts :: XmlTrees
ts                      = XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' XmlRegex
re (XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re XmlTrees
ts)

scanXmlRegex'                           :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' :: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' _  Nothing                = Maybe [XmlTrees]
forall a. Maybe a
Nothing
scanXmlRegex' _  (Just (rs :: XmlTrees
rs, []))        = [XmlTrees] -> Maybe [XmlTrees]
forall a. a -> Maybe a
Just [XmlTrees
rs]
scanXmlRegex' _  (Just ([], _))         = Maybe [XmlTrees]
forall a. Maybe a
Nothing       -- re is nullable (the empty word matches), nothing split off
                                                        -- would give infinite list of empty lists
scanXmlRegex' re :: XmlRegex
re (Just (rs :: XmlTrees
rs, rest :: XmlTrees
rest))
    | Maybe [XmlTrees] -> Bool
forall a. Maybe a -> Bool
isNothing Maybe [XmlTrees]
res                     = Maybe [XmlTrees]
forall a. Maybe a
Nothing
    | Bool
otherwise                         = [XmlTrees] -> Maybe [XmlTrees]
forall a. a -> Maybe a
Just (XmlTrees
rs XmlTrees -> [XmlTrees] -> [XmlTrees]
forall a. a -> [a] -> [a]
: Maybe [XmlTrees] -> [XmlTrees]
forall a. HasCallStack => Maybe a -> a
fromJust Maybe [XmlTrees]
res)
    where
    res :: Maybe [XmlTrees]
res = XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' XmlRegex
re (XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex XmlRegex
re XmlTrees
rest)

-- ------------------------------------------------------------