| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Data.Conduit.Parser.XML
Contents
Description
High-level primitives to parse a stream of XML Events.
Synopsis
- tag :: MonadCatch m => (Name -> Maybe a) -> (a -> AttrParser b) -> (b -> ConduitParser Event m c) -> ConduitParser Event m c
- tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
- tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
- tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
- tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
- anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a
- type AttributeMap = Map Name [Content]
- data AttrParser a
- attr :: Name -> (Text -> Maybe a) -> AttrParser a
- textAttr :: Name -> AttrParser Text
- anyAttr :: AttrParser (Name, [Content])
- ignoreAttrs :: AttrParser ()
- content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a
- textContent :: MonadCatch m => ConduitParser Event m Text
- parseBytes :: MonadThrow m => ParseSettings -> ConduitT ByteString Event m ()
- parseBytesPos :: MonadThrow m => ParseSettings -> ConduitT ByteString EventPos m ()
- parseText :: MonadThrow m => ParseSettings -> Conduit Text m Event
- parseTextPos :: MonadThrow m => ParseSettings -> ConduitT Text EventPos m ()
- detectUtf :: MonadThrow m => ConduitT ByteString Text m ()
- parseFile :: MonadResource m => ParseSettings -> FilePath -> ConduitT i Event m ()
- parseLBS :: MonadThrow m => ParseSettings -> ByteString -> ConduitT i Event m ()
- data ParseSettings
- type DecodeEntities = Text -> Content
- psDecodeEntities :: ParseSettings -> DecodeEntities
- psRetainNamespaces :: ParseSettings -> Bool
- decodeXmlEntities :: DecodeEntities
- decodeHtmlEntities :: DecodeEntities
- data XmlException
- = XmlException { }
- | InvalidEndElement Name (Maybe Event)
- | InvalidEntity String (Maybe Event)
- | MissingAttribute String
- | UnparsedAttributes [(Name, [Content])]
XML parsers
Tags
Arguments
| :: MonadCatch m | |
| => (Name -> Maybe a) | Tag name parser. |
| -> (a -> AttrParser b) | Attributes parser. It should consume all available attributes. |
| -> (b -> ConduitParser Event m c) | Children parser. It should consume all elements between the opening and closing tags. |
| -> ConduitParser Event m c |
Parse an XML tag, depending on its name and attributes. This is the most generic tag parser.
Comments, instructions and whitespace are ignored.
tagName :: MonadCatch m => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #
Like tag, but match a single tag name.
tagPredicate :: MonadCatch m => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b Source #
Like tag, but use a predicate to select tag names.
tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #
Like tagName, but expect no attributes at all.
tagIgnoreAttrs :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a Source #
Like tagName, but ignore all attributes.
anyTag :: MonadCatch m => (Name -> [(Name, [Content])] -> ConduitParser Event m a) -> ConduitParser Event m a Source #
Parse an XML tag, whatever its name and attributes.
Comments, instructions and whitespace are ignored.
Attributes
data AttrParser a Source #
Instances
attr :: Name -> (Text -> Maybe a) -> AttrParser a Source #
Parse a single attribute using a specific name and a custom parsing function for its value.
textAttr :: Name -> AttrParser Text Source #
Parse a single textual attribute.
anyAttr :: AttrParser (Name, [Content]) Source #
Parse a single attribute, whatever its name or value.
ignoreAttrs :: AttrParser () Source #
Consume all remaining unparsed attributes.
Content
content :: MonadCatch m => (Text -> Maybe a) -> ConduitParser Event m a Source #
Parse a tag content using a custom parsing function.
textContent :: MonadCatch m => ConduitParser Event m Text Source #
Parse a tag content as Text.
This parser fails if the tag is empty.
To get mempty instead of failing, use textContent <|> mempty.
Re-exports
Event producers
parseBytes :: MonadThrow m => ParseSettings -> ConduitT ByteString Event m () #
Parses a byte stream into Events. This function is implemented fully in
Haskell using attoparsec-text for parsing. The produced error messages do
not give line/column information, so you may prefer to stick with the parser
provided by libxml-enumerator. However, this has the advantage of not
relying on any C libraries.
This relies on detectUtf to determine character encoding, and parseText'
to do the actual parsing.
parseBytesPos :: MonadThrow m => ParseSettings -> ConduitT ByteString EventPos m () #
parseText :: MonadThrow m => ParseSettings -> Conduit Text m Event Source #
Alias for parseText'
parseTextPos :: MonadThrow m => ParseSettings -> ConduitT Text EventPos m () #
Same as parseText', but includes the position of each event.
Since 1.2.4
detectUtf :: MonadThrow m => ConduitT ByteString Text m () #
Automatically determine which UTF variant is being used. This function first checks for BOMs, removing them as necessary, and then checks for the equivalent of <?xml for each of UTF-8, UTF-16LE/BE, and UTF-32LE/BE. It defaults to assuming UTF-8.
parseFile :: MonadResource m => ParseSettings -> FilePath -> ConduitT i Event m () #
A helper function which reads a file from disk using enumFile, detects
character encoding using detectUtf, parses the XML using parseBytes, and
then hands off control to your supplied parser.
parseLBS :: MonadThrow m => ParseSettings -> ByteString -> ConduitT i Event m () #
Parse an event stream from a lazy ByteString.
Parser settings
data ParseSettings #
Instances
| Default ParseSettings | |
Defined in Text.XML.Stream.Parse Methods def :: ParseSettings # | |
type DecodeEntities = Text -> Content #
psRetainNamespaces :: ParseSettings -> Bool #
Whether the original xmlns attributes should be retained in the parsed values. For more information on motivation, see:
https://github.com/snoyberg/xml/issues/38
Default: False
Since 1.2.1
Entity decoding
decodeXmlEntities :: DecodeEntities #
Default implementation of DecodeEntities, which leaves the
entity as-is. Numeric character references and the five standard
entities (lt, gt, amp, quot, apos) are handled internally by the
parser.
decodeHtmlEntities :: DecodeEntities #
HTML4-compliant entity decoder. Handles the additional 248 entities defined by HTML 4 and XHTML 1.
Note that HTML 5 introduces a drastically larger number of entities, and this code does not recognize most of them.
Exceptions
data XmlException #
Constructors
| XmlException | |
Fields | |
| InvalidEndElement Name (Maybe Event) | |
| InvalidEntity String (Maybe Event) | |
| MissingAttribute String | |
| UnparsedAttributes [(Name, [Content])] | |
Instances
| Show XmlException | |
Defined in Text.XML.Stream.Parse Methods showsPrec :: Int -> XmlException -> ShowS # show :: XmlException -> String # showList :: [XmlException] -> ShowS # | |
| Exception XmlException | |
Defined in Text.XML.Stream.Parse Methods toException :: XmlException -> SomeException # fromException :: SomeException -> Maybe XmlException # displayException :: XmlException -> String # | |