258 lines
8 KiB
Swift
258 lines
8 KiB
Swift
|
import Foundation
|
||
|
import libxml2
|
||
|
|
||
|
/// Failures reported by `XMLParser` while consuming a chunk of data.
enum XMLParserError: Error {
    /// An `<?xml ` declaration was found inside the chunk being parsed.
    ///
    /// The first value is the libxml2 error code; the second is the index
    /// within the parsed chunk at which the `<?xml ` marker starts.
    case xmlDeclarationInside(Int, Int)

    /// libxml2 reported the given (positive) error code for the chunk.
    case xmlUnknown(Int)
}
|
||
|
|
||
|
/// Events published by `XMLParser` on its event stream.
enum XMLParserEvent {
    /// The opening `<stream:stream>` tag was seen.
    ///
    /// `attributes` holds the tag's attributes merged with its namespace
    /// declarations (the latter keyed by namespace prefix).
    case streamStarted(attributes: [String: String])

    /// The closing `</stream:stream>` tag was seen.
    case streamEnded

    /// A complete element (with all of its child nodes) was closed while no
    /// other element was still open.
    case element(XMLElement)

    /// The underlying parser reported an error for the last chunk.
    case parserError(XMLParserError)
}
|
||
|
|
||
|
/// Incremental (push-mode) XML parser built on libxml2's SAX2 interface.
///
/// Raw bytes are fed in through `parse(data:)`; everything the parser
/// recognises is published as `XMLParserEvent` values on the `element` stream.
final class XMLParser {
    /// Stream of parser events, and the continuation used to publish them.
    let (element, elementContinuation) = AsyncStream.makeStream(of: XMLParserEvent.self, bufferingPolicy: .unbounded)

    /// libxml2 push-parser context; replaced by `restart()`.
    private var ctx: xmlParserCtxtPtr?
    /// Elements that have been opened but not yet closed, innermost last.
    private var stack: [XMLElement] = []
    /// Namespace declarations (prefix -> URI) collected from parsed elements.
    private var xmlnss: [String: String] = [:]

    /// libxml2 error code that triggers the search for an embedded `<?xml `
    /// declaration in `parse(data:)`.
    /// NOTE(review): 64 appears to be `XML_ERR_RESERVED_XML_NAME` in libxml2's
    /// `xmlParserErrors` — confirm against the linked libxml2 version.
    private static let misplacedDeclarationErrorCode: Int32 = 64

    init() {
        ctx = makeContext()
    }

    deinit {
        // Stored properties are released automatically after deinit, so only
        // the stream and the C-side context need explicit teardown.
        elementContinuation.finish()
        xmlFreeParserCtxt(ctx)
    }

    /// Discards all parsing state and starts over with a fresh libxml2 context.
    func restart() {
        stack = []
        xmlnss = [:]
        xmlFreeParserCtxt(ctx)
        ctx = makeContext()
    }

    /// Feeds one chunk of bytes to the parser.
    ///
    /// Elements recognised in the chunk are published through the SAX
    /// callbacks. If libxml2 reports a positive error code, exactly one
    /// `.parserError` event is published for the chunk:
    /// `.xmlDeclarationInside` when the code is the "misplaced declaration"
    /// code and a `<?xml ` marker can be located in `data`, `.xmlUnknown`
    /// otherwise.
    ///
    /// - Parameter data: The bytes to parse. A chunk without backing storage
    ///   (e.g. empty data) is ignored.
    func parse(data: Data) {
        // The closure is non-escaping, so `self` can be used directly.
        data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> Void in
            guard let base = buffer.baseAddress else { return }

            let code = xmlParseChunk(ctx, base.assumingMemoryBound(to: CChar.self), Int32(buffer.count), 0)
            guard code > 0 else { return }

            if code == Self.misplacedDeclarationErrorCode,
               let position = data.range(of: Data("<?xml ".utf8))?.lowerBound {
                // Report the specific error only; previously the generic
                // `.xmlUnknown` was emitted as well for the same failure.
                elementContinuation.yield(.parserError(.xmlDeclarationInside(Int(code), position)))
            } else {
                elementContinuation.yield(.parserError(.xmlUnknown(Int(code))))
            }
        }
    }

    /// Creates a push-parser context that reports to `saxHandler`, passing
    /// `Mem.bridge(self)` as the user-data pointer handed to every callback.
    private func makeContext() -> xmlParserCtxtPtr? {
        xmlCreatePushParserCtxt(&saxHandler, Mem.bridge(self), nil, 0, nil)
    }
}
|
||
|
|
||
|
// MARK: Private part, parsing
|
||
|
private extension XMLParser {
    /// Handles an opening tag reported by the SAX layer.
    ///
    /// `<stream:stream>` is published immediately as `.streamStarted`; any
    /// other element is pushed onto the stack of open elements.
    ///
    /// - Parameters:
    ///   - elementName: Local name of the element.
    ///   - prefix: Namespace prefix of the element, if any.
    ///   - namespaces: Namespace declarations on this tag keyed by prefix
    ///     (the default namespace uses the empty string as its key).
    ///   - attributes: Attributes of the tag.
    func startElement(elementName: String, prefix: String?, namespaces: [String: String]?, attributes: [String: String]) {
        // Session start: attributes and namespace declarations are reported
        // together, with the declarations winning on key collisions.
        guard !(elementName == "stream" && prefix == "stream") else {
            let merged = attributes.merging(namespaces ?? [:]) { _, declared in declared }
            elementContinuation.yield(.streamStarted(attributes: merged))
            return
        }

        // Any other element: remember every prefixed namespace declaration.
        if let namespaces {
            xmlnss.merge(namespaces.filter { !$0.key.isEmpty }) { _, declared in declared }
        }

        // A prefixed element resolves through the known prefixes first and
        // falls back to the default namespace declared on this very tag.
        let xmlns: String? = prefix.flatMap { xmlnss[$0] } ?? namespaces?[""]

        // Keep the prefix in the name only when it could not be resolved.
        let name: String
        if let prefix, xmlns == nil {
            name = prefix + ":" + elementName
        } else {
            name = elementName
        }

        var element = XMLElement(name: name, xmlns: xmlns, attributes: attributes, content: nil, nodes: [])
        if let parent = stack.last {
            element = element.updateXmlns(parent.xmlns)
        }
        if xmlns != nil {
            element = element.updateXmlns(xmlns)
        }
        stack.append(element)
    }

    /// Handles a closing tag reported by the SAX layer.
    ///
    /// `</stream:stream>` is published as `.streamEnded`. Otherwise the
    /// innermost open element is closed: it is either attached to its parent
    /// or, when no parent is open, published as `.element`.
    func endElement(elementName: String, prefix: String?) {
        guard !(elementName == "stream" && prefix == "stream") else {
            elementContinuation.yield(.streamEnded)
            return
        }
        guard let finished = stack.popLast() else { return }

        guard var parent = stack.popLast() else {
            // Nothing else is open: the element is complete.
            elementContinuation.yield(.element(finished))
            return
        }
        parent = parent.addNode(finished)
        stack.append(parent)
    }

    /// Passes character data to the innermost open element, if there is one.
    func charactersFound(_ line: String) {
        if var current = stack.popLast() {
            current = current.updateContent(line)
            stack.append(current)
        }
    }
}
|
||
|
|
||
|
// =========================================================
|
||
|
// libxml2-specific stuff
|
||
|
/// Memory-layout view of one attribute entry in the array handed to the
/// start-element callback: five consecutive pointers per attribute.
///
/// The field order defines the layout and must not be changed.
private struct Attr {
    /// Attribute name (NUL-terminated).
    var name: UnsafePointer<UInt8>?
    /// Attribute namespace prefix (NUL-terminated), or `nil`.
    var prefix: UnsafePointer<UInt8>?
    /// Attribute namespace URI, or `nil`.
    var attrUri: UnsafePointer<UInt8>?
    /// First byte of the attribute value.
    var valueBegin: UnsafePointer<UInt8>?
    /// One past the last byte of the attribute value.
    var valueEnd: UnsafePointer<UInt8>?
}
|
||
|
|
||
|
/// Memory-layout view of one namespace entry in the array handed to the
/// start-element callback: two consecutive pointers per declaration.
///
/// The field order defines the layout and must not be changed.
private struct Nss {
    /// Namespace prefix (NUL-terminated), or `nil` when there is none.
    var prefix: UnsafePointer<UInt8>?
    /// Namespace URI (NUL-terminated).
    var uri: UnsafePointer<UInt8>?
}
|
||
|
|
||
|
/// Converts a NUL-terminated UTF-8 C string into a Swift `String`.
///
/// - Parameter ptr: Pointer to the first byte of the C string, or `nil`.
/// - Returns: The decoded string, or `nil` when `ptr` is `nil`.
private func strFromCUtf8(_ ptr: UnsafePointer<UInt8>?) -> String? {
    ptr.map { String(cString: $0) }
}
|
||
|
|
||
|
/// SAX handler table shared by every parser context.
///
/// Only the callbacks this file implements are installed; every other entry
/// keeps its zero value (`nil`). No error callback is installed (a
/// `SAX_error` hook was previously sketched for the `error` slot but is not
/// wired up).
private var saxHandler: xmlSAXHandler = {
    var handler = xmlSAXHandler()
    handler.characters = SAX_charactersFound
    handler.initialized = XML_SAX2_MAGIC
    handler.startElementNs = SAX_startElement
    handler.endElementNs = SAX_endElement
    return handler
}()
|
||
|
|
||
|
/// SAX callback for character data: decodes the reported bytes as UTF-8 and
/// forwards the text to the `XMLParser` passed as the callback's user data.
private let SAX_charactersFound: charactersSAXFunc = { userData, bytes, length in
    guard let userData, let bytes else { return }

    // The text is not NUL-terminated; its extent is given by `length`.
    let text = String(decoding: UnsafeBufferPointer(start: bytes, count: Int(length)), as: UTF8.self)
    unsafeBitCast(userData, to: XMLParser.self).charactersFound(text)
}
|
||
|
|
||
|
/// SAX2 callback for an opening tag: converts the C attribute and namespace
/// arrays into dictionaries and forwards them to the `XMLParser` passed as
/// the callback's user data.
private let SAX_startElement: startElementNsSAX2Func = { userData, localName, rawPrefix, _, namespaceCount, rawNamespaces, attributeCount, _, rawAttributes in
    guard let userData, let elementName = strFromCUtf8(localName) else { return }

    let parser = unsafeBitCast(userData, to: XMLParser.self)
    let elementPrefix = strFromCUtf8(rawPrefix)

    // Attributes: keyed by "prefix:name" when prefixed, plain name otherwise.
    var attributes: [String: String] = [:]
    if let rawAttributes {
        let count = Int(attributeCount)
        rawAttributes.withMemoryRebound(to: Attr.self, capacity: count) { entries in
            for index in stride(from: 0, to: count, by: 1) {
                let entry = entries[index]
                guard let localPart = strFromCUtf8(entry.name),
                      let valueStart = entry.valueBegin,
                      let valueEnd = entry.valueEnd else { continue }

                // The value is delimited by two pointers, not NUL-terminated.
                let valueBytes = UnsafeBufferPointer(start: valueStart, count: valueEnd - valueStart)
                let value = String(decoding: valueBytes, as: UTF8.self).unescaped
                let key = strFromCUtf8(entry.prefix).map { $0 + ":" + localPart } ?? localPart
                attributes[key] = value
            }
        }
    }

    // Namespace declarations: keyed by prefix, "" for the default namespace.
    // Stays `nil` when the tag declares no namespaces at all.
    var namespaces: [String: String]?
    if namespaceCount > 0, let rawNamespaces {
        let count = Int(namespaceCount)
        var declared: [String: String] = [:]
        rawNamespaces.withMemoryRebound(to: Nss.self, capacity: count) { entries in
            for index in 0..<count {
                let entry = entries[index]
                if let uri = strFromCUtf8(entry.uri) {
                    declared[strFromCUtf8(entry.prefix) ?? ""] = uri.unescaped
                }
            }
        }
        namespaces = declared
    }

    parser.startElement(elementName: elementName, prefix: elementPrefix, namespaces: namespaces, attributes: attributes)
}
|
||
|
|
||
|
/// SAX2 callback for a closing tag: forwards the element's local name and
/// prefix to the `XMLParser` passed as the callback's user data.
private let SAX_endElement: endElementNsSAX2Func = { userData, localName, rawPrefix, _ in
    guard let userData, let elementName = strFromCUtf8(localName) else { return }

    unsafeBitCast(userData, to: XMLParser.self)
        .endElement(elementName: elementName, prefix: strFromCUtf8(rawPrefix))
}
|