import Foundation import libxml2 enum XMLParserError: Error { case xmlDeclarationInside(Int, Int) case xmlUnknown(Int) } enum XMLParserEvent { case streamStarted(attributes: [String: String]) case streamEnded case element(XMLElement) case parserError(XMLParserError) } final class XMLParser { let (element, elementContinuation) = AsyncStream.makeStream(of: XMLParserEvent.self, bufferingPolicy: .unbounded) private var ctx: xmlParserCtxtPtr? private var stack: [XMLElement] = [] private var xmlnss: [String: String] = [:] init() { ctx = xmlCreatePushParserCtxt(&saxHandler, Mem.bridge(self), nil, 0, nil) } deinit { elementContinuation.finish() xmlFreeParserCtxt(ctx) stack = [] xmlnss = [:] ctx = nil } func restart() { stack = [] xmlnss = [:] xmlFreeParserCtxt(ctx) ctx = xmlCreatePushParserCtxt(&saxHandler, Mem.bridge(self), nil, 0, nil) } func parse(data: Data) { data.withUnsafeBytes { [weak self] ptr in if let addr = ptr.baseAddress { let err = xmlParseChunk(self?.ctx, addr.assumingMemoryBound(to: CChar.self), Int32(data.count), 0) if err > 0 { if err == 64 { let rng = Data("? var prefix: UnsafePointer? var attrUri: UnsafePointer? var valueBegin: UnsafePointer? var valueEnd: UnsafePointer? } private struct Nss { var prefix: UnsafePointer? var uri: UnsafePointer? } private func strFromCUtf8(_ ptr: UnsafePointer?) -> String? 
{
    // A nil pointer means "no value" (e.g. an element without a prefix).
    guard let ptr else { return nil }
    return String(cString: ptr)
}

/// SAX handler table handed to `xmlCreatePushParserCtxt`.
///
/// Only three callbacks are installed: `characters`, `startElementNs` and
/// `endElementNs`; every other slot is left `nil`. `initialized` is set to
/// `XML_SAX2_MAGIC`, which libxml2 documents as the marker for SAX2-style
/// (namespace-aware) handler tables.
private var saxHandler = xmlSAXHandler(
    internalSubset: nil,
    isStandalone: nil,
    hasInternalSubset: nil,
    hasExternalSubset: nil,
    resolveEntity: nil,
    getEntity: nil,
    entityDecl: nil,
    notationDecl: nil,
    attributeDecl: nil,
    elementDecl: nil,
    unparsedEntityDecl: nil,
    setDocumentLocator: nil,
    startDocument: nil,
    endDocument: nil,
    startElement: nil,
    endElement: nil,
    reference: nil,
    characters: SAX_charactersFound,
    ignorableWhitespace: nil,
    processingInstruction: nil,
    comment: nil,
    warning: nil,
    error: nil, // unsafeBitCast(SAX_error, to: errorSAXFunc.self),
    fatalError: nil,
    getParameterEntity: nil,
    cdataBlock: nil,
    externalSubset: nil,
    initialized: XML_SAX2_MAGIC,
    _private: nil,
    startElementNs: SAX_startElement,
    endElementNs: SAX_endElement,
    serror: nil
)

/// Recovers the `XMLParser` that was registered as libxml2 user data.
///
/// The reference is taken unretained, so the callback does not change the
/// parser's ownership.
/// NOTE(review): assumes `Mem.bridge(self)` yields the object's opaque pointer
/// (the same assumption the previous `unsafeBitCast` relied on) — confirm.
private func xmlParser(fromUserData userData: UnsafeMutableRawPointer) -> XMLParser {
    Unmanaged<XMLParser>.fromOpaque(userData).takeUnretainedValue()
}

/// `characters` callback: decodes the reported bytes as UTF-8 and forwards the
/// text to `XMLParser.charactersFound(_:)`.
private let SAX_charactersFound: charactersSAXFunc = { ctx_, chars_, len_ in
    guard let ctx_, let chars_ else { return }
    // The callback supplies an explicit length, so copy exactly `len_` bytes
    // instead of scanning for a terminator.
    let data = Data(bytes: chars_, count: Int(len_))
    let chars = String(decoding: data, as: UTF8.self)
    xmlParser(fromUserData: ctx_).charactersFound(chars)
}

/// `startElementNs` callback: collects the element's attributes and namespace
/// declarations and forwards them to
/// `XMLParser.startElement(elementName:prefix:namespaces:attributes:)`.
///
/// - Attributes are keyed by `prefix:name` when a prefix is present, otherwise
///   by the bare local name; values are passed through `unescaped`.
/// - `namespaces` is `nil` when the element declares none; otherwise it maps
///   prefix (`""` when the prefix pointer is nil) to the unescaped URI.
private let SAX_startElement: startElementNsSAX2Func = { ctx_, localName, prefix_, _, nb_namespaces, namespaces_, nb_attributes, _, attributes_ in
    guard let name = strFromCUtf8(localName), let ctx_ else { return }
    let prefix = strFromCUtf8(prefix_)
    let parser = xmlParser(fromUserData: ctx_)

    // attributes
    var attributes: [String: String] = [:]
    if let attributes_, nb_attributes > 0 {
        let attributeCount = Int(nb_attributes)
        // The flat pointer array is viewed as `attributeCount` `Attr` records;
        // `Attr` is expected to mirror libxml2's per-attribute pointer layout.
        attributes_.withMemoryRebound(to: Attr.self, capacity: attributeCount) { attrs in
            for index in 0..<attributeCount {
                let attr = attrs[index]
                guard let attrName = strFromCUtf8(attr.name),
                      let valueBegin = attr.valueBegin,
                      let valueEnd = attr.valueEnd
                else { continue }

                // The value is delimited by a begin/end pointer pair, so its
                // length is the pointer distance.
                let data = Data(bytes: valueBegin, count: valueEnd - valueBegin)
                let value = String(decoding: data, as: UTF8.self).unescaped

                if let attrPrefix = strFromCUtf8(attr.prefix) {
                    attributes[attrPrefix + ":" + attrName] = value
                } else {
                    attributes[attrName] = value
                }
            }
        }
    }

    // namespaces
    var namespaces: [String: String]?
    if nb_namespaces > 0, let namespaces_ {
        let namespaceCount = Int(nb_namespaces)
        var declared: [String: String] = [:]
        namespaces_.withMemoryRebound(to: Nss.self, capacity: namespaceCount) { entries in
            for index in 0..<namespaceCount {
                let entry = entries[index]
                // Entries without a URI are skipped.
                guard let uri = strFromCUtf8(entry.uri) else { continue }
                declared[strFromCUtf8(entry.prefix) ?? ""] = uri.unescaped
            }
        }
        namespaces = declared
    }

    parser.startElement(elementName: name, prefix: prefix, namespaces: namespaces, attributes: attributes)
}

/// `endElementNs` callback: forwards the closing element's local name and
/// optional prefix to `XMLParser.endElement(elementName:prefix:)`.
private let SAX_endElement: endElementNsSAX2Func = { ctx_, localName, prefix_, _ in
    guard let name = strFromCUtf8(localName), let ctx_ else { return }
    let prefix = strFromCUtf8(prefix_)
    xmlParser(fromUserData: ctx_).endElement(elementName: name, prefix: prefix)
}