有几个不错的 HTML 解析库,它们使用 Swift 或 Objective-C 编写,如下所示:
- hpple
- NDHpple
- Kanna(原 Swift-HTML-Parser)
- Fuzi
- SwiftSoup
- Ji
请看下面针对上述各库给出的示例,这些示例主要使用 XPath 2.0 进行解析:
hpple :
let data = NSData(contentsOfFile: path)
let doc = TFHpple(htmlData: data)
if let elements = doc.searchWithXPathQuery("//a/@href[ends-with(.,'.txt')]") as? [TFHppleElement] {
    for element in elements {
        println(element.content)
    }
}

NDHpple :
let data = NSData(contentsOfFile: path)!
let html = NSString(data: data, encoding: NSUTF8StringEncoding)!
let doc = NDHpple(HTMLData: html)
if let elements = doc.searchWithXPathQuery("//a/@href[ends-with(.,'.txt')]") {
    for element in elements {
        println(element.children?.first?.content)
    }
}

Kanna (XPath and CSS Selectors):
let html = "<html><head></head><body><ul><li><input type='image' name='input1' value='string1value' /></li><li><input type='image' name='input2' value='string2value' /></li></ul><span ><b>Hello World 1</b></span><span ><b>Hello World 2</b></span><a href='example.com'>example(English)</a><a href='example.co.jp'>example(JP)</a></body>"
if let doc = Kanna.HTML(html: html, encoding: NSUTF8StringEncoding) {
    var bodyNode = doc.body
    if let inputNodes = bodyNode?.xpath("//a/@href[ends-with(.,'.txt')]") {
        for node in inputNodes {
            println(node.contents)
        }
    }
}

Fuzi (XPath and CSS Selectors) :
let html = "<html><head></head><body><ul><li><input type='image' name='input1' value='string1value' /></li><li><input type='image' name='input2' value='string2value' /></li></ul><span ><b>Hello World 1</b></span><span ><b>Hello World 2</b></span><a href='example.com'>example(English)</a><a href='example.co.jp'>example(JP)</a></body>"
do {
    // if encoding is omitted, it defaults to NSUTF8StringEncoding
    let doc = try HTMLDocument(string: html, encoding: NSUTF8StringEncoding)
    // XPath queries
    for anchor in doc.xpath("//a/@href[ends-with(.,'.txt')]") {
        print(anchor.stringValue)
    }
} catch let error {
    print(error)
}

该 ends-with 函数是 XPath 2.0 的一部分。
SwiftSoup(CSS Selectors) :
do {
    let doc: Document = try SwiftSoup.parse("...")
    let links: Elements = try doc.select("a[href]") // a with href
    let pngs: Elements = try doc.select("img[src$=.png]") // img with src ending .png
    let masthead: Element? = try doc.select("div.masthead").first() // div with class=masthead
    let resultLinks: Elements? = try doc.select("h3.r > a") // direct a after h3
} catch Exception.Error(let type, let message) {
    print(message)
} catch {
    print("error")
}

Ji(XPath) :
let jiDoc = Ji(htmlURL: URL(string: "http://www.apple.com/support")!)
let titleNode = jiDoc?.xPath("//head/title")?.first
print("title: \(titleNode?.content)") // title: Optional("Official Apple Support")

希望这对你有所帮助。



