可以使用
xml.etree.ElementTree.TreeBuilder 和 etree API 来查找/操作
<span>元素:
import sys

try:  # Python 3 location of the parser class
    from html.parser import HTMLParser
except ImportError:  # Python 2 fallback
    from HTMLParser import HTMLParser

try:  # C-accelerated module on Python 2; removed in Python 3.9
    from xml.etree import cElementTree as etree
except ImportError:
    from xml.etree import ElementTree as etree


class linksParser(HTMLParser):
    """Turn HTML parser events into an ElementTree element tree.

    Every start tag, end tag and text chunk reported by ``HTMLParser`` is
    forwarded to an ``etree.TreeBuilder``; ``close()`` returns the root
    element of the tree that was built.
    """

    def __init__(self):
        # Explicit base-class call (instead of super()) so the same code
        # runs on both Python 2 and Python 3.
        HTMLParser.__init__(self)
        self.tb = etree.TreeBuilder()

    def handle_starttag(self, tag, attributes):
        # `attributes` arrives as a list of (name, value) pairs.
        self.tb.start(tag, dict(attributes))

    def handle_endtag(self, tag):
        self.tb.end(tag)

    def handle_data(self, data):
        self.tb.data(data)

    def close(self):
        """Finish parsing and return the root element of the built tree."""
        HTMLParser.close(self)
        return self.tb.close()


if __name__ == "__main__":
    parser = linksParser()
    parser.feed(sys.stdin.read())
    root = parser.close()
    span = root.find(".//span[@itemprop='description']")
    if span is None:
        sys.exit("no <span itemprop='description'> element found")
    # On Python 3 sys.stdout is a text stream, so the tree must be
    # serialized as str (encoding="unicode"); Python 2 keeps the default.
    output_encoding = "unicode" if sys.version_info[0] >= 3 else None
    etree.ElementTree(span).write(sys.stdout, encoding=output_encoding)
# Output:
<span itemprop="description"><h1>My First Heading</h1><p>My first <br /><br />paragraph.</p></span>
要在没有父(根)
<span>标签的情况下进行打印:
# Print the contents of `span` without the enclosing <span> tag itself.
# On Python 3, etree.tostring() returns bytes unless encoding="unicode" is
# passed; Python 2 keeps its default encoding.
text_encoding = "unicode" if sys.version_info[0] >= 3 else None
# span.text is None when no text precedes the first child element, and
# sys.stdout.write(None) would raise TypeError.
sys.stdout.write(span.text or "")
for child in span:
    # tostring() also emits the child's tail text, so text between
    # children is preserved.
    sys.stdout.write(etree.tostring(child, encoding=text_encoding))



