使用 lxml.html。
import lxml.html

# Placeholder from the original snippet: replace ``...`` with the markup
# string that should be parsed.
rexml = ...


def depth(node):
    """Return the number of elements from *node* up to the tree root.

    The count includes *node* itself, so an element without a parent has
    depth 1 and ``None`` has depth 0.
    """
    d = 0
    # Walk the parent chain; the loop ends once getparent() yields None.
    while node is not None:
        d += 1
        node = node.getparent()
    return d


tree = lxml.html.fromstring(rexml)

# For every <page> element print its depth, the text of its <url> and
# <title> children, and then a separator line.
for node in tree.iter('page'):
    print(depth(node))
    for url in node.iterfind('url'):
        print(url.text)
    for title in node.iterfind('title'):
        # The original called a nonexistent ``enpre`` method; ``encode`` is
        # the intended call.  Encoding suits Python 2's byte-oriented stdout;
        # on Python 3 print ``title.text`` directly to avoid a b'...' repr.
        print(title.text.encode("utf-8"))
    print('-' * 30)


