栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

使用python解析xml成对应的html示例分享

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

使用python解析xml成对应的html示例分享

下面的示例使用 SAX 将 ddt.xml 解析成 html(输出文件为 ddt_SAX.html)。当然,如果已经有该 xml 对应的 xsl 文件,也可以直接用 libxml2 将其转换成 html。

复制代码 代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#---------------------------------------
#   程序:XML解析器
#   版本:01.0
#   作者:mupeng
#   日期:2013-12-18
#   语言:Python 2.7
#   功能:将xml解析成对应的html
#   注解:该程序用xml.sax模块的parse函数解析XML,并生成事件
#   继承ContentHandler并重写其事件处理函数
#   Dispatcher主要用于相应标签的起始、结束事件的派发
#---------------------------------------
from xml.sax.handler import ContentHandler
from xml.sax import parse

class Dispatcher:
    """Mixin that routes SAX element events to per-tag handler methods.

    For an element called ``name`` the handler looked up on ``self`` is
    ``<prefix><Name>``, where ``<Name>`` is ``name.capitalize()`` (first
    character upper-cased, the rest lower-cased) -- e.g. ``startTitle`` or
    ``endPubdate``.  If no such callable attribute exists, ``defaultStart`` /
    ``defaultEnd`` is used when available; otherwise the event is ignored.
    Handlers are always invoked without arguments.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Invoke the handler for a single element event.

        prefix -- event kind used to build the handler name ('start' / 'end')
        name   -- element name as reported by the parser
        attrs  -- accepted for interface compatibility; it is not forwarded
                  to the handler
        """
        handler = getattr(self, prefix + name.capitalize(), None)
        if not callable(handler):
            # No tag-specific handler: fall back to defaultStart / defaultEnd.
            handler = getattr(self, 'default' + prefix.capitalize(), None)
        if callable(handler):
            handler()

    def startElement(self, name, attrs):
        """SAX callback for an opening tag; delegates to dispatch()."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback for a closing tag; delegates to dispatch()."""
        self.dispatch('end', name)

class Website(Dispatcher, ContentHandler):

    def __init__(self):
        self.fout = open('ddt_SAX.html', 'w')
        self.imagein = False
        self.desflag = False
        self.item = False
        self.title = ''
        self.link = ''
        self.guid = ''
        self.url = ''
        self.pubdate = ''
        self.description = ''
        self.temp = ''
        self.prx = ''
    def startChannel(self):

        self.fout.write('''nn RSS-''')<br><br>    def endChannel(self):<BR>       self.fout.write('''<BR>                    <tr><td height="20"></td></tr><BR>                    </table><BR>                    </center><BR>                    <BR>                </body><BR>                </html><BR>                ''')<BR>       self.fout.close()</P> <P>    def characters(self, chars):<BR>        if chars.strip():<BR>            #chars = chars.strip()<BR>            self.temp += chars<BR>            #print self.temp<br><br>       <BR>    def startTitle(self):<br><br>        if self.item:<BR>            self.fout.write('''<BR>                        <tr bgcolor="#eeeeee">n<td style="padding-top:5px;padding-left:5px;" height="30">n<B><BR>                    ''')<br><br>    def endTitle(self):<br><br>        if not self.imagein and not self.item:<BR>            self.title = self.temp<BR>            self.temp = ''<BR>            self.fout.write(self.title.encode('gb2312'))<br><br>            #self.title = self.temp<BR>            self.fout.write('''<BR>                nnn

n
                n
                n
                n
               
                       
                       
                       
                           
                           
                       
                       
n
            ''')

        if self.item:
            self.title = self.temp
            self.temp = ''
            self.fout.write(self.title.encode('gb2312'))
            self.fout.write('''
                       
                       

                        ''')

    def startImage(self):
        self.imagein = True

    def endImage(self):
        self.imagein = False

    def startlink(self):
        if self.imagein:
            self.fout.write('''
           
    def endlink(self):
        self.link = self.temp
        self.temp = ''
        if self.imagein:
            self.fout.write(self.link.encode('gb2312'))
            self.fout.write('''" target="_blank">n ''')
        elif self.item:
            #self.link = self.temp
            pass
        else:
            self.fout.write(self.link)
            self.fout.write(''' " target="
      _blank
     "> ''')
            self.fout.write(self.title.encode('gb2312'))
            self.fout.write('''


                            ''')
            self.fout.write(self.description.encode('gb2312'))
            self.fout.write('''
                       
¸´ÖÆ´ËÒ³Á´½Ó                ÎÒҪǶÈë¸ÃÐÂÎÅÁÐ±íµ½ÎÒµÄÒ³Ãæ£¨¼òµ¥¡¢¿ìËÙ¡¢ÊµÊ±¡¢Ãâ·Ñ£©

                       
                            ''')

    def startUrl(self):
        if self.imagein:
            self.fout.write('''    def endUrl(self):
        self.url = self.temp
        self.temp = ''
        if self.imagein:
            self.fout.write(self.url.encode('gb2312'))
            self.fout.write('''" border="0">n
                           
                           
                           


                           
                           
                           
                       
                       
                       
                       
                        ''')

# Program entry point: parse ddt.xml, feeding the SAX events to a Website handler.
if __name__ == '__main__':
    handler = Website()
    parse('ddt.xml', handler)

转载请注明:文章转载自 www.mshxw.com
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号

                            ''')
        if self.item:
            #self.url = self.temp
            pass

    def defaultStart(self):
        pass
    def defaultEnd(self):
        self.temp = ''
    def startDescription(self):
        pass
    def endDescription(self):
        self.description = self.temp
        self.temp = ''
        if self.item:
            #self.fout.write('¡¡¡¡')
            self.fout.write(self.description.encode('gb2312'))

    def endGuid(self):
        """Store the text accumulated so far as the current guid."""
        # NOTE(review): unlike endDescription, self.temp is NOT reset here, so
        # the guid text stays in the buffer until some other end handler
        # clears it. endPubdate's startswith('http') check may rely on this --
        # confirm the intended behavior before changing it.
        self.guid = self.temp
    def endPubdate(self):
        if not self.temp.startswith('http'):
         self.pubdate = self.temp
         self.temp = ''
        else:
            self.pubdate = ''
    def startItem(self):
        self.item = True
    def endItem(self):
        self.item = False
        self.fout.write('''
                           


                                    self.fout.write(self.link)
        self.fout.write(''' " target="_blank"> ''')
        self.fout.write(self.guid)
        self.fout.write('''
                       
                       
''')
        self.fout.write(self.pubdate)
        self.fout.write('''