用户:Shizhao/itncode

#!/usr/bin/python
# -*- coding: utf-8  -*-

import os, sys, re
import wikipedia, query, datetime, BeautifulSoup

# Bot script: render Template:Itn through the MediaWiki "parse" API, walk the
# <li> entries of its first <ul>, and prepend an RSS <item> to the wiki page
# Template:Itn/rss for every entry whose linked title is not in the feed yet.
# Everything runs at module level (the file is executed as a script).

mysite = wikipedia.getSite()

itntext = u'{{Template:Itn}}'
itntitle = u'Template:Itn'
rsstitle = u'Template:Itn/rss'
rsspage = wikipedia.Page(mysite, rsstitle)
itnpage = wikipedia.Page(mysite, itntitle)

params = {
    'action': 'parse',
    'text': itntext,
}

# Captures the inner HTML of a rendered <li> element.
_LI_PATTERN = u'<li>(?P<li>.*?)</li>'
# Closing markup re-appended after the oldest item (which carried it) is dropped.
_RSS_FOOTER = u'\n</channel>\n</rss>'


def _render_item(title, body, image):
    """Return the wikitext of one feed entry (the part following ``<item>``).

    title -- value of the entry link's ``title`` attribute
    body  -- inner HTML of the news ``<li>``
    image -- HTML placed in front of the body, or u'' for none
    """
    href = u'{{subst:fullurl:%s}}' % title
    rtext = image + u'<p>' + body + u'</p><p><a href=\"' + href + u'\">阅读条目全文 >>></a></p>'
    return u'<title>新闻动态:%s</title>\n<link>%s</link>\n<guid>%s</guid>\n<description>%s</description>\n<pubDate>{{subst:#time:r}}</pubDate>\n<dc:creator>中文维基百科编者</dc:creator></item>\n' % (title, href, href, rtext)


def _merge_item(rsstext, wikitext):
    """Return ``rsstext`` with ``wikitext`` inserted as the first feed item.

    The text is split on ``<item>``; part 0 is whatever precedes the first
    item.  When there are already 7 or more parts (6+ items) the last part is
    dropped after the insert and the closing channel/rss markup is appended
    again, because the dropped part was the one that ended the document.
    """
    parts = rsstext.split(u'<item>')
    feed_full = len(parts) >= 7
    parts.insert(1, wikitext)
    if not feed_full:
        return u'<item>'.join(parts)
    parts.pop()
    return u'<item>'.join(parts) + _RSS_FOOTER


try:
    text = query.GetData(params, encodeTitle=False)[u'parse'][u'text'][u'*']

    soup = BeautifulSoup.BeautifulSoup(text)

    image = u''

    # soup.ul is None when the rendered template has no <ul>; iterate nothing
    # in that case instead of failing with a TypeError.
    news_list = soup.ul
    if news_list is None:
        news_list = []

    for i in news_list:
        # Children of the <ul> also include text nodes, which have no
        # ``name`` attribute; only real <li> tags are processed.
        if getattr(i, 'name', None) != 'li':
            continue

        if i.small is not None:
            # Drop the <small> note from the entry; this entry is the one
            # that gets the "floatright" <div> prepended to its description.
            i.small.extract()
            thumb = soup.find('div', "floatright")
            # find() returns None when there is no such <div>; do not let the
            # literal text "None" leak into the feed.
            if thumb is not None:
                image = unicode(thumb)
            else:
                image = u''
        else:
            image = u''

        # The entry's article is the first link inside the first <b>.
        # Entries without <b>, without <a>, or without a title are skipped.
        link = None
        if i.b is not None:
            link = i.b.a
        if link is None:
            continue
        try:
            title = link['title']
        except KeyError:
            continue

        # Use the last match, as before.  If nothing matches, skip the entry
        # rather than reuse the previous entry's text (or hit a NameError).
        matches = re.findall(_LI_PATTERN, unicode(i), re.I)
        if not matches:
            continue
        wikitext = _render_item(title, matches[-1], image)

        # Re-read the page each time: an earlier iteration may have saved it.
        rsstext = rsspage.get(force=True)
        if title not in rsstext:
            s = _merge_item(rsstext, wikitext)
            rsspage.put(s, u'Bot:新闻动态rss更新: [[%s]]' % title)
        else:
            # NOTE(review): the message says "DYK" although this script works
            # on Template:Itn; kept verbatim -- confirm whether it should
            # read "ITN".
            print("DYK no update.")
finally:
    # Always release the framework, even when the run aborts with an error.
    wikipedia.stopme()