维基专题:传统百科全书条目/cnki.py

import requests, re, pywikibot, time

def cnkiextract(bid, page):
    """Fetch one listing page from xuewen.cnki.net and extract its entries.

    Args:
        bid: Book identifier string, inserted as the ``bid`` query parameter.
        page: Listing page number; converted with ``str`` for the URL.

    Returns:
        A list of ``(href, title)`` tuples, one per matching ``<a>`` element,
        where ``href`` is a relative ``*.html`` link and ``title`` is the
        link text with the surrounding spaces/newlines stripped by the regex.

    Raises:
        requests.RequestException: if the request fails again after the
            single retry.
    """
    # Build the URL once so the first attempt and the retry cannot drift apart.
    url = (
        "https://xuewen.cnki.net/DetailList.aspx?code=&key=&bid=" + bid
        + "&cid=&type=&sort=0&otype=0&page=" + str(page)
    )
    try:
        data = requests.get(url).text
    except requests.RequestException as e:
        # One retry after a pause; a second failure propagates to the caller.
        print(e)
        time.sleep(30)
        data = requests.get(url).text
    # Raw string: "\." is a regex escape, not a valid Python string escape.
    return re.findall(
        r'<a target="_blank" href="([A-Za-z0-9]+\.html)">[ \r\n]+(.*?)[ \r\n]+</a>',
        data,
    )

def cnki(bid, maxpage):
    """Collect the entries of listing pages 1 through ``maxpage`` for ``bid``.

    Prints each page number before fetching it, and returns a single list
    holding every page's ``cnkiextract`` results in page order.
    """
    collected = []
    for page_no in range(1, maxpage + 1):
        print(page_no)  # progress indicator
        page_entries = cnkiextract(bid, page_no)
        collected += page_entries
    return collected

def createpage(name, bid, page):
    """Write the entry list of a CNKI reference work to a zh.wikipedia page.

    Args:
        name: Subpage title appended to
            ``Wikipedia:传统百科全书条目专题/``.
        bid: Book identifier passed through to ``cnki``.
        page: Number of listing pages to fetch (pages 1..page).

    The page text is one bullet per entry: a wikilink to the entry title
    followed by an external link to the entry on xuewen.cnki.net.
    """
    p = pywikibot.Page(
        pywikibot.Site("zh", "wikipedia"),
        "Wikipedia:传统百科全书条目专题/" + name,
    )
    # The original referenced an undefined name ``maxpage`` here, which raised
    # NameError on every call; the page count is the ``page`` parameter.
    p.text = "\n".join(
        "*[[" + i[1] + "]] [https://xuewen.cnki.net/" + i[0] + "]"
        for i in cnki(bid, page)
    )
    p.save()

# createpage("浙江民国人物大辞典","R201309128",172)