Python

作者: 星云之水 | 来源:发表于2017-05-19 10:39 被阅读0次

    1.由KB号取得KB的中文/日语名称

    正则,文件读写,爬虫(代理)
    python2.7测试通过
    使用方法:将要查询的KB号逐行写入kb.txt(每行一个),运行脚本后在kbnum.txt中查看结果(每行格式为"KB号<Tab>标题")

    # -*- coding:utf-8 -*-
    import urllib
    import urllib2
    import re,sys,os
    # Python 2 only: force UTF-8 as the interpreter's default string encoding
    # so implicit str/unicode conversions do not produce garbled text.
    reload(sys)                     # re-import sys so that setdefaultencoding is available again
    sys.setdefaultencoding('utf8')  # use UTF-8 as the default encoding to avoid mojibake
    def list(url, accept_language='ja-JP'):
        """Fetch one KB article and append its heading(s) to kbnum.txt.

        The response body is searched for every ``"heading": "...",`` entry
        and each match is appended to ``kbnum.txt`` as
        ``<KB number>\\t<heading>\\n``.

        Args:
            url: Full article URL whose last path segment is the KB number.
                Surrounding whitespace (for example the newline left over
                from reading the URL out of a file) is ignored.
            accept_language: Value sent as the ``Accept-Language`` header.
                Defaults to ``'ja-JP'``; another value such as
                ``'zh-CN,zh;q=0.8'`` can be passed instead.

        Raises:
            urllib2.URLError: If the request fails.
            IOError: If ``kbnum.txt`` cannot be opened for appending.
        """
        # Callers hand over lines read straight from a file, so drop the
        # trailing newline before using the value as a URL.
        url = url.strip()
        # The KB number is the last path segment, whatever its length.
        kb_number = url.rsplit('/', 1)[-1]

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3095.5 Safari/537.36',
            'Accept-Language': accept_language,
        }

        # The output file is opened first (append mode) so it exists after
        # the call even when nothing is written; "with" closes it on errors.
        with open('kbnum.txt', 'a') as file_object:
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                pageCode = response.read()
            finally:
                response.close()

            # re.S lets "." match newlines, so a heading may span lines.
            pattern = re.compile('"heading": "(.*?)",', re.S)
            for item in pattern.findall(pageCode):
                file_object.write(kb_number + "\t" + item.decode('utf-8') + "\n")
    
    def filelist():
        """Rebuild kbnum.txt from the KB numbers listed in kb.txt.

        Any existing ``kbnum.txt`` is deleted first, then ``kb.txt`` is read
        line by line (one KB number per line) and ``list()`` is called with
        the article URL built from each non-blank line.

        Raises:
            IOError: If ``kb.txt`` cannot be opened.
        """
        # Start from a clean output file; on a first run there is nothing
        # to delete, which must not abort the script.
        if os.path.exists('kbnum.txt'):
            os.remove('kbnum.txt')

        with open('kb.txt') as f:
            for line in f:
                kb_number = line.strip()
                if not kb_number:
                    # Blank lines would otherwise request the bare API prefix.
                    continue
                # Normalise every entry to end with exactly one newline (the
                # form in which lines of kb.txt are normally passed to
                # list()), regardless of CRLF endings or a missing newline
                # on the last line.
                url1 = "https://support.microsoft.com/api/content/help/" + kb_number + "\n"
                list(url1)
            
    # Script entry point: run the batch only when executed directly,
    # not when this module is imported.
    if __name__ == "__main__":
        filelist()
    
    fdfdf
    

    相关文章

      网友评论

          本文标题:Python

          本文链接:https://www.haomeiwen.com/subject/zlxyxxtx.html