美文网首页
Mooc嵩天老师爬虫教学

Mooc嵩天老师爬虫教学

作者: Melece | 来源:发表于2019-07-26 15:23 被阅读0次
    import requests
    import time
    def getHtmlText(url):
        """Fetch *url* and return the first 1000 characters of its text.

        The response is decoded with the encoding detected from its content
        (``apparent_encoding``) rather than the one declared in the headers.

        Returns the string "产生异常" if the request fails (connection error,
        timeout, or an HTTP error status).
        """
        try:
            r = requests.get(url, timeout=30)
            # Turn 4xx/5xx responses into exceptions so they are reported too.
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text[:1000]
        except requests.RequestException:
            # Only request failures are handled here; KeyboardInterrupt and
            # programming errors are no longer silently swallowed.
            return "产生异常"
        
        
    if __name__ == "__main__":
        # Demo: fetch a JD product page and print the start of its HTML.
        url = "https://item.jd.com/100003717483.html"
        print(getHtmlText(url))
    
    

    import requests
    import os
    import traceback
    
    def getPicture(url, root, path):
        """Download the resource at *url* and save it to *path*.

        Args:
            url: Address of the file to download.
            root: Directory that must exist before saving; it is created
                (including any missing parent directories) when absent.
            path: Destination file. An existing file is never overwritten.

        Progress and failures are reported on stdout; nothing is returned.
        """
        try:
            # makedirs handles nested directories and an already existing root.
            os.makedirs(root, exist_ok=True)
            if os.path.exists(path):
                print("文件已存在")
                return
            try:
                r = requests.get(url, timeout=30)
                r.raise_for_status()
            except requests.RequestException:
                print("产生异常")
                return
            # The payload is written as raw bytes, so no text encoding is
            # involved; the context manager closes the file on exit.
            with open(path, 'wb') as f:
                f.write(r.content)
            print("保存成功")
        except OSError as e:
            print(str(e))
            print("文件存取错误")
        
        
    if __name__ == "__main__":
        # Demo: save one JD product image into a local folder.
        url = "http://img13.360buyimg.com//n0/jfs/t1/60838/7/2192/143412/5d074d65E15353d21/12dd3bb5a9658f3c.jpg"
        root = "C://users//minghua//documents//get//"
        # The local file name is the last segment of the URL.
        path = root + url.rsplit('/', 1)[-1]
        getPicture(url, root, path)
    
    
    import requests
    from bs4 import BeautifulSoup
    import bs4
    
    def getHtmlText(url):
        """Return the decoded text of the page at *url*.

        The encoding detected from the response content is used for decoding.
        On any failure the error is printed and an empty string is returned.
        """
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            response.encoding = response.apparent_encoding
            html = response.text
        except Exception as err:
            print(err)
            html = ""
        return html
    
    def makeUniList(text, ulist):
        """Parse the ranking table in *text* and append its rows to *ulist*.

        For every row element directly under the first ``<tbody>``, a list
        with the string content of its first three ``<td>`` cells is appended
        to *ulist* (mutated in place). A cell's ``.string`` may be ``None``
        when the cell contains nested markup.

        If the page has no ``<tbody>`` (e.g. *text* is empty because the
        download failed) nothing is appended. Rows with fewer than three
        cells are skipped.
        """
        soup = BeautifulSoup(text, 'html.parser')
        tbody = soup.find('tbody')
        if tbody is None:
            return
        for tr in tbody.children:
            # .children also yields whitespace text nodes; only tags are rows.
            if not isinstance(tr, bs4.element.Tag):
                continue
            td = tr.find_all('td')
            if len(td) < 3:
                continue
            ulist.append([td[0].string, td[1].string, td[2].string])
                
    
    def printUniList(ulist, num):
        """Print a header and up to *num* rows of *ulist* as an aligned table.

        Args:
            ulist: Sequence of rows; the first three items of each row are
                printed as rank, name and location.
            num: Maximum number of rows to print. When *ulist* holds fewer
                rows, only the available ones are printed.

        The name column is padded with the full-width space (U+3000) so that
        CJK text stays aligned. ``None`` cells are printed as empty strings.
        """
        tplt = "{0:^10}\t{1:{3}^20}\t{2:^10}"
        fill = chr(12288)
        print(tplt.format("排名", "名称", "位置", fill))
        # Slicing never runs past the end of the list; max() keeps a negative
        # num meaning "no rows" instead of slicing from the end.
        for row in ulist[:max(num, 0)]:
            rank, name, place = ("" if cell is None else cell for cell in row[:3])
            print(tplt.format(rank, name, place, fill))
    
    def main():
        """Download the 2018 university ranking page and print its top 30 rows."""
        rankings = []
        page = getHtmlText("http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html")
        makeUniList(page, rankings)
        printUniList(rankings, 30)
        
    # Run the scraper only when executed as a script, not when imported.
    if __name__ == "__main__":
        main()
    

    相关文章

      网友评论

          本文标题:Mooc嵩天老师爬虫教学

          本文链接:https://www.haomeiwen.com/subject/eicdrctx.html