美文网首页
Mooc嵩天老师爬虫教学

Mooc嵩天老师爬虫教学

作者: Melece | 来源:发表于2019-07-26 15:23 被阅读0次
import requests
import time
def getHtmlText(url):
    """Fetch *url* and return the first 1000 characters of the response text.

    The request uses a 30 second timeout and the body is decoded with the
    encoding that requests detects from the content (``apparent_encoding``).

    Returns the literal string "产生异常" when the request fails or the
    server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text[:1000]
    # Catch only request-related failures so that KeyboardInterrupt and
    # programming errors are not silently turned into the error string.
    except requests.RequestException:
        return "产生异常"
    
    
if __name__ == "__main__":
    # Demo: fetch one page and print the beginning of its HTML.
    url = "https://item.jd.com/100003717483.html"
    print(getHtmlText(url))


import requests
import os
import traceback

def getPicture(url, root, path):
    """Download the resource at *url* and save it to *path*.

    *root* is the directory that should exist before saving; it is created
    (including any missing parent directories) when absent. Nothing is
    downloaded when *path* already exists.

    The outcome is reported on stdout: "保存成功" after a successful save,
    "文件已存在" when the file is already present, "产生异常" when the HTTP
    request fails, and the error text followed by "文件存取错误" when a
    filesystem operation fails. The function returns None in every case.
    """
    try:
        # makedirs also creates missing parents, and exist_ok avoids failing
        # when the directory is already there.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        try:
            r = requests.get(url, timeout=30)
            r.raise_for_status()
        except requests.RequestException:
            print("产生异常")
            return
        # r.content is the raw bytes of the body, so no text encoding applies.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("保存成功")
    except OSError as e:
        print(str(e))
        print("文件存取错误")
    
    
if __name__ == "__main__":
    # Demo: save one image into a local folder, naming the file after the
    # last path segment of the URL.
    url = "http://img13.360buyimg.com//n0/jfs/t1/60838/7/2192/143412/5d074d65E15353d21/12dd3bb5a9658f3c.jpg"
    root = "C://users//minghua//documents//get//"
    path = root + url.rsplit('/', 1)[-1]
    getPicture(url, root, path)

import requests
from bs4 import BeautifulSoup
import bs4

def getHtmlText(url):
    """Return the decoded body of *url*, or "" if anything goes wrong.

    The request times out after 30 seconds, HTTP error statuses are treated
    as failures, and the text is decoded using the encoding requests
    detects from the content. On failure the exception is printed.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return response.text
    except Exception as err:
        print(err)
        return ""

def makeUniList(text, ulist):
    """Extract table rows from the HTML in *text* and append them to *ulist*.

    For every tag that is a direct child of the first ``<tbody>``, the
    ``.string`` values of its first three ``<td>`` cells are appended to
    *ulist* as a three-item list. *ulist* is modified in place; nothing is
    returned.

    If the document has no ``<tbody>`` (for example when *text* is empty),
    *ulist* is left unchanged. Rows with fewer than three cells are skipped.
    """
    soup = BeautifulSoup(text, 'html.parser')
    tbody = soup.find('tbody')
    # find() returns None when there is no match; bail out instead of
    # failing on None.children.
    if tbody is None:
        return
    for tr in tbody.children:
        # Children also include whitespace/text nodes between the rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        td = tr.find_all('td')
        if len(td) < 3:
            continue
        ulist.append([td[0].string, td[1].string, td[2].string])
            

def printUniList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Each row is a sequence whose first three items are printed as the
    rank, name and location columns. Fewer than *num* rows are printed when
    *ulist* is shorter, and a non-positive *num* prints only the header.
    Items that are None are printed as empty strings.
    """
    tplt = "{0:^10}\t{1:{3}^20}\t{2:^10}"
    # chr(12288) is the full-width (ideographic) space; it is the fill
    # character for the centred name column.
    fill = chr(12288)
    print(tplt.format("排名", "名称", "位置", fill))
    # Slicing stops at the end of the list, unlike indexing with range(num).
    for row in ulist[:max(num, 0)]:
        rank, name, location = ("" if cell is None else cell for cell in row[:3])
        print(tplt.format(rank, name, location, fill))

def main():
    """Fetch the ranking page, parse its table and print the first 30 rows."""
    ranking_url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"
    rows = []
    makeUniList(getHtmlText(ranking_url), rows)
    printUniList(rows, 30)
    
# Run only when executed as a script, so importing this module does not
# trigger the network request.
if __name__ == "__main__":
    main()

相关文章

网友评论

      本文标题:Mooc嵩天老师爬虫教学

      本文链接:https://www.haomeiwen.com/subject/eicdrctx.html