美文网首页
python爬虫相关

python爬虫相关

作者: 极速魔法 | 来源:发表于2017-04-08 13:57 被阅读16次
    # Download a single image from a fixed URL and save it locally as 12.jpg.
    import requests

    # url: direct link to the image file; 12.jpg: the name it gets on disk
    url='https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1491640885248&di=119159673e19737701726aa75b667a3b&imgtype=0&src=http%3A%2F%2Fdesk.fd.zol-img.com.cn%2Ft_s960x600c5%2Fg2%2FM00%2F0D%2F05%2FChMlWVW3ScSIEd9SAAGFKpo0iqgAAHvoQLIkAwAAYVC501.jpg'
    # timeout so a dead server cannot hang the script forever
    html=requests.get(url, timeout=10)
    # fail loudly on an HTTP error instead of silently saving an error page as a .jpg
    html.raise_for_status()
    with open('12.jpg','wb') as f:
        f.write(html.content)
    
    # Grab every <img src> from the site's front page and save the images
    # locally as 0.jpg, 1.jpg, 2.jpg, ...
    #! -*-coding:utf-8 -*-
    import requests
    from bs4 import BeautifulSoup

    html = requests.get('http://www.mmjpg.com/', timeout=10).content
    soup = BeautifulSoup(html, "html.parser")
    # only <img> tags that actually carry a src attribute
    img_tags = soup.select('img[src]')
    # enumerate replaces the hand-rolled counter k; img_tags replaces the
    # original loop variable `list`, which shadowed the builtin
    for index, tag in enumerate(img_tags):
        src = tag.get('src')
        print(src)
        with open(str(index) + '.jpg', "wb") as f:
            f.write(requests.get(src, timeout=10).content)
    
    
    
    #! -*-coding:utf-8 -*-
    import requests
    from bs4 import BeautifulSoup
    
    #Scrape the Douban Movie Top 250 list and save it to a text file
    def get_one_page(url):
        """Fetch one Top-250 list page and return its HTML text.

        A browser User-Agent header is sent because the site rejects the
        default python-requests UA; a timeout keeps a dead server from
        blocking the crawl indefinitely.
        """
        # fixed the original duplicated assignment (`head=head={...}`)
        headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        return response.text
    
    
    def parse_one_page(html):
        """Extract rank/title pairs from one page and append them to douban.txt.

        On these list pages the rank lives in an <em> tag and the film title
        in an <img> alt attribute, so zipping the two tag lists pairs each
        rank with its title. One line per film: "<rank>\\t<title>".
        """
        soup = BeautifulSoup(html, "html.parser")
        ranks = soup.find_all('em')
        titles = soup.find_all('img')
        # open the output file once instead of re-opening it for every row;
        # the `with` block also closes it, so no explicit f.close() is needed
        with open('douban.txt', "a+", encoding='utf-8') as f:
            for rank, title in zip(ranks, titles):
                f.write(rank.string + '\t' + title.get('alt') + '\n')
    
    
    if __name__ == '__main__':
        # The Top 250 spans ten pages of 25 films each: start=0, 25, ..., 225.
        for start in range(0, 250, 25):
            page_url = f'https://movie.douban.com/top250?start={start}&filter='
            parse_one_page(get_one_page(page_url))
    
    
    # Parse a JSON API response to print a simple weather report.
    import requests
    import json

    # citykey selects which city's forecast the API returns
    url = r'http://wthrcdn.etouch.cn/weather_mini?citykey=101210101'
    # timeout keeps the script from hanging if the API is unreachable
    json_str = requests.get(url, timeout=10).text

    data = json.loads(json_str)
    weather = data['data']
    print(data)
    print("city:", weather['city'])
    print("wendu:", weather['wendu'])
    
    

    相关文章

      网友评论

          本文标题:python爬虫相关

          本文链接:https://www.haomeiwen.com/subject/jjplattx.html