美文网首页
Python3实战:批量下载妹子图片

Python3实战:批量下载妹子图片

作者: 我的袜子都是洞 | 来源:发表于2018-11-13 23:14 被阅读51次

    目标网站:http://www.mmjpg.com

    网站截图 下载过程图

    说明:
    代码来源:《「福利向」Python妹子图爬虫(一)》。
    不使用爬虫框架,只依赖 requests、lxml 和 selenium,简单易上手。

    实例代码:

    import requests
    from lxml import etree
    import time
    from selenium import webdriver
    import os
    
    # Root directory for downloaded galleries, resolved against the working
    # directory at import time. The trailing slash is kept because the
    # downloader appends the gallery name to this value directly.
    PICTURES_PATH = os.path.join(
        os.getcwd(),
        './pictures/',
    )

    # HTTP headers sent with image requests: a desktop Chrome User-Agent plus
    # a Referer pointing at the site.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/65.0.3325.181 Safari/537.36'
        ),
        'Referer': "http://www.mmjpg.com",
    }
    
    class Spider(object):
        """Scrape gallery images from mmjpg.com and save them under PICTURES_PATH.

        Intended call order: ``get_page_urls`` (build the listing-page URLs),
        ``get_girl_urls`` (collect gallery URLs from those pages), then
        ``get_pic_urls`` (open each gallery in Chrome, collect its image URLs
        and download them via ``download_pic``).
        """

        # Seconds to wait for a single HTTP response before giving up, so one
        # stalled connection cannot hang the whole crawl.
        REQUEST_TIMEOUT = 10

        def __init__(self):
            # Number of listing pages to crawl (page 1 is the site root).
            self.page_num = 40
            # Listing-page URLs, seeded with the site root.
            self.page_urls = ['http://www.mmjpg.com/']
            # Gallery URLs collected from the listing pages.
            self.girl_urls = []
            # Title of the gallery currently being processed.
            self.girl_name = ''
            # Image URLs of the gallery currently being processed.
            self.pic_urls = []

        def get_page_urls(self):
            """Append the URLs of listing pages 2..page_num to ``self.page_urls``.

            Nothing is appended when ``page_num`` is 1 or less, because the
            first page is already present as the site root.
            """
            for n in range(2, int(self.page_num) + 1):
                self.page_urls.append('http://www.mmjpg.com/home/' + str(n))

        def get_girl_urls(self):
            """Collect gallery links from every listing page into ``self.girl_urls``.

            A listing page that cannot be fetched or parsed is reported and
            skipped so that one bad page does not abort the whole crawl.
            """
            for page_url in self.page_urls:
                try:
                    response = requests.get(
                        page_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
                    response.raise_for_status()
                except requests.RequestException as e:
                    print("{}获取失败".format(page_url) + str(e))
                    continue
                selector = etree.HTML(response.content)
                if selector is None:
                    # lxml returns None for an empty document.
                    continue
                self.girl_urls += selector.xpath('//span[@class="title"]/a/@href')

        def get_pic_urls(self):
            """Visit each gallery in Chrome, read its image URLs and download them.

            For every gallery this sets ``self.girl_name`` and ``self.pic_urls``
            and then calls ``download_pic``. A download failure is printed and
            the crawl continues with the next gallery. The browser is always
            closed, even when navigation raises.
            """
            if not self.girl_urls:
                # Nothing to visit: do not start a browser at all.
                return
            driver = webdriver.Chrome()
            try:
                for girl_url in self.girl_urls:
                    driver.get(girl_url)
                    time.sleep(3)
                    # 'xpath' is the value of By.XPATH; find_element(by, value)
                    # exists in both Selenium 3 and 4, unlike the removed
                    # find_element_by_xpath helper.
                    driver.find_element('xpath', '//em[@class="ch all"]').click()
                    # Give the page time to render all images before reading
                    # its HTML source.
                    time.sleep(3)
                    selector = etree.HTML(driver.page_source)
                    titles = selector.xpath('//div[@class="article"]/h2/text()')
                    if not titles:
                        # Without a title there is no folder name to save under.
                        print("{}未找到标题,已跳过".format(girl_url))
                        continue
                    self.girl_name = titles[0]
                    self.pic_urls = selector.xpath('//div[@id="content"]/img/@data-img')
                    try:
                        self.download_pic()
                    except Exception as e:
                        print("{}保存失败".format(self.girl_name) + str(e))
            finally:
                driver.quit()

        def download_pic(self):
            """Save ``self.pic_urls`` as 1.jpg, 2.jpg, ... in the gallery's folder.

            The folder is ``PICTURES_PATH`` joined with ``self.girl_name`` and
            is created on demand. Images whose target file already exists are
            skipped without being downloaded again.

            Raises:
                requests.RequestException: if an image cannot be fetched or
                    the server answers with an HTTP error status.
                OSError: if the folder or a file cannot be created.
            """
            girl_path = os.path.join(PICTURES_PATH, self.girl_name)
            if os.path.isdir(girl_path):
                print("{}已存在".format(self.girl_name))
            os.makedirs(girl_path, exist_ok=True)

            for img_name, pic_url in enumerate(self.pic_urls, start=1):
                pic_path = os.path.join(girl_path, str(img_name) + '.jpg')
                # Check before downloading so existing files cost no request.
                if os.path.isfile(pic_path):
                    print("{}第{}张已存在".format(self.girl_name, img_name))
                    continue
                img_data = requests.get(
                    pic_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
                # Do not store an HTTP error page as if it were an image.
                img_data.raise_for_status()
                with open(pic_path, 'wb') as f:
                    f.write(img_data.content)
                print("正在保存{}第{}张".format(self.girl_name, img_name))
    
    
    
    
    def main():
        """Run the full crawl: listing pages, then gallery links, then images."""
        crawler = Spider()
        stages = (
            crawler.get_page_urls,
            crawler.get_girl_urls,
            crawler.get_pic_urls,
        )
        # Each stage consumes what the previous one collected, so the order
        # is fixed.
        for stage in stages:
            stage()
    
    # Start the crawl only when executed as a script, not when imported.
    if "__main__" == __name__:
        main()
    

    相关文章

      网友评论

          本文标题:Python3实战:批量下载妹子图片

          本文链接:https://www.haomeiwen.com/subject/ykbrfqtx.html