美文网首页程序员
宅男福利 用Python爬取美女图片

宅男福利 用Python爬取美女图片

作者: Miku丨无形 | 来源:发表于2021-01-04 13:11 被阅读0次

    嘿嘿 召唤老色批
    今天带大家爬取一下美女的图片

    用的是requests和xpath去解析

    获取网页和解析网页的函数

    def get_tag(response, tag):
        """Evaluate an XPath expression against an HTML document.

        Args:
            response: HTML markup to parse (the other functions in this
                script pass the text returned by ``parse_url``).
            tag: XPath expression to evaluate.

        Returns:
            The result of the XPath query; callers in this script index it
            like a list.
        """
        return etree.HTML(response).xpath(tag)
    
    def parse_url(url, timeout=10):
        """Fetch a page and return its body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled connection would block the
                whole crawl indefinitely.

        Returns:
            The decoded response body (``response.text``).

        Raises:
            requests.exceptions.Timeout: If the server does not answer
                within ``timeout`` seconds.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        return response.text
    

    获取网页url

    def url_find(url):
        """Download every gallery linked from one listing page.

        Reads the gallery links and their link texts from the ``#pins``
        list of the page at ``url`` and hands each pair to
        ``url_jpg_find``.

        Args:
            url: Address of the listing page.
        """
        r = parse_url(url)
        url_list = get_tag(r, '//*[@id="pins"]/li/span[1]/a/@href')
        titles = get_tag(r, '//*[@id="pins"]/li/span[1]/a/text()')
        # zip stops at the shorter list, so a missing title cannot raise
        # IndexError part-way through the page.
        for gallery_url, gallery_title in zip(url_list, titles):
            url_jpg_find(gallery_url, gallery_title)
            # Report the gallery that was just saved, not the whole list.
            print(gallery_title, '保存完毕')
    

    获取图片的url

    def url_jpg_find(url, title):
        """Walk all pages of one gallery and download the image on each.

        Resets the global ``page`` counter to 0, creates a directory named
        ``title`` (relative to the current working directory) and calls
        ``content_find`` for the gallery's first page and for
        ``url/2`` ... ``url/<last>``.

        Args:
            url: Address of the gallery's first page.
            title: Gallery title, used as the output directory name.

        Raises:
            IndexError: If the page-count element is not found on the page.
        """
        global page
        page = 0
        r = parse_url(url)
        # Presumably the text of the last numbered pagination link, i.e. the
        # number of pages in the gallery -- depends on the site's markup.
        url_last = int(get_tag(r, '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0])
        url_list = [url] + [url + '/' + str(i) for i in range(2, url_last + 1)]
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(title, exist_ok=True)
        for page_url in url_list:
            content_find(page_url, title)
    

    获取图片的信息

    def content_find(url, title):
        """Extract the image on one gallery page and save it.

        Args:
            url: Address of a single gallery page.
            title: Gallery title, forwarded to ``save`` as the directory name.

        Raises:
            IndexError: If the heading or the image element is not found.
        """
        heading_xpath = '/html/body/div[2]/div[1]/h2/text()'
        image_xpath = '//div[@class="main-image"]//a/img/@src'

        markup = parse_url(url)
        heading = get_tag(markup, heading_xpath)[0]
        image_src = get_tag(markup, image_xpath)[0]

        # Short pause between page fetch and image download.
        time.sleep(0.2)
        save(heading, image_src, title)
    

    保存图片

    def save(name, url_jpg, title, timeout=10):
        """Download one image and write it to ``<cwd>/<title>/<name>.jpg``.

        Increments the global ``page`` counter and prints it after the file
        has been written.

        Args:
            name: File name without extension.
            url_jpg: Address of the image to download.
            title: Name of the (already existing) target directory.
            timeout: Seconds to wait for the server before giving up, so a
                stalled download cannot hang the crawl.
        """
        global page
        r = requests.get(url_jpg, headers=headers, timeout=timeout)
        target = os.path.join(os.getcwd(), title, name + '.jpg')
        # The with-block closes the file; no explicit close() is needed.
        with open(target, 'wb') as j:
            j.write(r.content)
        page += 1
        print(page)
    
    
    import requests,os,time
    from lxml import etree
    
    # Request headers passed to every requests.get call in this script.
    headers={
        # Browser-like User-Agent string (Chrome on Windows).
        "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
        # NOTE(review): presumably required so the image host accepts the
        # download requests (hotlink check) -- confirm.
        "Referer" : "https://www.mzitu.com",
    }
    
    # Count of images saved for the current gallery: reset to 0 by
    # url_jpg_find and incremented (then printed) by save.
    page=0
    
    def get_tag(response, tag):
        """Evaluate an XPath expression against an HTML document.

        Args:
            response: HTML markup to parse (the other functions in this
                script pass the text returned by ``parse_url``).
            tag: XPath expression to evaluate.

        Returns:
            The result of the XPath query; callers in this script index it
            like a list.
        """
        return etree.HTML(response).xpath(tag)
    
    def parse_url(url, timeout=10):
        """Fetch a page and return its body as text.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled connection would block the
                whole crawl indefinitely.

        Returns:
            The decoded response body (``response.text``).

        Raises:
            requests.exceptions.Timeout: If the server does not answer
                within ``timeout`` seconds.
        """
        response = requests.get(url, headers=headers, timeout=timeout)
        return response.text
    
    def url_find(url):
        """Download every gallery linked from one listing page.

        Reads the gallery links and their link texts from the ``#pins``
        list of the page at ``url`` and hands each pair to
        ``url_jpg_find``.

        Args:
            url: Address of the listing page.
        """
        r = parse_url(url)
        url_list = get_tag(r, '//*[@id="pins"]/li/span[1]/a/@href')
        titles = get_tag(r, '//*[@id="pins"]/li/span[1]/a/text()')
        # zip stops at the shorter list, so a missing title cannot raise
        # IndexError part-way through the page.
        for gallery_url, gallery_title in zip(url_list, titles):
            url_jpg_find(gallery_url, gallery_title)
            # Report the gallery that was just saved, not the whole list.
            print(gallery_title, '保存完毕')
    
    def url_jpg_find(url, title):
        """Walk all pages of one gallery and download the image on each.

        Resets the global ``page`` counter to 0, creates a directory named
        ``title`` (relative to the current working directory) and calls
        ``content_find`` for the gallery's first page and for
        ``url/2`` ... ``url/<last>``.

        Args:
            url: Address of the gallery's first page.
            title: Gallery title, used as the output directory name.

        Raises:
            IndexError: If the page-count element is not found on the page.
        """
        global page
        page = 0
        r = parse_url(url)
        # Presumably the text of the last numbered pagination link, i.e. the
        # number of pages in the gallery -- depends on the site's markup.
        url_last = int(get_tag(r, '/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')[0])
        url_list = [url] + [url + '/' + str(i) for i in range(2, url_last + 1)]
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(title, exist_ok=True)
        for page_url in url_list:
            content_find(page_url, title)
    
    def content_find(url, title):
        """Extract the image on one gallery page and save it.

        Args:
            url: Address of a single gallery page.
            title: Gallery title, forwarded to ``save`` as the directory name.

        Raises:
            IndexError: If the heading or the image element is not found.
        """
        heading_xpath = '/html/body/div[2]/div[1]/h2/text()'
        image_xpath = '//div[@class="main-image"]//a/img/@src'

        markup = parse_url(url)
        heading = get_tag(markup, heading_xpath)[0]
        image_src = get_tag(markup, image_xpath)[0]

        # Short pause between page fetch and image download.
        time.sleep(0.2)
        save(heading, image_src, title)
    
    def save(name, url_jpg, title, timeout=10):
        """Download one image and write it to ``<cwd>/<title>/<name>.jpg``.

        Increments the global ``page`` counter and prints it after the file
        has been written.

        Args:
            name: File name without extension.
            url_jpg: Address of the image to download.
            title: Name of the (already existing) target directory.
            timeout: Seconds to wait for the server before giving up, so a
                stalled download cannot hang the crawl.
        """
        global page
        r = requests.get(url_jpg, headers=headers, timeout=timeout)
        target = os.path.join(os.getcwd(), title, name + '.jpg')
        # The with-block closes the file; no explicit close() is needed.
        with open(target, 'wb') as j:
            j.write(r.content)
        page += 1
        print(page)
    
    def main():
        """Crawl every listing page of the site, starting from the home page.

        Reads a page count from the home page, then calls ``url_find`` for
        the home page itself and for ``/page/2`` ... ``/page/<last>``.

        Raises:
            IndexError: If the page-count element is not found.
        """
        home = 'https://www.mzitu.com'
        front_page = parse_url(home)
        # Presumably the last numbered link of the pagination bar -- depends
        # on the site's markup.
        last_page = int(get_tag(front_page, '/html/body/div[2]/div[1]/div[3]/div/a[4]/text()')[0])

        # The first listing page is the home page; later ones live under /page/N.
        url_find(home)
        for number in range(2, last_page + 1):
            url_find('https://www.mzitu.com/page/' + str(number))
    
    
    # Start the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main()
    

    效果图就不放了
    咳咳 太诱人 会被封掉
    请大家自行脑补一下

    一起学习python,小白指导,教学分享记得私信我

    相关文章

      网友评论

        本文标题:宅男福利 用Python爬取美女图片

        本文链接:https://www.haomeiwen.com/subject/vgfvoktx.html