美文网首页
小说爬虫

小说爬虫

作者: 淡然z | 来源:发表于2018-08-14 09:32 被阅读0次
    from urllib import request
    from bs4 import BeautifulSoup
    
    if __name__ == '__main__':
        # Download every chapter linked from the novel's table-of-contents page
        # and append "<title><chapter text>\n\n" for each one to a single
        # UTF-8 encoded file, imgs/黄金瞳.txt.
        #
        # Imported here so this block stays self-contained next to the
        # file-level imports above.
        import os
        from urllib.parse import urljoin

        url = 'https://www.biquge.info/11_11668/'
        # Browser-like User-Agent sent with every request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1'}

        # Fetch and parse the table of contents; the context manager closes
        # the HTTP response as soon as the body has been read.
        req = request.Request(url, headers=headers)
        with request.urlopen(req) as response:
            html = response.read()
        soup = BeautifulSoup(html, 'lxml')

        # Fail with a readable message instead of an AttributeError on None
        # when the expected container is missing from the page.
        chapter_container = soup.find('div', id='list')
        if chapter_container is None:
            raise SystemExit('chapter list (div#list) not found at ' + url)
        chapters = chapter_container.find_all('dd')

        # The output directory may not exist yet on a fresh checkout.
        os.makedirs('imgs', exist_ok=True)

        # `with` guarantees the file is flushed and closed even if a chapter
        # download raises part-way through the loop.
        with open('imgs' + '/' + '黄金瞳.txt', 'wb') as f:
            for entry in chapters:
                link = entry.find('a')
                if link is None:
                    # A <dd> without a link has nothing to download.
                    continue
                title = link['title']
                print(title)

                # urljoin resolves relative hrefs against the index URL and
                # leaves absolute hrefs intact, unlike plain concatenation.
                chapter_url = urljoin(url, link['href'])
                download_req = request.Request(chapter_url, headers=headers)
                with request.urlopen(download_req) as download_response:
                    download_html = download_response.read()

                download_soup = BeautifulSoup(download_html, 'lxml')
                content = download_soup.find('div', id='content')
                if content is None:
                    # Report and skip rather than aborting the whole run.
                    print('no content found, skipped: ' + chapter_url)
                    continue

                f.write(title.encode('utf-8'))
                f.write(content.text.encode('utf-8'))
                f.write('\n\n'.encode('utf-8'))
    

    相关文章

      网友评论

          本文标题:小说爬虫

          本文链接:https://www.haomeiwen.com/subject/zhdmbftx.html