美文网首页
采集华秋论坛小组信息存入csv文件

采集华秋论坛小组信息存入csv文件

作者: 是东东 | 来源:发表于2021-06-30 18:26 被阅读0次
    import csv
    import time
    import requests
    from lxml import etree
    
    
    def write_to_file(file_path, item, n):
        """Append one record to a CSV file, preceded by a header row when n is 1.

        Args:
            file_path: Path of the CSV file; opened in append mode as UTF-8.
            item: Mapping of column name to value. Its keys become the header
                row and its values the data row, in the mapping's own order.
            n: Record counter; the header row is written only when n == 1.
        """
        header = list(item.keys())
        row = list(item.values())
        # newline='' lets the csv module control line endings itself.
        with open(file_path, mode='a', encoding='utf-8', newline='') as handle:
            out = csv.writer(handle)
            if n == 1:
                out.writerow(header)
            out.writerow(row)
    
    
    def get_group_info():
        """Scrape group listings from bbs.elecfans.com into group_info.csv.

        Requests listing pages 1 through 12 (ordered by member count), pulls
        each group's name, id and member count out of the HTML, appends one
        CSV row per group via write_to_file, and prints progress to stdout.
        """
        # The header key must be 'User-Agent'. The original 'User-Again' was
        # sent as an unrelated header, so requests' default User-Agent was
        # used instead of the browser string below.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36'}
        file_path = 'group_info.csv'
        n = 1  # running record counter; write_to_file emits the header when it is 1
        for page in range(1, 12 + 1):
            url = f'https://bbs.elecfans.com/group.php?mod=index&orderby=membernum&page={page}'
            print(url)
            # A timeout keeps a stalled connection from hanging the whole run.
            req = requests.get(url, headers=headers, timeout=30)
            time.sleep(5)  # pause 5 seconds after every page request
            tree = etree.HTML(req.text)
            details = tree.xpath('//div[@class="glist-msg"]')
            for detail in details:
                item = {}
                item['组名'] = ''.join(detail.xpath('./h3/a/@title'))
                # Strip the 'group_' prefix from the link target to get the id.
                item['id'] = ''.join(detail.xpath('./h3/a/@href')).replace('group_', '')
                # Strip the '个成员' suffix so only the count text remains.
                item['加入成员'] = ''.join(detail.xpath('./div[@class="glist-dec"]/span/text()')).replace('个成员', '')
                write_to_file(file_path, item, n)
                print(item)
                print(f'采集第 {n} 个')
                n += 1
    
    
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == "__main__":
        get_group_info()
    

    输出内容

    相关文章

      网友评论

          本文标题:采集华秋论坛小组信息存入csv文件

          本文链接:https://www.haomeiwen.com/subject/qtorultx.html