美文网首页
Python利用Ajax提取微博

Python利用Ajax提取微博

作者: 我的袜子都是洞 | 来源:发表于2018-10-26 15:03 被阅读4次

    提取马云微博

    from urllib.parse import urlencode
    import requests
    from pyquery import PyQuery as pq
    
    # Last page number requested by the driver loop (pages start at 1).
    max_page = 14

    # API endpoint; the urlencoded query string is appended directly after '?'.
    base_url = 'https://m.weibo.cn/api/container/getIndex?'

    # Headers sent with every request, including the XMLHttpRequest marker.
    headers = dict([
        ('Host', 'm.weibo.cn'),
        ('Referer', 'https://m.weibo.cn/u/2145291155'),
        (
            'User-Agent',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.36',
        ),
        ('X-Requested-With', 'XMLHttpRequest'),
    ])
    
    def get_page(page):
        """Fetch one page of the timeline from the Ajax endpoint.

        Args:
            page: Page number placed in the ``page`` query parameter.

        Returns:
            The decoded JSON payload, or None when the request fails, the
            server answers with a non-200 status, or the body is not JSON.
        """
        params = {
            'type': 'uid',
            'value': '2145291155',
            'containerid': '1076032145291155',
            'page': page
        }
        url = base_url + urlencode(params)
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            # RequestException also covers timeouts, not just connection errors.
            print('Error', e.args)
            return None
        if response.status_code != 200:
            print('Error', 'unexpected status', response.status_code)
            return None
        try:
            return response.json()
        except ValueError as e:
            # A 200 response whose body is not JSON is reported, not raised.
            print('Error', e.args)
            return None
    
    def parse_page(json):
        """Yield one summary dict per post contained in an API payload.

        Args:
            json: Decoded response dict (as returned by ``get_page``) or None.

        Yields:
            Dicts with the keys ``created_at``, ``text`` (markup stripped via
            pyquery), ``attitudes``, ``comments`` and ``reposts``.
        """
        if not json:
            return
        # 'data' or 'cards' may be absent or null; treat both as "no cards"
        # instead of failing on None.
        data = json.get('data') or {}
        cards = data.get('cards') or []
        for item in cards:
            mblog = item.get('mblog')
            if not mblog:
                # Cards without an 'mblog' entry hold no post; calling .get
                # on None would raise AttributeError.
                continue
            yield {
                'created_at': mblog.get('created_at'),
                'text': pq(mblog.get('text')).text(),
                'attitudes': mblog.get('attitudes_count'),
                'comments': mblog.get('comments_count'),
                'reposts': mblog.get('reposts_count'),
            }
    
    if __name__ == '__main__':
        # Fetch pages 1..max_page in order and print each parsed post as soon
        # as it is produced.
        for page in range(1, max_page + 1):
            for result in parse_page(get_page(page)):
                print(result)
    

    修改成提取自己的微博

    from urllib.parse import urlencode
    import requests
    from pyquery import PyQuery as pq
    
    # Last page number requested by the driver loop (pages start at 1).
    max_page = 14

    # API endpoint; the urlencoded query string is appended directly after '?'.
    base_url = 'https://m.weibo.cn/api/container/getIndex?'

    # Headers sent with every request. The Referer uses the same uid that
    # get_page requests (1583624660), so it no longer names another profile.
    headers = {
        'Host': 'm.weibo.cn',
        'Referer': 'https://m.weibo.cn/u/1583624660',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    
    def get_page(page):
        """Fetch one page of the timeline from the Ajax endpoint.

        Args:
            page: Page number placed in the ``page`` query parameter.

        Returns:
            The decoded JSON payload, or None when the request fails, the
            server answers with a non-200 status, or the body is not JSON.
        """
        params = {
            'type': 'uid',
            # The user's uid.
            'value': '1583624660',
            # This matching value has to be changed along with the uid.
            'containerid': '1076031583624660',
            'page': page
        }
        url = base_url + urlencode(params)
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as e:
            # RequestException also covers timeouts, not just connection errors.
            print('Error', e.args)
            return None
        if response.status_code != 200:
            print('Error', 'unexpected status', response.status_code)
            return None
        try:
            return response.json()
        except ValueError as e:
            # A 200 response whose body is not JSON is reported, not raised.
            print('Error', e.args)
            return None
    
    def parse_page(json):
        """Yield one summary dict per post contained in an API payload.

        Args:
            json: Decoded response dict (as returned by ``get_page``) or None.

        Yields:
            Dicts with the keys ``created_at``, ``text`` (markup stripped via
            pyquery), ``attitudes``, ``comments`` and ``reposts``.
        """
        if not json:
            return
        # 'data' or 'cards' may be absent or null; treat both as "no cards"
        # instead of failing on None.
        data = json.get('data') or {}
        cards = data.get('cards') or []
        for item in cards:
            mblog = item.get('mblog')
            if not mblog:
                # Cards without an 'mblog' entry hold no post; calling .get
                # on None would raise AttributeError.
                continue
            yield {
                'created_at': mblog.get('created_at'),
                'text': pq(mblog.get('text')).text(),
                'attitudes': mblog.get('attitudes_count'),
                'comments': mblog.get('comments_count'),
                'reposts': mblog.get('reposts_count'),
            }
    
    if __name__ == '__main__':
        # Fetch pages 1..max_page in order and print each parsed post as soon
        # as it is produced.
        for page in range(1, max_page + 1):
            for result in parse_page(get_page(page)):
                print(result)
    

    相关文章

      网友评论

          本文标题:Python利用Ajax提取微博

          本文链接:https://www.haomeiwen.com/subject/mxzktqtx.html