美文网首页
获取user agent

获取user agent

作者: 孤泉冷月 | 来源:发表于2019-10-25 17:11 被阅读0次

    List of User Agent Strings
    这是一个英文网站,里面收录了大量浏览器的 user agent。
    我写了个爬虫把这些 user agent 都抓了下来,又写了个随机函数,用来随机获取其中一个 user agent。

    from bs4 import BeautifulSoup as bs
    from urllib import request
    import json
    import random
    
    
    class UserAgent:
        """Provide random browser user-agent strings scraped from a web page.

        The list is scraped from the page at ``_url`` and cached as JSON in
        ``user_agent.json`` (relative to the current working directory) so that
        later lookups can be served from disk instead of the network.

        Args:
            cache: When true, make sure the cache file exists at construction
                time, downloading the list if it is missing.
            update: When true (and ``cache`` is true), download the list again
                and overwrite an existing cache file.
        """

        _url = "http://useragentstring.com/pages/useragentstring.php?name=All"
        _cache_file = "user_agent.json"
        # Socket timeout in seconds, so an unresponsive server cannot block
        # the caller forever.
        _timeout = 30

        def __init__(self, cache=True, update=False):
            self._update = update
            if cache:
                self._save_as_json()

        def user_agent(self) -> list:
            """Return the list of known user-agent strings.

            The cache file is preferred; when it does not exist the list is
            scraped from the web page instead (without creating the file).
            """
            try:
                return self._read_json()
            except FileNotFoundError:
                return self._get_user_agent_from_html()

        def _get_user_agent_from_html(self) -> list:
            """Download the listing page and extract the user-agent entries."""
            req = request.Request(url=self._url)
            # The context manager closes the HTTP response even when reading
            # or decoding fails.
            with request.urlopen(req, timeout=self._timeout) as resp:
                html = resp.read().decode("iso-8859-1")
            soup = bs(html, "lxml")
            return [li.text for li in soup.select("#liste ul li a")]

        def _cache_exists(self) -> bool:
            """Tell whether the cache file is already present."""
            try:
                with open(self._cache_file, "r", encoding="utf-8"):
                    return True
            except FileNotFoundError:
                return False

        def _refresh_cache(self) -> list:
            """Download the list, write it to the cache file and return it."""
            # Fetch BEFORE opening the file: if the download fails, no empty or
            # truncated cache file is left behind to break later reads.
            agents = self._get_user_agent_from_html()
            with open(self._cache_file, "w", encoding="utf-8") as fp:
                json.dump(agents, fp)
            return agents

        def _save_as_json(self):
            """Create the cache file, or overwrite it when ``update`` was set.

            An existing cache file is left untouched unless the instance was
            created with ``update=True``.
            """
            if not self._update and self._cache_exists():
                return
            self._refresh_cache()

        def _read_json(self) -> list:
            """Read the user-agent list from the cache file.

            An unreadable (corrupt) or empty cache is replaced by a freshly
            downloaded list, which is returned directly so the page is only
            fetched once.

            Raises:
                FileNotFoundError: If the cache file does not exist.
            """
            try:
                with open(self._cache_file, "r", encoding="utf-8") as fp:
                    agents = json.load(fp)
            except ValueError:
                # json.JSONDecodeError and UnicodeDecodeError are both
                # ValueError subclasses: the file exists but is not usable.
                agents = None
            if isinstance(agents, list) and agents:
                return agents
            return self._refresh_cache()

        def random(self) -> str:
            """Return one user-agent string picked at random.

            Raises:
                IndexError: If the list of user agents is empty.
            """
            return random.choice(self.user_agent())
    
    
    if __name__ == '__main__':
        # Demo entry point: build the helper (creating the cache file when it
        # is missing) and print one randomly chosen user-agent string.
        print(UserAgent().random())
    
    

    相关文章

      网友评论

          本文标题:获取user agent

          本文链接:https://www.haomeiwen.com/subject/jjfevctx.html