美文网首页
pyspider源码-index.py

pyspider源码-index.py

作者: comboo | 来源:发表于2017-04-07 15:02 被阅读266次

    index.py在webui中.通过flask启动一个web来控制爬虫.

    主要有一下api

    @app.route('/')
    def index():
        projectdb = app.config['projectdb']
        projects = sorted(projectdb.get_all(fields=index_fields),
                          key=lambda k: (0 if k['group'] else 1, k['group'] or '', k['name']))
        return render_template("index.html", projects=projects)
    

    首页,projectdb是数据库连接.之后读取数据展示.

    @app.route('/queues')
    def get_queues():
        def try_get_qsize(queue):
            if queue is None:
                return 'None'
            try:
                return queue.qsize()
            except Exception as e:
                return "%r" % e
    
        result = {}
        queues = app.config.get('queues', {})
        for key in queues:
            result[key] = try_get_qsize(queues[key])
        return json.dumps(result), 200, {'Content-Type': 'application/json'}
    

    展示队列.
    queues = app.config.get('queues', {})
    这句话获得5个队列,之前创建过的.
    在通过try_get_qsize()方法获得相应队列的数量

    @app.route('/update', methods=['POST', ])
    def project_update():
        projectdb = app.config['projectdb']
        project = request.form['pk']
        name = request.form['name']
        value = request.form['value']
    
        project_info = projectdb.get(project, fields=('name', 'group'))
        if not project_info:
            return "no such project.", 404
        if 'lock' in projectdb.split_group(project_info.get('group')) \
                and not login.current_user.is_active():
            return app.login_response
    
        if name not in ('group', 'status', 'rate'):
            return 'unknown field: %s' % name, 400
        if name == 'rate':
            value = value.split('/')
            if len(value) != 2:
                return 'format error: rate/burst', 400
            rate = float(value[0])
            burst = float(value[1])
            update = {
                'rate': min(rate, app.config.get('max_rate', rate)),
                'burst': min(burst, app.config.get('max_burst', burst)),
            }
        else:
            update = {
                name: value
            }
    
        ret = projectdb.update(project, update)
        if ret:
            rpc = app.config['scheduler_rpc']
            if rpc is not None:
                try:
                    rpc.update_project()
                except socket.error as e:
                    app.logger.warning('connect to scheduler rpc error: %r', e)
                    return 'rpc error', 200
            return 'ok', 200
        else:
            return 'update error', 500
    

    首页更新,前面获取要更新的数据
    ret = projectdb.update(project, update)
    这句话进行更新.

    if ret:
            rpc = app.config['scheduler_rpc']
            if rpc is not None:
                try:
                    rpc.update_project()
                except socket.error as e:
                    app.logger.warning('connect to scheduler rpc error: %r', e)
                    return 'rpc error', 200
            return 'ok', 200
        else:
            return 'update error', 500
    

    之后这部分通过rpg服务传送数据.

    @app.route('/counter')
    def counter():
        rpc = app.config['scheduler_rpc']
        if rpc is None:
            return json.dumps({})
    
        result = {}
        try:
            data = rpc.webui_update()
            for type, counters in iteritems(data['counter']):
                for project, counter in iteritems(counters):
                    result.setdefault(project, {})[type] = counter
            for project, paused in iteritems(data['pause_status']):
                result.setdefault(project, {})['paused'] = paused
        except socket.error as e:
            app.logger.warning('connect to scheduler rpc error: %r', e)
            return json.dumps({}), 200, {'Content-Type': 'application/json'}
    
        return json.dumps(result), 200, {'Content-Type': 'application/json'}
    

    这个还不知道.现在还没想明白,rpg到底提供什么接口.

    @app.route('/run', methods=['POST', ])
    def runtask():
        rpc = app.config['scheduler_rpc']
        if rpc is None:
            return json.dumps({})
    
        projectdb = app.config['projectdb']
        project = request.form['project']
        project_info = projectdb.get(project, fields=('name', 'group'))
        if not project_info:
            return "no such project.", 404
        if 'lock' in projectdb.split_group(project_info.get('group')) \
                and not login.current_user.is_active():
            return app.login_response
    
        newtask = {
            "project": project,
            "taskid": "on_start",
            "url": "data:,on_start",
            "process": {
                "callback": "on_start",
            },
            "schedule": {
                "age": 0,
                "priority": 9,
                "force_update": True,
            },
        }
    
        try:
            ret = rpc.newtask(newtask)
        except socket.error as e:
            app.logger.warning('connect to scheduler rpc error: %r', e)
            return json.dumps({"result": False}), 200, {'Content-Type': 'application/json'}
        return json.dumps({"result": ret}), 200, {'Content-Type': 'application/json'}
    

    生成newtask,之后通过rpc.newtask开始执行

    最后,不懂rpc到底干什么的,该看调度器了,看了之后应该明白一点.

    相关文章

      网友评论

          本文标题:pyspider源码-index.py

          本文链接:https://www.haomeiwen.com/subject/pnehattx.html