美文网首页
scrapy mongodb 数据存储

scrapy mongodb 数据存储

作者: 领悟悟悟 | 来源:发表于2019-01-17 20:01 被阅读0次
    import traceback
    import time
    import motor.motor_asyncio
    
    try:
        # Python 3.x
        from urllib.parse import quote_plus
    except ImportError:
        # Python 2.x
        from urllib import quote_plus
    
    class MongoPipeline(object):
        """Scrapy item pipeline that persists items into MongoDB.

        Built around motor's AsyncIOMotorClient (see ``from_settings``).

        NOTE(review): motor's ``insert_one``/``insert_many`` return
        coroutines; calling them without awaiting schedules no work.
        Confirm whether the synchronous pymongo client was intended, or
        await/ensure_future the results — TODO confirm with the project.
        """

        def __init__(self, client, db_name, collection, settings):
            # client is expected to support subscription: client[db][collection].
            self.client = client
            self.db = self.client[db_name]
            self.collection = self.db[collection]
            self.settings = settings

        @classmethod
        def from_settings(cls, settings):
            """Alternate constructor: build the pipeline from Scrapy settings.

            Reads MONGODB_HOST / MONGODB_PORT / MONGODB_USER /
            MONGODB_PASSWORD / MONGODB_DBNAME / MONGODB_COLLECTION.
            Credentials are percent-escaped with quote_plus as required
            by the MongoDB URI format.
            """
            dbparms = dict(
                host=settings["MONGODB_HOST"],
                port=settings["MONGODB_PORT"],
                user=settings["MONGODB_USER"],
                passwd=settings["MONGODB_PASSWORD"],
            )
            if dbparms['user']:
                # BUG FIX: the original indexed dbparms['password'], but the
                # key defined above is 'passwd' — a KeyError on every run
                # that configures a MongoDB user.
                uri = "mongodb://%s:%s@%s:%s" % (
                    quote_plus(dbparms['user']),
                    quote_plus(dbparms['passwd']),
                    dbparms['host'],
                    dbparms['port'],
                )
            else:
                uri = "mongodb://%s:%s" % (dbparms['host'], dbparms['port'])
            client = motor.motor_asyncio.AsyncIOMotorClient(uri)
            db_name = settings["MONGODB_DBNAME"]
            collection = settings['MONGODB_COLLECTION']

            return cls(client, db_name, collection, settings)

        def insert_one(self, sql):
            """Insert a single document; log the traceback instead of raising."""
            try:
                self.collection.insert_one(sql)
            except Exception:
                traceback.print_exc()

        def insert_many(self, multi_sql: list):
            """Insert a list of documents; log the traceback instead of raising."""
            try:
                self.collection.insert_many(multi_sql)
            except Exception:
                traceback.print_exc()

        def process_item(self, item, spider):
            """Scrapy pipeline hook: store the item's MongoDB documents.

            BUG FIX: always return the item — returning None (as the
            original did) silently drops the item for every pipeline
            that runs after this one.
            """
            if self.settings['MONGODB_PIPLINES_ENABLED']:
                for sql in item.get_mongodb_insert_sql(spider):
                    self.insert_one(sql)
            return item

        def close_spider(self, spider):
            """Release the MongoDB client when the spider closes."""
            self.client.close()
    

    相关文章

      网友评论

          本文标题:scrapy mongodb 数据存储

          本文链接:https://www.haomeiwen.com/subject/valwdqtx.html