美文网首页ES
elasticsearch python 简单实践

elasticsearch python 简单实践

作者: 第八共同体 | 来源:发表于2017-12-11 16:31 被阅读0次

    1.创建索引

    # -.- coding:utf-8 -.-
    from __future__ import print_function
    from pprint import pprint
    from elasticsearch import Elasticsearch
    
    es_hosts = ["192.168.9.119:9200"]
    index_name = "log"
    doc_type = "20170103"


    def main():
        """Create the index with its typed mapping, then dump a match_all search.

        The index is only created when it does not exist yet, so the script
        can be run repeatedly instead of failing on the second run because
        the index is already there.
        """
        es = Elasticsearch(es_hosts)
        index_body = {
            "mappings": {
                doc_type: {
                    "properties": {
                        "name": {"type": "text"},
                        "gender": {"type": "text"},
                        "age": {"type": "integer"},
                        "phone": {"type": "keyword"},
                    }
                }
            }
        }
        # Guard the create call: creating an index that already exists is an error.
        if not es.indices.exists(index=index_name):
            es.indices.create(index=index_name, body=index_body)
        res = es.search(index=index_name, body={"query": {"match_all": {}}})
        pprint(res)
        # pprint(es.info())


    if __name__ == '__main__':
        main()
    

    查询创建的索引

    {
        "log": {
            "aliases": {},
            "mappings": {
                "20170103": {
                    "properties": {
                        "age": {
                            "type": "integer"
                        },
                        "gender": {
                            "type": "text"
                        },
                        "name": {
                            "type": "text"
                        },
                        "phone": {
                            "type": "keyword"
                        }
                    }
                }
            },
            "settings": {
                "index": {
                    "creation_date": "1512980895137",
                    "number_of_shards": "5",
                    "number_of_replicas": "1",
                    "uuid": "TOrOEfoHQiSKX8oqlZ6URw",
                    "version": {
                        "created": "5050099"
                    },
                    "provided_name": "log"
                }
            }
        }
    }
    

    你也可以先创建索引,然后创建type再设置mapping

    def main():
        """Create the "students" index if needed, add a type mapping, then search it.

        put_mapping needs the index to exist, so the index is created first
        (only when missing). The search targets the same "students" index the
        mapping was added to.
        """
        es = Elasticsearch(es_hosts)
        # Step 1: make sure the index exists before touching its mapping.
        if not es.indices.exists(index="students"):
            es.indices.create(index="students")
        # Step 2: create the type and set its mapping.
        es.indices.put_mapping(
            index="students",
            doc_type="yinianji",
            body={"properties": {"name": {"type": "text"}}},
        )
        res = es.search(index="students", body={"query": {"match_all": {}}})
        pprint(res)
        # pprint(es.info())


    if __name__ == '__main__':
        main()
    

    2.插入数据

    # -.- coding:utf-8 -.-
    from __future__ import print_function
    from pprint import pprint
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    
    es_hosts = ["192.168.9.119:9200"]
    index_name = "log"
    doc_type = "20170103"
    # Ten bulk actions for index "students" / type "yinianji", with ids 1..10
    # and the same "name" value in every document.
    body = [
        {
            "_index": "students",
            "_type": "yinianji",
            "_id": student_id,
            "_source": {"name": 'weishihao'},
        }
        for student_id in range(1, 11)
    ]


    def main():
        """Send the prepared actions with helpers.bulk, then print a match_all search."""
        client = Elasticsearch(es_hosts)
        helpers.bulk(client, body)
        match_all = {"query": {"match_all": {}}}
        found = client.search(index='students', body=match_all)
        pprint(found)
        # pprint(client.info())


    if __name__ == '__main__':
        main()
    

    3.修改mapping结构

    在elasticsearch中,更改mapping结构只能新增field。所以我们用put_mapping来追加新的field:

    # Fields to append to the mapping of index_name / doc_type.
    # NOTE(review): if index_name / doc_type still refer to the "log" /
    # "20170103" mapping created in section 1, "gender" is already mapped as
    # text there and is declared integer here - confirm which index this
    # snippet is meant for.
    new_properties = {
        "county": {"type": "text"},
        "total": {"type": "integer"},
        "gender": {"type": "integer"},
        "agelow": {"type": "integer"},
        "agehigh": {"type": "integer"},
    }
    es.indices.put_mapping(
        index=index_name,
        doc_type=doc_type,
        body={"properties": new_properties},
    )
    
    
    

    4.查询数据

    # -.- coding:utf-8 -.-
    from __future__ import print_function
    from pprint import pprint
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    
    es_hosts = ["192.168.9.119:9200"]
    index_name = "log"
    doc_type = "20170103"
    # Same ten bulk actions as in the insert example; main() below only
    # queries, because the bulk call is commented out.
    body = [
        {
            "_index": "students",
            "_type": "yinianji",
            "_id": doc_id,
            "_source": {"name": 'weishihao'},
        }
        for doc_id in range(1, 11)
    ]


    def main():
        """Run a match_all search on index "students", type "yinianji", and print it."""
        client = Elasticsearch(es_hosts)
        # helpers.bulk(client, body)
        query = {"query": {"match_all": {}}}
        response = client.search(index='students', doc_type='yinianji', body=query)
        pprint(response)
        # pprint(client.info())


    if __name__ == '__main__':
        main()
    

    返回值中的total值会给出匹配到的总数据量,但是hits中实际返回的文档,默认只有10条。
    那么,我们如何查询所有的数据呢?

     es = Elasticsearch(es_hosts)
     # helpers.scan iterates over every matching hit instead of only the
     # first page of results; scroll="10m" is passed through as the scroll
     # keep-alive time.
     scanResp = helpers.scan(es, query={"query": {"match_all": {}}}, index='quanguorenkou', scroll="10m")
     # The loop sits at the same indentation level as the statements above;
     # the deeper indent it had before was an IndentationError.
     for hit in scanResp:
         print(hit)
    

    这样我们就可以查询所有的数据了。
    但是如果数据量比较大的时候,我们仅仅需要部分的数据的话,可以指定查询条件,比如:前缀查询:

     # Prefix query: match documents whose "studentid" starts with "330".
     prefix_query = {"query": {"prefix": {"studentid": {"value": "330"}}}}
     # NOTE(review): size=40000 is above Elasticsearch's default
     # index.max_result_window (10000); presumably that setting was raised on
     # this index - confirm, otherwise lower size.
     scanResp = helpers.scan(
         es,
         prefix_query,
         index='students',
         scroll="100m",
         size=40000,
     )
    

    上述语句实现的功能是查询studentid字段以330开头的所有文档。

    5.删除索引

    # Delete the index named by index_name, the variable the other snippets
    # use for the index name (the previous "index" name was never defined).
    es.indices.delete(index=index_name)
    

    6.一个完整的例子

    # -.- coding:utf-8 -.-
    from __future__ import print_function
    from elasticsearch import Elasticsearch, helpers
    from pprint import pprint
    import sys
    import os
    sys.path.append(os.path.abspath(os.path.pardir))
    
    
    from multiprocessing import current_process, Pool
    from collections import deque
    import time
    import re
    es_hosts = ["192.168.31.13"]
    es_auth = ("elastic", "changeme")
    index_name = 'exportdata'
    doc_type = 'output'
    es = Elasticsearch(es_hosts, http_auth=es_auth)

    # Start from a clean index. Only delete when the index is actually there,
    # so the very first run (no index yet) does not fail on the delete call.
    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)

    # Recreate the index with the full mapping for the single document type.
    es.indices.create(
        index=index_name,
        body={
            "mappings": {
                doc_type: {
                    "properties": {
                        "filename": {"type": "text"},
                        "url": {"type": "text"},
                        "status": {"type": "integer"},
                        "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                        "county": {"type": "text"},
                        "total": {"type": "integer"},
                        "gender": {"type": "integer"},
                        "agelow": {"type": "integer"},
                        "agehigh": {"type": "integer"},
                    }
                }
            }
        },
    )
    #es.indices.put_mapping(
    #                index=index_name,
    #                doc_type=doc_type,
    #                body={
    #                    "properties": {
    #                         "county": {"type": "text"},
    #                         "total" : {"type": "integer"},
    #                         "gender": {"type": "integer"},
    #                         "agelow": {"type": "integer"},
    #                         "agehigh": {"type": "integer"}
    #                    }
    #               }
    #            )
    #from datetime import datetime
    #print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
    #data = {
    #        "filename": '1233445',
    #        "url": '/root',
    #        "status": 0,
    #        "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
    #        #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
    #        "total": 100,
    #        "county": '111111 111112',
    #        "gender": 0,
    #        "agelow": 12,
    #        "agehigh": 18
    #}
    #es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
    #es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})
    

    相关文章

      网友评论

        本文标题:elasticsearch python 简单实践

        本文链接:https://www.haomeiwen.com/subject/nmniixtx.html