美文网首页ES
elasticsearch python 简单实践

elasticsearch python 简单实践

作者: 第八共同体 | 来源:发表于2017-12-11 16:31 被阅读0次

1.创建索引

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch

# Elasticsearch node(s) handed to the Elasticsearch() client constructor.
es_hosts = ["192.168.9.119:9200"]
# Name of the index that main() creates and then searches.
index_name = "log"
# Mapping type name; main() uses it as the key under "mappings".
doc_type = "20170103"


def main():
    """Create ``index_name`` with an explicit mapping and print a match_all search.

    The mapping is registered under the type named by ``doc_type`` and
    declares four fields: ``name``, ``gender``, ``age`` and ``phone``.
    """
    # Field definitions for the mapping type.
    field_types = {
        "name": {"type": "text"},
        "gender": {"type": "text"},
        "age": {"type": "integer"},
        "phone": {"type": "keyword"},
    }
    index_body = {"mappings": {doc_type: {"properties": field_types}}}
    match_everything = {"query": {"match_all": {}}}

    client = Elasticsearch(es_hosts)
    client.indices.create(index=index_name, body=index_body)
    pprint(client.search(index=index_name, body=match_everything))


if __name__ == "__main__":
    main()

查询创建的索引

{
    "log": {
        "aliases": {},
        "mappings": {
            "20170103": {
                "properties": {
                    "age": {
                        "type": "integer"
                    },
                    "gender": {
                        "type": "text"
                    },
                    "name": {
                        "type": "text"
                    },
                    "phone": {
                        "type": "keyword"
                    }
                }
            }
        },
        "settings": {
            "index": {
                "creation_date": "1512980895137",
                "number_of_shards": "5",
                "number_of_replicas": "1",
                "uuid": "TOrOEfoHQiSKX8oqlZ6URw",
                "version": {
                    "created": "5050099"
                },
                "provided_name": "log"
            }
        }
    }
}

你也可以先创建索引,然后创建type再设置mapping

def main():
    """Ensure the "students" index exists, add a mapping to it, and print a search.

    Steps:
      1. Create the "students" index if it is not there yet, because
         put_mapping needs an existing index.
      2. Register the "yinianji" type with a single text field, ``name``.
      3. Run a match_all search against "students" and pretty-print the
         raw response.
    """
    es = Elasticsearch(es_hosts)
    students_index = "students"

    # Create the index only when missing so the script can be re-run safely.
    if not es.indices.exists(index=students_index):
        es.indices.create(index=students_index)

    es.indices.put_mapping(
        index=students_index,
        doc_type="yinianji",
        body={"properties": {"name": {"type": "text"}}},
    )

    # Search the index that was just mapped, not the unrelated ``index_name``.
    res = es.search(index=students_index, body={"query": {"match_all": {}}})
    pprint(res)


if __name__ == '__main__':
    main()

2.插入数据

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connection target and default index/type names for this script.
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Ten bulk actions with ids 1..10, all carrying the same one-field document
# and all addressed to the "students" index under the "yinianji" type.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": doc_id,
        "_source": {"name": 'weishihao'},
    }
    for doc_id in range(1, 11)
]

def main():
    """Bulk-index the prepared actions and print a search over "students".

    ``body`` is the module-level list of bulk actions. After the bulk call
    the index is refreshed explicitly: Elasticsearch search is near
    real-time, so without a refresh the search below may not see the
    documents that were just written.
    """
    es = Elasticsearch(es_hosts)
    helpers.bulk(es, body)

    # Make the freshly indexed documents visible to the search that follows.
    es.indices.refresh(index='students')

    res = es.search(index='students', body={"query": {"match_all": {}}})
    pprint(res)


if __name__ == '__main__':
    main()

3.修改mapping结构

在 Elasticsearch 中,已有字段的 mapping 不能修改,更改 mapping 结构只能新增 field。所以可以用 put_mapping 追加新的字段:

# Field definitions to add to the mapping of ``doc_type`` in ``index_name``.
new_fields = {
    "county": {"type": "text"},
    "total": {"type": "integer"},
    "gender": {"type": "integer"},
    "agelow": {"type": "integer"},
    "agehigh": {"type": "integer"},
}
es.indices.put_mapping(
    index=index_name,
    doc_type=doc_type,
    body={"properties": new_fields},
)


4.查询数据

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connection target and default index/type names for this script.
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Bulk actions for ids 1..10 in students/yinianji. The helpers.bulk call in
# main() is commented out, so this list is only used when that call is
# re-enabled.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": number,
        "_source": {"name": 'weishihao'},
    }
    for number in range(1, 11)
]

def main():
    """Run a match_all search on students/yinianji and pretty-print the response."""
    match_everything = {"query": {"match_all": {}}}
    client = Elasticsearch(es_hosts)
    response = client.search(
        index='students',
        doc_type='yinianji',
        body=match_everything,
    )
    pprint(response)


if __name__ == "__main__":
    main()

返回值中的 total 会给出匹配到的文档总数,但是 hits 中默认只返回前 10 条。
那么,我们如何查询所有的数据呢?可以使用 helpers.scan:

 # helpers.scan returns an iterator over every matching hit, so iterating it
 # yields the whole result set rather than only the first page.
 es = Elasticsearch(es_hosts)
 scanResp = helpers.scan(es, {"query": {"match_all": {}}}, index='quanguorenkou', scroll="10m")
 for hit in scanResp:
     print(hit)

这样我们就可以查询所有的数据了。
但是当数据量比较大、而我们只需要其中一部分数据时,可以指定查询条件,比如前缀查询:

 # Scan every document whose "studentid" starts with "330".
 # size is the per-scroll batch size; it is capped at 10000 here because that
 # is the default index.max_result_window and larger batches are rejected
 # unless that index setting has been raised.
 scanResp = helpers.scan(
     es,
     {"query": {"prefix": {"studentid": {"value": "330"}}}},
     index='students',
     scroll="100m",
     size=10000,
 )

上述语句实现的功能是查询studentid字段以330开头的所有文档。

5.删除索引

# Delete the whole index; ``index_name`` is the variable the other snippets
# use for the target index.
es.indices.delete(index=index_name)

6.一个完整的例子

# -.- coding:utf-8 -.-
from __future__ import print_function
from elasticsearch import Elasticsearch, helpers
from pprint import pprint
import sys
import os
sys.path.append(os.path.abspath(os.path.pardir))


from multiprocessing import current_process, Pool
from collections import deque
import time
import re
# Connection settings for the cluster used by this example.
es_hosts = ["192.168.31.13"]
# NOTE(review): credentials are hard-coded and "changeme" looks like a stock
# default password. Load them from the environment or a config file before
# using this outside a sandbox.
es_auth = ("elastic", "changeme")
index_name = 'exportdata'
doc_type = 'output'
es = Elasticsearch(es_hosts, http_auth=es_auth)

# Start from a clean index. ignore=404 stops the client raising when the
# index does not exist yet, which is the case on the very first run.
es.indices.delete(index=index_name, ignore=404)

# Recreate the index with an explicit mapping for the ``doc_type`` type.
es.indices.create(
    index=index_name,
    body={
        "mappings": {
            doc_type: {
                "properties": {
                    "filename": {"type": "text"},
                    "url": {"type": "text"},
                    "status": {"type": "integer"},
                    # Dates are stored as "yyyy-MM-dd HH:mm:ss" strings.
                    "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "county": {"type": "text"},
                    "total": {"type": "integer"},
                    "gender": {"type": "integer"},
                    "agelow": {"type": "integer"},
                    "agehigh": {"type": "integer"},
                }
            }
        }
    },
)
#es.indices.put_mapping(
#                index=index_name,
#                doc_type=doc_type,
#                body={
#                    "properties": {
#                         "county": {"type": "text"},
#                         "total" : {"type": "integer"},
#                         "gender": {"type": "integer"},
#                         "agelow": {"type": "integer"},
#                         "agehigh": {"type": "integer"}
#                    }
#               }
#            )
#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
#        "filename": '1233445',
#        "url": '/root',
#        "status": 0,
#        "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
#        #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
#        "total": 100,
#        "county": '111111 111112',
#        "gender": 0,
#        "agelow": 12,
#        "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
#es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})

相关文章

网友评论

    本文标题:elasticsearch python 简单实践

    本文链接:https://www.haomeiwen.com/subject/nmniixtx.html