1.创建索引
# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"


def main():
    """Create the index with an explicit mapping, then dump a match_all search.

    The index is named by ``index_name`` and the mapping is registered under
    the type ``doc_type``: ``name`` and ``gender`` are ``text``, ``age`` is
    ``integer`` and ``phone`` is ``keyword``.  The raw search response is
    pretty-printed to stdout.
    """
    es = Elasticsearch(es_hosts)
    properties = {
        "name": {"type": "text"},
        "gender": {"type": "text"},
        "age": {"type": "integer"},
        "phone": {"type": "keyword"},
    }
    # Creating an index that already exists is rejected with HTTP 400, which
    # made the script crash on every run after the first one.  Only create
    # the index when it is missing.
    if not es.indices.exists(index=index_name):
        es.indices.create(
            index=index_name,
            body={"mappings": {doc_type: {"properties": properties}}},
        )
    res = es.search(index=index_name, body={"query": {"match_all": {}}})
    pprint(res)
    # pprint(es.info())


if __name__ == '__main__':
    main()
查询创建的索引
{
"log": {
"aliases": {},
"mappings": {
"20170103": {
"properties": {
"age": {
"type": "integer"
},
"gender": {
"type": "text"
},
"name": {
"type": "text"
},
"phone": {
"type": "keyword"
}
}
}
},
"settings": {
"index": {
"creation_date": "1512980895137",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "TOrOEfoHQiSKX8oqlZ6URw",
"version": {
"created": "5050099"
},
"provided_name": "log"
}
}
}
}
你也可以先创建索引,然后创建type再设置mapping
def main():
    """Add a mapping for type ``yinianji`` to the ``students`` index.

    Ensures the ``students`` index exists, registers a mapping with a single
    ``text`` field ``name`` for the ``yinianji`` type, then pretty-prints a
    match_all search against ``index_name``.
    """
    es = Elasticsearch(es_hosts)
    # put_mapping needs an existing index; with the create call commented
    # out the snippet failed with a 404 on a fresh cluster.
    if not es.indices.exists(index="students"):
        es.indices.create(index="students")
    es.indices.put_mapping(
        index="students",
        doc_type="yinianji",
        body={"properties": {"name": {"type": "text"}}},
    )
    # NOTE(review): this searches ``index_name`` rather than "students" --
    # confirm which index the output is supposed to show.
    res = es.search(index=index_name, body={"query": {"match_all": {}}})
    pprint(res)
    # pprint(es.info())


if __name__ == '__main__':
    main()
2.插入数据
# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Bulk actions for helpers.bulk: ten documents with ids 1..10, all targeting
# index "students" / type "yinianji" and carrying the same "name" value.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": doc_id,
        "_source": {
            "name": 'weishihao'
        }
    }
    for doc_id in range(1, 11)
]
def main():
    """Bulk-index the prepared actions and print a search over ``students``.

    Sends the module-level ``body`` actions through ``helpers.bulk``,
    refreshes the ``students`` index, and pretty-prints the response of a
    match_all search on it.
    """
    es = Elasticsearch(es_hosts)
    helpers.bulk(es, body)
    # Newly indexed documents only become searchable after a refresh; without
    # this the search below usually runs too early and shows none of them.
    es.indices.refresh(index='students')
    res = es.search(index='students', body={"query": {"match_all": {}}})
    pprint(res)
    # pprint(es.info())


if __name__ == '__main__':
    main()
3.修改mapping结构
在 Elasticsearch 中,已有字段的 mapping 不能修改,更改 mapping 结构只能新增 field。所以可以用 put_mapping 追加新的字段:
# Fields to append to the existing mapping of index_name / doc_type.
# NOTE(review): if this targets the "log"/"20170103" mapping created earlier
# in this file, "gender" is already mapped as text there -- confirm that
# declaring it as integer here does not conflict.
extra_fields = {
    "county": {"type": "text"},
    "total": {"type": "integer"},
    "gender": {"type": "integer"},
    "agelow": {"type": "integer"},
    "agehigh": {"type": "integer"},
}
es.indices.put_mapping(index=index_name, doc_type=doc_type, body={"properties": extra_fields})
4.查询数据
# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Bulk actions for helpers.bulk: ten documents with ids 1..10, all targeting
# index "students" / type "yinianji" and carrying the same "name" value.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": doc_id,
        "_source": {
            "name": 'weishihao'
        }
    }
    for doc_id in range(1, 11)
]
def main():
    """Run a match_all search on ``students``/``yinianji`` and print it.

    The raw response dictionary returned by the client is pretty-printed to
    stdout.
    """
    client = Elasticsearch(es_hosts)
    # helpers.bulk(client, body)
    match_everything = {"query": {"match_all": {}}}
    response = client.search(index='students', doc_type='yinianji', body=match_everything)
    pprint(response)
    # pprint(client.info())


if __name__ == '__main__':
    main()
返回值中的 total 会给出匹配到的文档总数,但 hits 中默认只返回前 10 条文档(文档内容在每条结果的 _source 中)。
那么,我们如何查询所有的数据呢?可以使用 helpers.scan:
# Iterate over every document of the index via helpers.scan instead of a
# single search call, printing each hit as it arrives.
es = Elasticsearch(es_hosts)
match_everything = {"query": {"match_all": {}}}
scanResp = helpers.scan(es, match_everything, index='quanguorenkou', scroll="10m")
for hit in scanResp:
    print(hit)
这样我们就可以查询所有的数据了。
但是如果数据量比较大的时候,我们仅仅需要部分的数据的话,可以指定查询条件,比如:前缀查询:
# Prefix query: every document whose "studentid" starts with "330".
# ``size`` is the batch size of each scroll request and may not exceed the
# index's ``index.max_result_window`` (10000 by default); the previous value
# of 40000 is rejected with "Batch size is too large" unless that setting
# has been raised.  The scan still yields all matching documents.
scanResp = helpers.scan(
    es,
    {"query": {"prefix": {"studentid": {"value": "330"}}}},
    index='students',
    scroll="100m",
    size=10000,
)
上述语句实现的功能是查询studentid字段以330开头的所有文档。
5.删除索引
# Delete the whole index; ``index_name`` is the variable the other snippets
# define (the bare name ``index`` is not defined anywhere in them).
es.indices.delete(index=index_name)
6.一个完整的例子
# -.- coding:utf-8 -.-
from __future__ import print_function
from elasticsearch import Elasticsearch, helpers
from pprint import pprint
import sys
import os
sys.path.append(os.path.abspath(os.path.pardir))
from multiprocessing import current_process, Pool
from collections import deque
import time
import re
es_hosts = ["192.168.31.13"]
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file instead of committing them.
es_auth = ("elastic", "changeme")
index_name = 'exportdata'
doc_type = 'output'

es = Elasticsearch(es_hosts, http_auth=es_auth)

# Start from a clean index.  ignore=404 keeps the very first run (when the
# index does not exist yet) from aborting with a NotFoundError.
es.indices.delete(index=index_name, ignore=404)

# Recreate the index with the full mapping for ``doc_type``.
es.indices.create(
    index=index_name,
    body={
        "mappings": {
            doc_type: {
                "properties": {
                    "filename": {"type": "text"},
                    "url": {"type": "text"},
                    "status": {"type": "integer"},
                    "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "county": {"type": "text"},
                    "total": {"type": "integer"},
                    "gender": {"type": "integer"},
                    "agelow": {"type": "integer"},
                    "agehigh": {"type": "integer"},
                }
            }
        }
    },
)
#es.indices.put_mapping(
# index=index_name,
# doc_type=doc_type,
# body={
# "properties": {
# "county": {"type": "text"},
# "total" : {"type": "integer"},
# "gender": {"type": "integer"},
# "agelow": {"type": "integer"},
# "agehigh": {"type": "integer"}
# }
# }
# )
#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
# "filename": '1233445',
# "url": '/root',
# "status": 0,
# "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
# #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
# "total": 100,
# "county": '111111 111112',
# "gender": 0,
# "agelow": 12,
# "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
#es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})
网友评论