Python Elasticsearch模块的使用
参考URL:
https://es.xiaoleilu.com/054_Query_DSL/70_Important_clauses.html
http://www.cnblogs.com/letong/p/4749234.html
背景
最近发现某类日志每天有几十万条,但其中有几百条丢失。为了查找原因,首先要拿到数据进行对比,于是就把ES中的数据导出来插入库中,与原始数据进行对比,因此有了下面的小程序(采用的是Elasticsearch模块),主要关注的几个点是:
由于插入库中还需要修改一些东西,这里就干脆只把结果写入文件,代码如下:
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# author : xiaojinsong(61627515@qq.com)
from elasticsearch import Elasticsearch
import logging
import sys
class Myesapi(object):
    """Small helper that pulls every hit of a query out of Elasticsearch.

    Attributes set by ``__init__``:
        clus_es:  list of ``host:port`` nodes handed to ``Elasticsearch``.
        index:    index name to search; a falsy value makes ``search`` exit.
        body:     query DSL body sent with the search request.
        doc_type: document type passed through to the search request.
    """

    def __init__(self, index=None, body=None, doc_type='doc'):
        """Store the query parameters.

        Args:
            index: index name to search. Defaults to an empty (falsy) value.
            body: query DSL dict. Defaults to a fresh empty dict.
            doc_type: document type forwarded to the search call.
        """
        self.clus_es = ['xxxxxx:9200', 'xxxxxx:9200', 'xxxxxx:9200']
        # Build the defaults per instance: a ``{}`` default in the signature
        # would be one shared object mutated across all instances.
        self.body = {} if body is None else body
        self.doc_type = doc_type
        self.index = {} if index is None else index

    @staticmethod
    def _exact_total(total):
        """Return the exact hit count, or None when it is not known exactly.

        ``hits.total`` is a plain integer on older Elasticsearch versions and
        an object such as ``{"value": 10000, "relation": "gte"}`` on newer
        ones; a ``gte`` relation is only a lower bound and must not be used
        to stop paging early.
        """
        if isinstance(total, dict):
            if total.get('relation', 'eq') == 'eq':
                return total.get('value')
            return None
        return total

    def search(self, scroll='2m'):
        """Run the stored query and collect all hits through the scroll API.

        Args:
            scroll: how long Elasticsearch keeps the scroll context alive
                between page requests.

        Returns:
            A list with the raw hit dicts of every page. An empty list is
            returned when the request fails; the failure is logged with its
            traceback.

        Raises:
            SystemExit: with code 1 when no index was configured.
        """
        if not self.index:
            # Log first: sys.exit() raises, so nothing after it would run.
            logging.error('you must ensure the correct index')
            sys.exit(1)

        es = Elasticsearch(self.clus_es)
        scroll_id = None
        try:
            resp = es.search(index=self.index, doc_type=self.doc_type,
                             body=self.body, scroll=scroll, size=500)
            scroll_id = resp['_scroll_id']
            hits = resp['hits']['hits']
            total = self._exact_total(resp['hits'].get('total'))
            result = list(hits)
            # Page until an empty page arrives, or until the exact total has
            # been collected (saves the final, empty round trip).
            while hits and (total is None or len(result) < total):
                resp = es.scroll(scroll_id=scroll_id, scroll=scroll)
                scroll_id = resp['_scroll_id']
                hits = resp['hits']['hits']
                result.extend(hits)
            return result
        except Exception:
            logging.exception('search on index %r failed', self.index)
            return []
        finally:
            # Release the server-side scroll context instead of letting it
            # linger until the scroll timeout; failure here is not fatal.
            if scroll_id is not None:
                try:
                    es.clear_scroll(scroll_id=scroll_id)
                except Exception as err:
                    logging.warning('could not clear scroll context: %s', err)

    def delete_index(self):
        """Placeholder for index deletion; currently does nothing."""
        pass
def useage():
    """Placeholder for a usage/help routine; currently does nothing."""
    return None
def main():
    """Export the ``timedate`` field of every matching hit to ``timelist.txt``.

    Configures file logging to ``/tmp/esapi.log``, runs a fixed bool query
    against one daily logstash index through ``Myesapi.search`` and appends
    one ``timedate`` value per line to ``timelist.txt`` in the current
    directory. Hits without a usable ``timedate`` are logged and skipped.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename='/tmp/esapi.log',
    )
    query = {
        "query": {
            "bool": {
                "must": [
                    {"match_phrase": {"timedate": "2018-08-20"}},
                    {"match_phrase": {"source": "/app/logs/app/app.2018-08-20"}}
                ],
                "filter": [
                    {"term": {"appname": "app"}},
                    {"term": {"host": "xxxxxxxx"}}
                ],
            }
        },
        # Only the timestamp is needed for the comparison.
        "_source": ["timedate"]
    }
    index = 'logstash-xxxxx-2018.08.20'
    esapi = Myesapi(body=query, index=index)
    # search() may hand back None when the request failed; treat as no hits.
    result = esapi.search() or []
    if not result:
        logging.info('no hits returned for index %s', index)
        return

    try:
        # Open the output once instead of once per hit.
        with open('timelist.txt', 'a') as f:
            for hit in result:
                try:
                    f.write(hit["_source"]["timedate"] + '\n')
                except (KeyError, TypeError) as e:
                    logging.warning('skipping hit without usable timedate: %r', e)
    except (IOError, OSError):
        logging.exception('could not write timelist.txt')
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
本站文章为和通数据库网友分享或者投稿,欢迎任何形式的转载,但请务必注明出处.
同时,文章内容如侵犯了您的权益,请联系QQ:970679559,我们会尽快处理。