Python 操作 Elasticsearch 并导出全部数据
目录
1. 连接 Elasticsearch 数据库
安装包
pip install elasticsearch tqdm
连接 Elasticsearch
from elasticsearch import Elasticsearch
from tqdm import tqdm
import json
import warnings
# Suppress every Python warning for the rest of the script so that warnings
# do not clutter the console output.
warnings.simplefilter("ignore")

# Build the client for the Elasticsearch HTTP endpoint and print it so the
# configured client object can be inspected.
es = Elasticsearch(hosts="http://0.0.0.0:9200/")
print(es)
2. 查询所有数据并导出为文件
# Export every document of the "song" index to song.json (one JSON object
# per line) using the scroll API.

# The request body uses the same format as in the Kibana console.
# "track_total_hits" asks Elasticsearch for the exact hit count; without it
# the reported total is capped (10,000 by default), which would make the
# printed total and the progress bar wrong for larger indices.
query_json = {
    "query": {
        "match_all": {}
    },
    "track_total_hits": True
}

# Number of hits returned per request (page size).
size = 100

# Run the initial search.
#   index  - index to query
#   body   - request body
#   size   - number of hits per page
#   scroll - how long the scroll context is kept alive between requests
query = es.search(index="song", body=query_json, size=size, scroll="5m")
page = query["hits"]["hits"]
documents = list(page)
total = query["hits"]["total"]["value"]
scroll_id = query["_scroll_id"]
print(total)

# Scroll through the remaining pages. The loop stops on the first empty page
# instead of relying on a page count derived from `total`, so it neither
# issues surplus requests nor stops early if the total is inexact.
try:
    with tqdm(total=total) as progress:
        progress.update(len(page))
        while page:
            response = es.scroll(scroll_id=scroll_id, scroll="5m")
            # The scroll id may change between requests; always continue
            # with the most recently returned one.
            scroll_id = response["_scroll_id"]
            page = response["hits"]["hits"]
            documents.extend(page)
            progress.update(len(page))
finally:
    # Release the server-side scroll context instead of waiting for the
    # 5 minute timeout to expire.
    es.clear_scroll(scroll_id=scroll_id)

# Save the result as JSON Lines: one document per line, non-ASCII characters
# written as-is.
with open("song.json", "w", encoding="utf-8") as f:
    for doc in documents:
        f.write(json.dumps(doc, ensure_ascii=False) + "\n")
print(len(documents))