-
Notifications
You must be signed in to change notification settings - Fork 8
/
check_elasticsearch_shard.py
executable file
·155 lines (141 loc) · 6.32 KB
/
check_elasticsearch_shard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/python
##-------------------------------------------------------------------
## @copyright 2017 DennyZhang.com
## Licensed under MIT
## https://www.dennyzhang.com/wp-content/mit_license.txt
##
## File : check_elasticsearch_shard.py
## Author : Denny <https://www.dennyzhang.com/contact>
## Description :
## Make sure ES indices have enough shards.
## Make sure no same shard(primary, replica) are in the same node, to avoid SPOF
## Read more: https://www.dennyzhang.com/es_shard
## --
## Created : <2017-02-24>
## Updated: Time-stamp: <2017-11-13 11:00:53>
##-------------------------------------------------------------------
import argparse
import requests
import sys
import socket
import re
# Process exit statuses handed to sys.exit() by this check.
# Following the Nagios plugin convention, 0 is reported as OK and 2 as CRITICAL.
NAGIOS_OK_ERROR = 0
NAGIOS_EXIT_ERROR = 2
def get_gb_size_from_string(string):
    """Convert a human-readable size string into a number of gigabytes.

    Examples: "10.5gb" -> 10.5, "1.2tb" -> 1200.0, "500mb" -> 0.5.
    Units are decimal (each step is a factor of 1000). The unit suffix is
    matched case-insensitively and surrounding whitespace is ignored, so
    "50GB" and " 50gb " are both accepted.

    Args:
        string: size text made of a number followed by one of the units
            b, kb, mb, gb, tb or pb.

    Returns:
        The size in gigabytes, as a float.

    Exits:
        Prints an error and terminates the process with NAGIOS_EXIT_ERROR
        when the unit is unknown or the numeric part cannot be parsed.
    """
    # Two-letter units come before the bare "b" so that "gb" is never
    # mistaken for a plain byte count.
    gb_per_unit = (
        ("pb", 1000.0 * 1000.0),
        ("tb", 1000.0),
        ("gb", 1.0),
        ("mb", 1e-3),
        ("kb", 1e-6),
        ("b", 1e-9),
    )
    text = string.strip().lower()
    for suffix, factor in gb_per_unit:
        if not text.endswith(suffix):
            continue
        try:
            return float(text[:-len(suffix)]) * factor
        except ValueError:
            # Known unit but garbage in front of it: report it below
            # instead of dying with a traceback.
            break
    print("ERROR: unexpected. size string: %s" % (string))
    sys.exit(NAGIOS_EXIT_ERROR)
def get_es_index_info(es_host, es_port, es_pattern_regexp, timeout=10):
    """Fetch name, primary shard count and primary store size of open indices.

    Queries http://<es_host>:<es_port>/_cat/indices?v and parses the
    whitespace-separated table it returns.

    Sample output:
      root@test:/# curl 172.17.0.8:9200/_cat/indices?v
      health status index                                          pri rep docs.count docs.deleted store.size pri.store.size
      green  open   master-index-098f6bcd4621d373cade4e832627b4f6   1   0          1            0      8.1kb          8.1kb
      green  open   master-index-13a1f8adbec032ed68f3d035449ef48d   1   0          1            0     10.6kb         10.6kb

    Args:
        es_host: Elasticsearch host name or IP.
        es_port: Elasticsearch HTTP port.
        es_pattern_regexp: regular expression applied with re.search to each
            index name; indices that do not match are skipped. An empty
            value disables the filter.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        A list of [index_name, number_of_shards, pri_store_size] entries,
        all three kept as the strings found in the response.

    Exits:
        Prints an error and terminates the process with NAGIOS_EXIT_ERROR
        when the request fails, the HTTP status is not 200, or a row has
        fewer columns than expected.
    """
    # TODO: use python library for ES
    url = "http://%s:%s/_cat/indices?v" % (es_host, es_port)
    try:
        # Without a timeout an unresponsive node would hang the check forever.
        r = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print("ERROR: fail to run REST API: %s. %s" % (url, e))
        sys.exit(NAGIOS_EXIT_ERROR)
    if r.status_code != 200:
        print("ERROR: fail to run REST API: %s" % (url))
        sys.exit(NAGIOS_EXIT_ERROR)

    pattern = re.compile(es_pattern_regexp) if es_pattern_regexp else None

    # Column positions of the layout shown in the sample above. They are
    # replaced by the real positions as soon as the header row is seen, so
    # responses carrying extra columns (e.g. "uuid") are parsed correctly.
    name_col, shards_col, size_col = 2, 3, 8

    index_list = []
    # r.text (decoded) rather than r.content: the latter is bytes on
    # Python 3 and cannot be split on a str separator.
    for line in r.text.splitlines():
        fields = line.split()
        if not fields:
            continue
        if "index" in fields and "pri" in fields and "pri.store.size" in fields:
            # Header row: learn where each column of interest lives.
            name_col = fields.index("index")
            shards_col = fields.index("pri")
            size_col = fields.index("pri.store.size")
            continue
        if " close " in line:
            # Closed indices are skipped.
            continue
        if len(fields) <= max(name_col, shards_col, size_col):
            print("ERROR: unexpected line in output of %s: %s" % (url, line))
            sys.exit(NAGIOS_EXIT_ERROR)

        index_name = fields[name_col]
        # Skip ES index which doesn't match the pattern
        if pattern is not None and pattern.search(index_name) is None:
            continue
        index_list.append([index_name, fields[shards_col], fields[size_col]])
    return index_list
def confirm_es_shard_count(es_host, es_port, es_index_list, min_shard_count):
    """Report indices that have fewer than min_shard_count shards.

    Args:
        es_host: unused here; kept for a signature parallel to the other checks.
        es_port: unused here; kept for a signature parallel to the other checks.
        es_index_list: entries whose first item is the index name and whose
            second item is the shard count (anything int() accepts).
        min_shard_count: smallest acceptable number of shards per index.

    Returns:
        Names of the offending indices, in input order. One ERROR line is
        printed for each of them.
    """
    under_sharded = []
    for entry in es_index_list:
        name, shard_total = entry[0], int(entry[1])
        if not shard_total < min_shard_count:
            continue
        message = "ERROR: index(%s) only has %d shards, less than %d." % (
            name, shard_total, min_shard_count)
        print(message)
        under_sharded.append(name)
    return under_sharded
def confirm_es_shard_size(es_host, es_port, es_index_list, max_shard_size):
    """Report indices whose average primary shard size exceeds max_shard_size.

    The average is the index's primary store size (converted to gb with
    get_gb_size_from_string) divided by its shard count.

    Args:
        es_host: unused here; kept for a signature parallel to the other checks.
        es_port: unused here; kept for a signature parallel to the other checks.
        es_index_list: entries of the form
            [index_name, number_of_shards, pri_store_size].
        max_shard_size: largest acceptable average shard size, in gb.

    Returns:
        Names of the offending indices, in input order. One ERROR line is
        printed for each of them.
    """
    oversized = []
    for entry in es_index_list:
        name = entry[0]
        shard_total = int(entry[1])
        primary_gb = get_gb_size_from_string(entry[2])
        if not primary_gb / shard_total > max_shard_size:
            continue
        message = "ERROR: index(%s) has some shards bigger than %s gb." % (
            name, max_shard_size)
        print(message)
        oversized.append(name)
    return oversized
# Sample:
# python ./check_elasticsearch_shard.py --es_pattern_regexp "master-.*|staging-.*" \
# --min_shard_count "5" \
# --max_shard_size "60gb"
if __name__ == '__main__':
    # Entry point: parse the command line, fetch the index list from ES, then
    # run the shard-count check followed by the shard-size check. Any failure
    # prints an ERROR line and exits with NAGIOS_EXIT_ERROR.

    # get parameters from users
    parser = argparse.ArgumentParser()
    parser.add_argument('--es_host', required=False,
                        help="server ip or hostname for elasticsearch instance. Default value is ip of eth0", type=str)
    parser.add_argument('--es_port', default='9200', required=False,
                        help="server port for elasticsearch instance", type=str)
    parser.add_argument('--es_pattern_regexp', required=False, default='',
                        help="ES index name pattern. Only ES indices with matched pattern will be examined", type=str)
    # type=int lets argparse reject a non-numeric value with a usage error
    # instead of a traceback from a later int() call.
    parser.add_argument('--min_shard_count', default=3, required=False,
                        help='minimal shards each elasticsearch index should have', type=int)
    parser.add_argument('--max_shard_size', default='50gb', required=False,
                        help='maximum shards size: avoid giant shards', type=str)
    args = parser.parse_args()

    es_port = args.es_port
    min_shard_count = args.min_shard_count
    es_pattern_regexp = args.es_pattern_regexp
    es_host = args.es_host
    max_shard_size = get_gb_size_from_string(args.max_shard_size)

    # If es_host is not given, use the local address the OS picks for
    # reaching 8.8.8.8, read back from a connected UDP socket.
    if es_host is None:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            s.connect(("8.8.8.8", 80))
            es_host = s.getsockname()[0]
        except socket.error as e:
            print("ERROR: fail to detect local ip address: %s" % (e))
            sys.exit(NAGIOS_EXIT_ERROR)
        finally:
            # Always release the socket, including on the error exit above.
            s.close()

    es_index_list = get_es_index_info(es_host, es_port, es_pattern_regexp)

    failed_index_list = confirm_es_shard_count(es_host, es_port, es_index_list, min_shard_count)
    if failed_index_list:
        print("ERROR: Below indices don't have enough shards:\n%s" %
              (",".join(failed_index_list)))
        sys.exit(NAGIOS_EXIT_ERROR)

    failed_index_list = confirm_es_shard_size(es_host, es_port, es_index_list, max_shard_size)
    if failed_index_list:
        print("ERROR: Below indices have shards bigger than %s gb:\n%s" %
              (max_shard_size, ",".join(failed_index_list)))
        sys.exit(NAGIOS_EXIT_ERROR)

    print("OK: all matched ES indices have no less than %d shards. And no shards bigger than %s gb" %
          (min_shard_count, max_shard_size))
## File : check_elasticsearch_shard.py ends