Commit 82799e49 by root

Initial commit

#!/bin/bash
#dell & hp
#status values: 0 = fault, 1 = warning, 2 = OK
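#This plugin prints an n9e/Open-Falcon style JSON array of datapoints on
#stdout, e.g. (illustrative values):
#[{"endpoint": "10.0.0.1", "metric": "hw.status", "value": 2, "step": 60, "tags": "vendor=dell", "counterType": "GAUGE", "timestamp": 1600000000}]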
PATH=$PATH:/usr/sbin:/sbin
step=$(echo $0|grep -Po '\d+(?=_)')
endpoint=$(ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1)
base_dir=$(cd $(dirname $0);pwd)
cd $base_dir
vendor=$(dmidecode|grep Vendor|awk -F'[: ]' '{print tolower($3)}')
tags="vendor=$vendor"
Json_join(){
metric=$1
value=$2
tags=${3:-""}
countertype=${4:-GAUGE}
jstr=$jstr"{\"endpoint\": \"${endpoint}\", \"metric\": \"${metric}\", \"value\": $value,\"step\": ${step}, \"tags\": \"${tags}\",\"counterType\":\"${countertype}\",\"timestamp\": $(date +%s)},"
}
if [ "X$vendor" == "Xdell" ];then
test -f /usr/bin/hwinfo && value=2 || value=0
Json_join hw.status $value "$tags"
if [ $value -eq 2 ];then
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/opt/dell/srvadmin/sbin:/opt/dell/srvadmin/bin
SHELL=/bin/bash
./hwcheck_dell.py -p -s $step
exit 0
fi
elif [ "X$vendor" == "Xhp" ];then
test -f /usr/sbin/hpacucli && value=2 || value=0
Json_join hw.status $value "$tags"
if [ $value -eq 2 ];then
hpacucli_log='/tmp/hpacucli_log'
temp_log='/tmp/temp_log'
hpacucli ctrl all show config > $hpacucli_log
vdisk=$(grep logicaldrive ${hpacucli_log}|grep -v OK|wc -l|awk '{print $1==0?2:0}')
pdisk=$(grep physicaldrive ${hpacucli_log}|grep -v OK|wc -l|awk '{print $1==0?2:0}')
memory=$(hpasmcli -s 'SHOW DIMM'|grep 'Status' |grep -v Ok|wc -l|awk '{print $1==0?2:0}')
fan=$(hpasmcli -s 'SHOW FANS'|grep '#'|grep -v Yes|wc -l|awk '{print $1==0?2:0}')
power=$(hpasmcli -s 'SHOW POWERSUPPLY'|grep Present|grep -v Yes|wc -l|awk '{print $1==0?2:0}')
cpu=$(hpasmcli -s 'SHOW SERVER' |grep Status|grep -v Ok|wc -l|awk '{print $1==0?2:0}')
raidcard=$(hpssacli ctrl all show status|grep Status|grep -v OK|wc -l|awk '{print $1==0?2:0}')
hpasmcli -s 'SHOW TEMP' >$temp_log
#power_temp=$(awk '/POWER_SUPPLY_BAY/{print $3}' $temp_log|awk -F"C" '{print $1}'|awk 'BEGIN {max = 0} {if ($1>max) max=$1} END {print max}')
#system_temp=$(awk '/SYSTEM_BD/{print $3}' $temp_log|awk -F"C" '{print $1}' |awk 'BEGIN {max = 0} {if ($1>max) max=$1} END {print max}')
ambient_temp=$(awk '/AMBIENT/{print $3}' $temp_log|awk -F"C" '{print $1}')
cpu_temp=$(awk '/PROCESSOR_ZONE/{print $3}' $temp_log|awk -F"C" '{print $1}' |awk 'BEGIN {max = 0} {if ($1>max) max=$1} END {print max}')
memory_temp=$(awk '/MEMORY_BD/{print $3}' $temp_log|awk -F"C" '{print $1}' |awk 'BEGIN {max = 0} {if ($1>max) max=$1} END {print max}')
Json_join hw.raidcard $raidcard $tags
Json_join hw.vdisk $vdisk $tags
Json_join hw.pdisk $pdisk $tags
Json_join hw.memory $memory $tags
Json_join hw.fan $fan $tags
Json_join hw.power $power $tags
Json_join hw.cpu $cpu $tags
Json_join hw.ambient_temp $ambient_temp $tags
Json_join hw.cpu_temp $cpu_temp $tags
Json_join hw.memory_temp $memory_temp $tags
#Json_join hw.power_temp $power_temp $tags
#Json_join hw.system_temp $system_temp $tags
fi
fi
jstr=$(echo $jstr|sed 's/^/[/;s/,$/]/;s/\[$/[]/')
echo $jstr
#!/bin/bash
service=mysqld
step=$(echo $0|grep -Po '\d+(?=_)')
dirname=$(cd $(dirname $0);pwd|awk -F\/ '$0=$NF')
base_dir=$(cd $(dirname $0);pwd)
cd $base_dir
mysqld_max_con=13684
user="monitor"
pass="3IPSkSxDpiPUtlF"
host="127.0.0.1"
endpoint=$(ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1)
Json_join(){
metric=$1
value=$2
tags=${3:-""}
countertype=${4:-GAUGE}
jstr=$jstr"{\"endpoint\": \"${endpoint}\", \"metric\": \"${metric}\", \"value\": $value,\"step\": ${step}, \"tags\": \"${tags}\",\"counterType\":\"${countertype}\",\"timestamp\": $(date +%s)},"
}
metric_arrays=(metric_global_status metric_slave_status metric_global_variables)
metric_global_status=(Aborted_clients:compute Aborted_connects:compute Bytes_received:compute Bytes_sent:compute Com_lock_tables:compute Com_rollback:compute Com_delete:compute Com_insert:compute Com_insert_select:compute Com_load:compute Com_replace:compute Com_select:compute Com_update:compute Qcache_hits:compute Slow_queries:compute Threads_connected:undefined Threads_running:undefined Uptime:undefined Queries:compute)
metric_slave_status=(slave_status:undefined Seconds_Behind_Master:undefined)
#metric_global_variables=(auto_increment_increment:undefined auto_increment_offset:undefined autocommit:undefined binlog_format:undefined general_log:undefined gtid_mode:undefined query_cache_size:undefined query_cache_type:undefined read_only:undefined report_host:undefined report_port:undefined server_id:undefined server_uuid:undefined skip_name_resolve:undefined slave_skip_errors:undefined slow_query_log:undefined sql_mode:undefined time_zone:undefined tx_isolation:undefined version:undefined)
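#Each entry is "<status_variable>:<hint>". The ":compute" suffix marks metrics
#that grow monotonically and are pushed with counterType=COUNTER (the server
#derives a rate from them); everything else is pushed as a GAUGE. For example,
#Com_select:compute becomes the COUNTER metric mysql.Com_select.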
Get_current_value(){
flag=$1
case $flag in
global_status)
sql="show global status"
eval $(mysql -u$user -p$pass -h$host -P$port -e "$sql" 2>/dev/null|awk '{printf("mysqld_%s=\"%s\"\n",$1,$2)}')
;;
slave_status)
sql="show slave status\G"
eval $(mysql -u$user -p$pass -h$host -P$port -e "$sql" 2>/dev/null |grep -v row |grep -v '_Gtid_Set'| grep -v ':\w' | awk -F'[: ]+' 'NR>1&&$0="mysqld_"$2"="$3')
#mysqld_slave_status
if [ ! -z "$mysqld_Master_Host" ];then
[ "$mysqld_Slave_IO_Running" == 'Yes' -a "$mysqld_Slave_SQL_Running" == 'Yes' ] && mysqld_slave_status=1 || mysqld_slave_status=0
fi
;;
# global_variables)
# sql="show global variables"
# eval $(mysql -u$user -p$pass -h$host -P$port -e "$sql" 2>/dev/null|awk '{printf("mysqld_%s=\"%s\"\n",$1,$2)}')
# ;;
esac
}
Push_n9e(){
for metric_array in ${metric_arrays[@]};do
{
for pre_metric in $(eval echo \${$metric_array[@]});do
{
[[ "$pre_metric" =~ ':compute' ]] \
&& countertype="COUNTER" \
|| countertype="GAUGE"
key="${service}_${pre_metric%%:*}"
value=$(eval echo \$$key)
metric="mysql.${pre_metric%%:*}"
[ "X"$value == "X" -o "X"$value == "XNULL" ] && continue
Json_join $metric $value "port=$port" $countertype
}
done
}
done
}
Test_connection_status(){
tags="port=$port"
ret=$(/usr/bin/mysql -u$user -p$pass -h$host -P$port -e 'quit' 2>&1)
#alive
echo "$ret"|grep -qi "Can't connect" && value=1 || value=0
metric="mysql.alive"
Json_join $metric $value "port=$port"
[ $value -eq 1 ] && return $value
#monitor auth
echo "$ret"|grep -qi 'Access denied' && value=1 || value=0
metric="mysql.monitor_auth"
Json_join $metric $value "port=$port"
[ $value -eq 1 ] && return $value
# connection status
echo "$ret"|grep -qi 'Too many connections' && value=1 || value=0
metric="mysql.connection_status"
Json_join $metric $value "port=$port"
return $value
}
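#Test_connection_status returns 0 only when the server is reachable, the
#monitor account can log in, and connections are still available; Main relies
#on that exit code ("|| continue") to skip metric collection for a dead port.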
Main(){
for port in $(grep $service ../service_port 2>/dev/null|awk '$0=$2');do
{
Test_connection_status || continue
Get_current_value global_status
Get_current_value slave_status
#Get_current_value global_variables
Push_n9e
}
done
jstr=$(echo $jstr|sed 's/^/[/;s/,$/]/;s/\[$/[]/')
echo $jstr
}
Main
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
oracle_auth.conf configuration file format:
items:
- {host: 127.0.0.1, port: 1521, user: monitor, passwd: 3IPSkSxDpiPUtlF, dbname: orcl11g}
"""
import os
import sys
import urllib2
import base64
import json
import re
import time
import yaml
import commands
step = int(os.path.basename(__file__).split('_')[0])
ts = int(time.time())
metric_list = ["check_active","rcachehit","dsksortratio","activeusercount","dbsize","dbfilesize","uptime","commits","rollbacks","deadlocks","redowrites","tblscans","tblrowsscans","indexffs","hparsratio","netsent","netresv","netroundtrips","logonscurrent","lastarclog","lastapplarclog","freebufwaits","bufbusywaits","logswcompletion","logfilesync","logprllwrite","enqueue","dbseqread","dbscattread","dbsnglwrite","dbprllwrite","directread","directwrite","latchfree","query_lock","query_redologs","query_rollbacks","query_sessions","query_sysmetrics","fra_use"]
counter_metric_list = ["commits","dbseqread","indexffs","logfilesync","logprllwrite","netresv","netroundtrips","netsent","redowrites","rollbacks","tblrowsscans","tblscans"]
conf_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'oracle_auth.conf')
if not os.path.exists(conf_file): sys.exit(0)
code, endpoint = commands.getstatusoutput("ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1")
if code != 0: sys.exit(0)
f = open(conf_file)
y = yaml.safe_load(f)
f.close()
items = y["items"]
from oracle_pyora import OracleTool
data = []
for item in items:
odb = OracleTool(item)
tablespaces = odb.show_tablespaces()
tablespaces_temp = odb.show_tablespaces_temp()
volumes = odb.show_asm_volumes()
for metric in metric_list:
t = {}
t['metric'] = 'oracle.%s' % metric
t['endpoint'] = endpoint
t['timestamp'] = ts
t['step'] = step
t['counterType'] = 'GAUGE'
if metric in counter_metric_list: t['counterType'] = 'COUNTER'
try:
t['value'] = getattr(odb, metric)()
except:
t['value'] = -1
data.append(t)
for tablespace in tablespaces:
t = {}
t['metric'] = 'oracle.tablespace'
t['endpoint'] = endpoint
t['timestamp'] = ts
t['step'] = step
t['counterType'] = 'GAUGE'
t['tags'] = 'name=%s' % tablespace
try:
t['value'] = odb.tablespace(tablespace)
except:
t['value'] = -1
data.append(t)
for tablespace_temp in tablespaces_temp:
t = {}
t['metric'] = 'oracle.tablespace_temp'
t['endpoint'] = endpoint
t['timestamp'] = ts
t['step'] = step
t['counterType'] = 'GAUGE'
t['tags'] = 'name=%s' % tablespace_temp
try:
t['value'] = odb.tablespace_temp(tablespace_temp)
except:
t['value'] = -1
data.append(t)
for volume in volumes:
t = {}
t['metric'] = 'oracle.volume'
t['endpoint'] = endpoint
t['timestamp'] = ts
t['step'] = step
t['counterType'] = 'GAUGE'
t['tags'] = 'name=%s' % volume
try:
t['value'] = odb.asm_volume_use(volume)
except:
t['value'] = -1
data.append(t)
print(json.dumps(data))
#!/usr/bin/env python
#-*- coding:utf-8 -*-
# alarm value: =-1 or =1024
import os
import sys
import urllib2
import base64
import json
import time
import commands
step = int(os.path.basename(__file__).split('_')[0])
ts = int(time.time())
keys = ('messages_ready', 'messages_unacknowledged')
rates = ('ack', 'deliver', 'deliver_get', 'publish')
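# Each element returned by the management API's /api/queues is a queue object;
# the fields read below look roughly like this (illustrative):
#   {"name": "q1", "messages_ready": 0, "messages_unacknowledged": 0,
#    "message_stats": {"publish_details": {"rate": 0.0}, ...}}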
code, num = commands.getstatusoutput("grep 'beam.smp 15672' /home/n9e/service_port 2>/dev/null|grep -v grep|wc -l")
if code != 0 or int(num) == 0: sys.exit(0)
code, endpoint = commands.getstatusoutput("ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1")
if code != 0: sys.exit(0)
timeout = 10
p = []
# see #issue4
base64string = base64.b64encode('monitor:monitor')
try:
request = urllib2.Request("http://{}:15672/api/queues".format(endpoint))
request.add_header("Authorization", "Basic %s" % base64string)
result = urllib2.urlopen(request, timeout=timeout)
data = json.loads(result.read())
except:
q = {}
q["endpoint"] = endpoint
q['timestamp'] = ts
q['step'] = step
q['counterType'] = "GAUGE"
q['metric'] = 'rabbitmq.alive'
q['value'] = 1024
q['tags'] = ''
p.append(q)
print(json.dumps(p))
sys.exit(0)
for queue in data:
# ready and unack
msg_total = 0
for key in keys:
        if key not in queue: continue
q = {}
q["endpoint"] = endpoint
q['timestamp'] = ts
q['step'] = step
q['counterType'] = "GAUGE"
q['metric'] = 'rabbitmq.%s' % key
q['tags'] = 'name={}'.format(queue['name'])
q['value'] = int(queue[key])
msg_total += q['value']
p.append(q)
# total
q = {}
q["endpoint"] = endpoint
q['timestamp'] = ts
q['step'] = step
q['counterType'] = "GAUGE"
q['metric'] = 'rabbitmq.messages_total'
q['tags'] = 'name={}'.format(queue['name'])
q['value'] = msg_total
p.append(q)
# rates
for rate in rates:
q = {}
q["endpoint"] = endpoint
q['timestamp'] = ts
q['step'] = step
q['counterType'] = "GAUGE"
q['metric'] = 'rabbitmq.%s_rate' % rate
q['tags'] = 'name={}'.format(queue['name'])
try:
q['value'] = int(queue['message_stats']["%s_details" % rate]['rate'])
except:
q['value'] = 0
p.append(q)
q = {}
q["endpoint"] = endpoint
q['timestamp'] = ts
q['step'] = step
q['counterType'] = "GAUGE"
q['metric'] = 'rabbitmq.alive'
if p:
q['value'] = 1
else:
q['value'] = -1
q['tags'] = ''
p.append(q)
print(json.dumps(p))
#!/bin/bash
#If a password is set, create a redis_auth.conf file in this directory; each line is "port:password", one line per instance.
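#Example redis_auth.conf (hypothetical values):
#6379:s3cret
#6380:an0ther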
service="redis"
step=$(echo $0|grep -Po '\d+(?=_)')
dirname=$(cd $(dirname $0);pwd|awk -F\/ '$0=$NF')
base_dir=$(cd $(dirname $0);pwd)
cd $base_dir
ip=127.0.0.1
metrics_counter=(total_connections_received rejected_connections keyspace_hits keyspace_misses total_commands_processed total_net_input_bytes total_net_output_bytes expired_keys evicted_keys used_cpu_sys used_cpu_user)
endpoint=$(ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1)
Json_join(){
metric=$1
value=$2
tags=${3:-""}
countertype=${4:-GAUGE}
jstr=$jstr"{\"endpoint\": \"${endpoint}\", \"metric\": \"${metric}\", \"value\": $value,\"step\": ${step}, \"tags\": \"${tags}\",\"counterType\":\"${countertype}\",\"timestamp\": $(date +%s)},"
}
Get_current_value(){
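#Turn "key:value" lines from `redis-cli info` into shell variables via eval,
#e.g. the INFO line connected_clients:10 becomes connected_clients="10".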
cluster_enabled=0
eval $($redis_cli_cmd -h $ip -p $port info 2>/dev/null|tr -d "\r"|egrep ':'|awk -F: '{printf("%s=\"%s\"\n",$1,$2)}')
[ $cluster_enabled -eq 1 ] && eval $($redis_cli_cmd -h $ip -p $port cluster info 2>/dev/null|tr -d "\r"|sed 's/-/_/g'|egrep ':'|awk -F: '{printf("%s=\"%s\"\n",$1,$2)}')
eval $($redis_cli_cmd -h $ip -p $port info commandstats 2>/dev/null|tr -d "\r"|egrep ':'|sed 's/-/_/g'|awk -F[:=,] '{printf("%s_%s=%s\n%s_%s=%s\n%s_%s=%s\n",$1,$2,$3,$1,$4,$5,$1,$6,$7)}')
maxmemory=$($redis_cli_cmd -h $ip -p $port config get maxmemory 2>/dev/null|sed -n '2p')
test -z $maxmemory && maxmemory=$($redis_cli_cmd -h $ip -p $port sc_config get maxmemory 2>/dev/null|sed -n '2p')
maxclients=$($redis_cli_cmd -h $ip -p $port config get maxclients 2>/dev/null|sed -n '2p')
test -z $maxclients && maxclients=$($redis_cli_cmd -h $ip -p $port sc_config get maxclients 2>/dev/null|sed -n '2p')
}
Push_n9e(){
for metric in $(cat redis_metrics);do
countertype=GAUGE
[[ "${metrics_counter[@]}" =~ "$metric" ]] && countertype=COUNTER
[[ "$metric" =~ "cmdstat_" ]] && countertype=COUNTER
[[ "$metric" =~ "cluster_" ]] && [ $cluster_enabled -eq 0 ] && continue
value=$(eval echo \$$metric)
if [ "X"$metric == 'Xrole' ];then
if [ "X"$value == 'Xmaster' ];then
value=1
else
value=0
master_link_status_value=$(eval echo \$master_link_status)
[ "X"$master_link_status_value == 'Xup' ] && master_link_status_value=1 || master_link_status_value=0
Json_join redis.master_link_status ${master_link_status_value} "port=$port" $countertype
fi
fi
if [ "X"$metric == 'Xrdb_last_bgsave_status' ];then
[ "X"$value == 'Xok' ] && value=1 || value=0
fi
if [ "X"$metric == 'Xaof_last_bgrewrite_status' ];then
[ "X"$value == 'Xok' ] && value=1 || value=0
fi
if [ "X"$metric == 'Xaof_last_write_status' ];then
[ "X"$value == 'Xok' ] && value=1 || value=0
fi
if [ "X"$metric == 'Xcluster_state' ];then
[ "X"$value == 'Xok' ] && value=1 || value=0
fi
[ "X"$value == "X" ] && continue
Json_join redis.$metric $value "port=$port" $countertype
done
}
Test_alive(){
r=$($redis_cli_cmd -h $ip -p $port ping 2>/dev/null) && value=0 || value=1
Json_join redis.alive $value "port=$port" $countertype
[ "X$r" == 'XPONG' ] && value=0 || value=1
Json_join redis.auth_passwd $value "port=$port" $countertype
return $value
}
Test_slowlog(){
$redis_cli_cmd -h $ip -p $port slowlog get 1024 >/tmp/redis_slowlog_$port 2>/dev/null|| return 1
grep -v ^$ /tmp/redis_slowlog_$port >>/opt/redis_slowlog_$port
value=$($redis_cli_cmd -h $ip -p $port slowlog len 2>/dev/null)
$redis_cli_cmd -h $ip -p $port slowlog reset &>/dev/null
Json_join redis.slowlog_len $value "port=$port" $countertype
if [ $value -gt 0 ];then
timestamp=$(date '+%s')
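#Slowlog entries are dumped as alternating lines (epoch timestamp, then the
#execution time in microseconds); matching on the first 8 digits of the
#current epoch keeps entries from roughly the current period, and the line
#after each match is the duration.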
time_str=${timestamp:0:8}
max_time=$(sed -n "/^$time_str/ {n;p}" /tmp/redis_slowlog_$port|sort -n|sed -n '$p')
fi
test -z $max_time && max_time=0
Json_join redis.slowlog_max_time $max_time "port=$port" $countertype
return 0
}
Test_keyspace_hit_ratio(){
value=0
keyspace_total=$((keyspace_hits + keyspace_misses))
[ $keyspace_total -ne 0 ] && value=$((100 * keyspace_hits / keyspace_total))
Json_join redis.keyspace_hit_ratio $value "port=$port" $countertype
return $value
}
Test_use_memory(){
value=0
[ $maxmemory -ne 0 ] && value=$((100 * used_memory / maxmemory))
Json_join redis.used_memory_percent $value "port=$port" $countertype
return $value
}
Test_use_connected_clients(){
value=0
[ $maxclients -ne 0 ] && value=$((100 * connected_clients / maxclients))
Json_join redis.used_connected_clients_percent $value "port=$port" $countertype
return $value
}
Test_use_cpu(){
pid=$(ps aux|grep redis-server|grep :${port}|awk '{print $2}')
value=$(top -b -n1| grep redis-server| grep ${pid}|awk '{print $9}')
Json_join redis.used_cpu_percent $value "port=$port" $countertype
}
Main(){
for port in $(grep $service ../service_port 2>/dev/null| grep -v redis-shake 2>/dev/null|awk '$0=$2');do
#which redis-cli &>/dev/null || exit 1
[ $port -gt 10000 ] && continue
test -f redis_auth.conf && passwd=$(awk -F: '/^'$port':/{print $2}' redis_auth.conf)
test -z $passwd && redis_cli_cmd='redis-cli' || redis_cli_cmd="redis-cli -a $passwd"
Test_alive || continue
Test_slowlog || continue
Get_current_value
Test_keyspace_hit_ratio
Test_use_connected_clients
Test_use_memory
#Test_use_cpu
Push_n9e
done
jstr=$(echo $jstr|sed 's/^/[/;s/,$/]/;s/\[$/[]/')
echo $jstr
}
Main
#!/bin/bash
step=$(echo $0|grep -Po '\d+(?=_)')
service_port=/home/n9e/service_port
base_dir=$(cd $(dirname $0);pwd)
cd $base_dir
endpoint=$(ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1)
Json_join(){
metric=$1
value=$2
tags=${3:-""}
countertype=${4:-GAUGE}
jstr=$jstr"{\"endpoint\": \"${endpoint}\", \"metric\": \"${metric}\", \"value\": $value,\"step\": ${step}, \"tags\": \"${tags}\",\"counterType\":\"${countertype}\",\"timestamp\": $(date +%s)},"
}
Check_plugin(){
Json_join plugin.myself.status 1
}
Check_ntpd(){
#alarm value: >30 or =1024
value=$(ntpq -pn 2>/dev/null|grep '^\*'|awk '{print $9}')
test -z $value && value=1024
Json_join sys.ntp.offset $value
}
Check_passwd(){
#alarm value: diff(#1)!=0
value=$(stat -c %Y /etc/shadow)
Json_join sys.passwd.modify $value
}
Check_uptime(){
#alarm value: diff(#1)<0
value=$(awk '$0=$1' /proc/uptime)
Json_join sys.uptime.duration $value
}
Service_port() {
service_port_tmp=/tmp/service_port_tmp
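#Parse `ss -tnlp` listeners into "process port" pairs (e.g. "sshd 22") and
#keep $service_port as a cumulative, de-duplicated inventory of them.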
if [ ! -f $service_port ];then
ss -tnlp|gawk 'match($0,"[^ ]+:([0-9]+).+users:\\(\\(\"([^\"]+)",a)&&$0=a[2]" "a[1]' |sort |uniq > $service_port
else
ss -tnlp|gawk 'match($0,"[^ ]+:([0-9]+).+users:\\(\\(\"([^\"]+)",a)&&$0=a[2]" "a[1]' |sort |uniq > $service_port_tmp
grep -v -f $service_port $service_port_tmp >> $service_port
fi
}
Check_plugin
Service_port
Check_ntpd
Check_passwd
Check_uptime
jstr=$(echo $jstr|sed 's/^/[/;s/,$/]/;s/\[$/[]/')
echo $jstr
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import time
import json
import commands
metric = ['usr', 'nice', 'sys', 'idle', 'iowait', 'irq', 'soft', 'steal', 'guest']
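# These names map, in order, onto the per-CPU jiffy columns of /proc/stat:
# user, nice, system, idle, iowait, irq, softirq, steal, guest.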
def get_cpu_core_stat(num):
data = []
for x in range(num):
try:
            handler = os.popen("grep -w 'cpu%d' /proc/stat" % x)  # -w avoids cpu1 also matching cpu10, cpu11, ...
except:
continue
output = handler.read().strip().split()[1:]
if len(output) < 9:
continue
index=0
for m in output:
if len(metric) == index: continue
t = {}
t['endpoint'] = endpoint
t['metric'] = 'cpu.core.%s' % metric[index]
t['timestamp'] = int(time.time())
t['step'] = 60
t['counterType'] = 'COUNTER'
t['tags'] = 'core=%s' % str(x)
t['value'] = int(m)
index += 1
data.append(t)
return data
if __name__ == "__main__":
code, endpoint = commands.getstatusoutput("ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1")
if code != 0:
sys.stderr.write('cannot get local ip')
sys.exit(0)
core_total = int(os.popen("cat /proc/cpuinfo | grep processor | tail -1 | cut -d' ' -f2").read().strip()) + 1
print(json.dumps(get_cpu_core_stat(core_total)))
#!/bin/env python
#-*- coding:utf8 -*-
"""
mongodb_auth.conf configuration file format:
items:
- {port: 20000, user: "",password: ""}
"""
import os
import sys
import time
import yaml
import json
import commands
# all falcon counter type metrics list
mongodb_counter_metric = ["asserts_msg",
"asserts_regular",
"asserts_rollovers",
"asserts_user",
"asserts_warning",
"page_faults",
"connections_totalCreated",
"locks_Global_acquireCount_ISlock",
"locks_Global_acquireCount_IXlock",
"locks_Global_acquireCount_Slock",
"locks_Global_acquireCount_Xlock",
"locks_Global_acquireWaitCount_ISlock",
"locks_Global_acquireWaitCount_IXlock",
"locks_Global_timeAcquiringMicros_ISlock",
"locks_Global_timeAcquiringMicros_IXlock",
"locks_Database_acquireCount_ISlock",
"locks_Database_acquireCount_IXlock",
"locks_Database_acquireCount_Slock",
"locks_Database_acquireCount_Xlock",
"locks_Collection_acquireCount_ISlock",
"locks_Collection_acquireCount_IXlock",
"locks_Collection_acquireCount_Xlock",
"opcounters_command",
"opcounters_insert",
"opcounters_delete",
"opcounters_update",
"opcounters_query",
"opcounters_getmore",
"opcountersRepl_command",
"opcountersRepl_insert",
"opcountersRepl_delete",
"opcountersRepl_update",
"opcountersRepl_query",
"opcountersRepl_getmore",
"network_bytesIn",
"network_bytesOut",
"network_numRequests",
"backgroundFlushing_flushes",
"backgroundFlushing_last_ms",
"cursor_timedOut",
"wt_cache_readinto_bytes",
"wt_cache_writtenfrom_bytes",
"wt_bm_bytes_read",
"wt_bm_bytes_written",
"wt_bm_blocks_read",
"wt_bm_blocks_written"]
ts = int(time.time())
step = int(os.path.basename(__file__).split('_')[0])
conf_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'mongodb_auth.conf')
if not os.path.exists(conf_file): sys.exit(0)
from mongodb_server import mongodbMonitor
code, endpoint = commands.getstatusoutput("ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1")
if code != 0: sys.exit(0)
f = open(conf_file)
y = yaml.safe_load(f)
f.close()
mongodb_items = y["items"]
mongodb_update_list = []
for mongodb_ins in mongodb_items:
mongodb_monitor = mongodbMonitor()
mongodb_tag = "port=" + str(mongodb_ins["port"])
err, conn = mongodb_monitor.mongodb_connect(host=endpoint, port=mongodb_ins[
"port"], user=mongodb_ins["user"], password=mongodb_ins["password"])
if err != 0:
key_item_dict = {
"endpoint": endpoint,
"metric": "mongodb.local_alive",
"tags": mongodb_tag,
"timestamp": ts,
"value": 0,
"step": step,
"counterType": "GAUGE"}
        mongodb_update_list.append(key_item_dict)
        # The instance is dead: report mongodb.local_alive = 0 for it, then
        # move on to the next instance.
        continue
mongodb_dict = mongodb_monitor.get_mongo_monitor_data(conn)
mongodb_dict_keys = mongodb_dict.keys()
for mongodb_metric in mongodb_dict_keys:
if mongodb_metric in mongodb_counter_metric:
key_item_dict = {
"endpoint": endpoint,
"metric": "mongodb." + mongodb_metric,
"tags": mongodb_tag,
"timestamp": ts,
"value": mongodb_dict[mongodb_metric],
"step": step,
"counterType": "COUNTER"}
else:
if mongodb_metric == 'mem_supported': continue
key_item_dict = {
"endpoint": endpoint,
"metric": "mongodb." + mongodb_metric,
"tags": mongodb_tag,
"timestamp": ts,
"value": mongodb_dict[mongodb_metric],
"step": step,
"counterType": "GAUGE"}
        mongodb_update_list.append(key_item_dict)
print(json.dumps(mongodb_update_list))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A wrapper script with srvadmin and other tools for hardware monitor.
Supported metrics:
cpu memory raidcard pdisk vdisk raidcard_bat
bios cmos_bat fan power board_temp cpu_temp
"""
import sys
import json
import time
import socket
#import urllib2
import commands
import subprocess
from optparse import OptionParser
host = socket.gethostname()
messages = []
verbs = []
def addverb(metric, model, index, status, info):
m = {}
m['metric'] = metric
m['model'] = model
m['index'] = index
m['status'] = status
m['info'] = info
verbs.append(m)
def addmsg(metric, value):
m = {}
m['metric'] = 'hw.%s' % metric
m['endpoint'] = endpoint
m['tags'] = 'vendor=dell'
m['value'] = value
m['timestamp'] = int(time.time())
m['step'] = int(step)
m['counterType'] = 'GAUGE'
messages.append(m)
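# map_value turns a textual device status into the 0/1/2 severity scale
# (0 = fault, 1 = warning, 2 = OK). States missing from the table yield None;
# under Python 2, None < int is True, so an unmapped state wins the
# `if v < value` comparisons below and is pushed as JSON null.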
def map_value(state):
statemap = {0:['crit', 'critical'],
1:['warn', 'warning', 'non-critical'],
2:['ok', 'ready']
}
for i in statemap:
if state.lower() in statemap[i]:
return i
def execute(cmd):
return commands.getstatusoutput(cmd)
#p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
#return p.communicate()
def check_exec_status():
value = 2
cmd = 'omreport about'
#code, stdout = execute(cmd)
code, out = execute(cmd)
if code != 0: value = 0
addmsg('exec.status', value)
return value
# cpu
def check_cpu():
cmd = 'omreport chassis processors -fmt ssv'
#code, stdout = execute(cmd)
code, stdout = execute(cmd)
cpus = [cpu for cpu in stdout.splitlines() if 'CPU' in cpu]
value = 2
for line in cpus:
i = line.split(';')
Index = i[0].strip().lower()
Status = i[1].strip().lower()
Connector_Name = i[2].strip().lower()
Processor_Brand = i[3].strip().lower()
Processor_Version = i[4].strip().lower()
Current_Speed = i[5].strip().lower()
State = i[6].strip().lower()
Core_Count = i[7].strip().lower()
try:
model = Processor_Brand.split()[3]
except:
model = Processor_Brand.split()[0]
v = map_value(Status)
if v < value:
value = v
addverb('cpu', model, Connector_Name, Status, State)
addmsg('cpu', value)
# memory
def check_memory():
cmd = 'omreport chassis memory -fmt ssv'
#code, stdout = execute(cmd)
code, stdout = execute(cmd)
mems = [mem for mem in stdout.splitlines() if 'DIMM' in mem]
value = 2
for line in mems:
i = line.split(';')
# TODO make sure index here is uniq
Index = i[0].strip()
Status = i[1].strip().lower()
Connector_Name = i[2].strip().lower()
Type = i[3].strip().lower()
Size = i[4].strip()
if Status == 'unknown':
continue
        index = Connector_Name.replace('dimm_', '', 1)  # strip the prefix; lstrip('dimm_') removes a character set, not a prefix
v = map_value(Status)
if v < value:
value = v
addverb('memory', Type, Connector_Name, Status, Size)
addmsg('memory', value)
# disk raidcard
def check_raidcard():
cmd = 'omreport storage controller -fmt ssv'
code, stdout = execute(cmd)
ctrlers = [c for c in stdout.splitlines() if 'Applicable' in c]
ids = []
value = 2
if not ctrlers:
return
for line in ctrlers:
i = line.split(';')
ID = i[0].strip()
Status = i[1].strip()
Name = i[2].strip()
Slot_ID = i[3].strip()
State = i[4].strip()
Firmware_Version = i[5].strip()
Latest_Available_Firmware_Version = i[6].strip()
Driver_Version = i[7].strip()
Minimum_Required_Driver_Version = i[8].strip()
Storport_Driver_Version = i[9].strip()
Minimum_Required_Storport_Driver_Version = i[10].strip()
Number_of_Connectors = i[11].strip()
Rebuild_Rate = i[12].strip()
BGI_Rate = i[13].strip()
Check_Consistency_Rate = i[14].strip()
Reconstruct_Rate = i[15].strip()
Alarm_State = i[16].strip()
Cluster_Mode = i[17].strip()
SCSI_Initiator_ID = i[18].strip()
Cache_Memory_Size = i[19].strip()
Patrol_Read_Mode = i[20].strip()
Patrol_Read_State = i[21].strip()
Patrol_Read_Rate = i[22].strip()
Patrol_Read_Iterations = i[23].strip()
Abort_Check_Consistency_on_Error = i[24].strip()
Allow_Revertible_Hot_Spare_and_Replace_Member = i[25].strip()
Load_Balance = i[26].strip()
Auto_Replace_Member_on_Predictive_Failure = i[27].strip()
Redundant_Path_view = i[28].strip()
CacheCade_Capable = i[29].strip()
Persistent_Hot_Spare = i[30].strip()
Encryption_Capable = i[31].strip()
Encryption_Key_Present = i[32].strip()
Encryption_Mode = i[33].strip()
Preserved_Cache = i[34].strip()
if len(i) == 36:
T10_Protection_Information_Capable = i[35].strip()
elif len(i) == 40:
Spin_Down_Unconfigured_Drives = i[35].strip()
Spin_Down_Hot_Spares = i[36].strip()
Spin_Down_Configured_Drives = i[37].strip()
Automatic_Disk_Power_Saving_Idle_C = i[38].strip()
T10_Protection_Information_Capable = i[39].strip()
v = map_value(Status)
if v < value:
value = v
ids.append(ID)
addverb('raidcard', Name, ID, Status, State)
addmsg('raidcard', value)
return(ids)
# pdisk
def check_pdisk(ctrlers=[0]):
if not ctrlers:
return
value = 2
for cid in ctrlers:
cmd = 'omreport storage pdisk controller=%s -fmt ssv' % cid
code, stdout = execute(cmd)
pdisks = [p for p in stdout.splitlines() if 'bytes' in p]
for line in pdisks:
i = line.split(';')
ID = i[0].strip()
Status = i[1].strip()
Name = i[2].strip()
State = i[3].strip()
Power_Status = i[4].strip()
Bus_Protocol = i[5].strip()
Media = i[6].strip()
Part_of_Cache_Pool = i[7].strip()
Remaining_Rated_Write_Endurance = i[8].strip()
Failure_Predicted = i[9].strip()
Revision = i[10].strip()
Driver_Version = i[11].strip()
Model_Number = i[12].strip()
T10_PI_Capable = i[13].strip()
Certified = i[14].strip()
Encryption_Capable = i[15].strip()
Encrypted = i[16].strip()
Progress = i[17].strip()
Mirror_Set_ID = i[18].strip()
Capacity = i[19].strip()
Used_RAID_Disk_Space = i[20].strip()
Available_RAID_Disk_Space = i[21].strip()
Hot_Spare = i[22].strip()
Vendor_ID = i[23].strip()
Product_ID = i[24].strip()
Serial_No = i[25].strip()
Part_Number = i[26].strip()
Negotiated_Speed = i[27].strip()
Capable_Speed = i[28].strip()
PCIe_Maximum_Link_Width = i[29].strip()
PCIe_Negotiated_Link_Width = i[30].strip()
Sector_Size = i[31].strip()
if len(i) > 33:
Device_Write_Cache = i[32].strip()
Manufacture_Day = i[33].strip()
Manufacture_Week = i[34].strip()
Manufacture_Year = i[35].strip()
SAS_Address = i[36].strip()
info = {}
info = {'Bus_Protocol': Bus_Protocol, 'Media': Media,
'Capacity': Capacity, 'State': State,
'Vendor_ID': Vendor_ID,
'Serial_No': Serial_No}
if Progress != 'Not Applicable':
info['Progress'] = Progress
v = map_value(Status)
if v < value:
value = v
addverb('pdisk', Product_ID, ID, Status, info)
addmsg('pdisk', value)
# vdisk
def check_vdisk(ctrlers=[0]):
if not ctrlers:
return
value = 2
for cid in ctrlers:
cmd = 'omreport storage vdisk controller=%s -fmt ssv' % cid
code, stdout = execute(cmd)
vdisks = [v for v in stdout.splitlines() if 'bytes' in v]
for line in vdisks:
i = line.split(';')
ID = i[0].strip()
Status = i[1].strip()
Name = i[2].strip()
State = i[3].strip()
Hot_Spare_Policy_violated = i[4].strip()
if len(i) == 19:
Virtual_Disk_Bad_Blocks = i[5].strip()
Encrypted = i[6].strip()
Layout = i[7].strip()
Size = i[8].strip()
T10_Protection_Information_Status = i[9].strip()
Associated_Fluid_Cache_State = i[10].strip()
Device_Name = i[11].strip()
Bus_Protocol = i[12].strip()
Media = i[13].strip()
Read_Policy = i[14].strip()
Write_Policy = i[15].strip()
Cache_Policy = i[16].strip()
Stripe_Element_Size = i[17].strip()
Disk_Cache_Policy = i[18].strip()
elif len(i) == 18:
Encrypted = i[5].strip()
Layout = i[6].strip()
Size = i[7].strip()
T10_Protection_Information_Status = i[8].strip()
Associated_Fluid_Cache_State = i[9].strip()
Device_Name = i[10].strip()
Bus_Protocol = i[11].strip()
Media = i[12].strip()
Read_Policy = i[13].strip()
Write_Policy = i[14].strip()
Cache_Policy = i[15].strip()
Stripe_Element_Size = i[16].strip()
Disk_Cache_Policy = i[17].strip()
elif len(i) == 16:
Encrypted = i[5].strip()
Layout = i[6].strip()
Size = i[7].strip()
Device_Name = i[8].strip()
Bus_Protocol = i[9].strip()
Media = i[10].strip()
Read_Policy = i[11].strip()
Write_Policy = i[12].strip()
Cache_Policy = i[13].strip()
Stripe_Element_Size = i[14].strip()
Disk_Cache_Policy = i[15].strip()
info = {}
info = {'Bus_Protocol': Bus_Protocol, 'Media': Media,
'Device_Name': Device_Name, 'Size': Size, 'State': State}
if len(i) == 19:
info['Virtual_Disk_Bad_Blocks'] = Virtual_Disk_Bad_Blocks
v = map_value(Status)
if v < value:
value = v
addverb('vdisk', Layout, ID, Status, info)
addmsg('vdisk', value)
# raidcard battery
def check_raidcard_bat():
cmd = 'omreport storage battery -fmt ssv'
code, stdout = execute(cmd)
batteries = [bat for bat in stdout.splitlines() if 'Battery' in bat]
if not batteries:
return
value = 2
for line in batteries:
i = line.split(';')
ID = i[0].strip()
Status = i[1].strip()
Name = i[2].strip()
State = i[3].strip()
Recharge_Count = i[4].strip()
Max_Recharge_Count = i[5].strip()
Learn_State = i[6].strip()
Next_Learn_Time = i[7].strip()
Maximum_Learn_Delay = i[8].strip()
try:
Learn_Mode = i[9].strip()
except:
Learn_Mode = False
v = map_value(Status)
if v < value:
value = v
addverb('raidcard_bat', Name, ID, Status, Learn_State)
addmsg('raidcard_bat', value)
# bios
def check_bios():
cmd = 'omreport chassis biossetup -fmt ssv'
code, stdout = execute(cmd)
bsets = [b for b in stdout.splitlines() if 'C State' in b or 'C1-E' in b or
'C1E' in b]
if not bsets:
return
value = 2
for line in bsets:
i = line.split(';')
ATTRIBUTE = i[0].strip().lower()
if 'c state' in ATTRIBUTE:
index = 'cstate'
else:
index = 'c1e'
VALUE = i[1].strip()
if VALUE == 'Enabled':
Status = 'warn'
elif VALUE == 'Disabled':
Status = 'ok'
else:
continue
v = map_value(Status)
if v < value:
value = v
addverb('bios', "bios_setting", ATTRIBUTE, Status, VALUE)
addmsg('bios', value)
# cmos battery
def check_cmos_bat():
cmd = 'omreport chassis batteries -fmt ssv'
code, stdout = execute(cmd)
bats = [battery for battery in stdout.splitlines() if 'CMOS' in battery]
if not bats:
return
value = 2
for line in bats:
i = line.split(';')
Index = i[0].strip()
Status = i[1].strip()
Probe_Name = i[2].strip()
Reading = i[3].strip()
v = map_value(Status)
if v < value:
value = v
addverb('cmos_bat', Probe_Name, Index, Status, Reading)
addmsg('cmos_bat', value)
# fan
def check_fan():
cmd = 'omreport chassis fans -fmt ssv'
code, stdout = execute(cmd)
fans = [fan for fan in stdout.splitlines() if 'RPM' in fan]
if not fans:
return
value = 2
for line in fans:
i = line.split(';')
Index = i[0].strip()
Status = i[1].strip()
Probe_Name = i[2].strip()
Reading = i[3].strip()
Minimum_Warning_Threshold = i[4].strip()
Maximum_Warning_Threshold = i[5].strip()
Minimum_Failure_Threshold = i[6].strip()
Maximum_Failure_Threshold = i[7].strip()
v = map_value(Status)
if v < value:
value = v
addverb('fan', Probe_Name, Index, Status, Reading)
addmsg('fan', value)
# power
def check_power():
cmd = 'omreport chassis pwrsupplies -fmt ssv'
code, stdout = execute(cmd)
powers = [pwr for pwr in stdout.splitlines() if 'PS' in pwr]
if not powers:
return
value = 2
for line in powers:
i = line.split(';')
Index = i[0].strip()
Status = i[1].strip()
Probe_Name = i[2].strip()
Reading = i[3].strip()
Warning_Threshold = i[4].strip()
Failure_Threshold = i[5].strip()
v = map_value(Status)
if v < value:
value = v
addverb('power', Probe_Name, Index, Status, Reading)
addmsg('power', value)
# power
def check_power_consumption():
cmd = 'omreport chassis pwrmonitoring -fmt ssv'
code, stdout = execute(cmd)
powers = [pwr for pwr in stdout.splitlines() if 'System Board' in pwr]
if not powers:
return
value = 2
for line in powers:
i = line.split(';')
Index = i[0].strip()
Status = i[1].strip()
Probe_Name = i[2].strip()
Reading = i[3].strip()
Warning_Threshold = i[4].strip()
Failure_Threshold = i[5].strip()
v = map_value(Status)
        w = int(Reading.split()[0])
        if w > value:
            value = w
addverb('power_consumption', Probe_Name, Index, Status, Reading)
addmsg('power_consumption', value)
# board temp
def check_board_temp():
cmd = 'omreport chassis temps -fmt ssv'
code, stdout = execute(cmd)
temp = [t for t in stdout.splitlines() if 'Board' in t]
if not temp:
return
value = 2
for line in temp:
i = line.split(';')
Index = i[0].strip()
Status = i[1].strip()
Probe_Name = i[2].strip()
Reading = i[3].strip().split()[0]
Minimum_Warning_Threshold = i[4].strip()
Maximum_Warning_Threshold = i[5].strip()
Minimum_Failure_Threshold = i[6].strip()
Maximum_Failure_Threshold = i[7].strip()
v = float(Reading)
if v > value:
value = v
addverb('board_temp', Probe_Name, Index, Status, Reading)
if Probe_Name == 'System Board Inlet Temp':
addmsg('ambient_temp', value)
addmsg('board_temp', value)
# cpu temp
def check_cpu_temp():
cmd = 'sensors'
code, stdout = execute(cmd)
lines = stdout.splitlines()
temps = []
    id = False
    lastcore = False
    temp = {}
for line in lines:
if line.startswith('coretemp'):
if line != id:
id = line
temp = {}
value = 0
temp['id'] = id
elif line.startswith('Core'):
lastcore = True
key = line.split(':')[0]
vv = line.split(':')[1].split()[0]
v = vv.split('\xc2\xb0C')[0].split('+')[1]
if float(v) > value:
value = float(v)
temp['core'] = key
temp['reading'] = value
elif line == '' and lastcore:
if len(temp) != 0:
temps.append(temp)
else:
lastcore = False
value = 2
for temp in temps:
Index = '%d' % temps.index(temp)
Probe_Name = temp['id']
Reading = temp['reading']
Maximum_Warning_Threshold = 80
Maximum_Failure_Threshold = 90
if Reading >= Maximum_Failure_Threshold:
Status = 'crit'
elif Reading >= Maximum_Warning_Threshold:
Status = 'warn'
else:
Status = 'ok'
if Reading > value:
value = Reading
addverb('cpu_temp', Probe_Name, Index, Status, Reading)
addmsg('cpu_temp', value)
def check(target=False):
if not target:
if check_exec_status() == 0: return messages
check_cpu()
check_memory()
ctrlers = check_raidcard()
check_pdisk(ctrlers=ctrlers)
check_vdisk(ctrlers=ctrlers)
check_raidcard_bat()
check_cmos_bat()
check_bios()
check_fan()
check_power()
check_power_consumption()
check_board_temp()
check_cpu_temp()
elif target == 'cpu':
check_cpu()
elif target == 'memory':
check_memory()
elif target == 'raidcard':
check_raidcard()
elif target == 'pdisk':
c = check_raidcard()
check_pdisk(c)
elif target == 'vdisk':
c = check_raidcard()
check_vdisk(c)
elif target == 'raidcard_bat':
check_raidcard_bat()
elif target == 'cmos_bat':
check_cmos_bat()
elif target == 'bios':
check_bios()
elif target == 'fan':
check_fan()
elif target == 'power':
check_power()
elif target == 'power_consumption':
check_power_consumption()
elif target == 'board_temp':
check_board_temp()
elif target == 'cpu_temp':
check_cpu_temp()
return messages
#def push(message):
# try:
# urllib2.urlopen(
# url = 'http://127.0.0.1:2058/api/collector/push',
# data = json.dumps(message)
# )
# except:
# pass
if __name__ == "__main__":
code, endpoint = commands.getstatusoutput("ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|awk -F ':' '{print $NF}'|head -n 1")
if code != 0:
sys.stderr.write('cannot get local ip')
sys.exit(0)
metrics = ['cpu', 'memory', 'raidcard', 'pdisk', 'vdisk', 'raidcard_bat',
'bios', 'cmos_bat', 'fan', 'power', 'power_consumption', 'board_temp', 'ambient_temp', 'cpu_temp']
parser = OptionParser()
parser.add_option("-p", "--push", action="store_true", dest="push", help="push result to agent")
parser.add_option("-d", "--debug", action="store_true", dest="debug", help="output debug info")
parser.add_option("-m", "--metric", action="store", dest="metric", help="check special metric")
parser.add_option("-s", "--step", action="store", dest="step", help="check special metric")
(options, args) = parser.parse_args()
if not options.step:
step = 600
else:
step = int(options.step)
metric=None
if options.metric:
metric = options.metric
if metric not in metrics:
print(__doc__)
parser.print_help()
exit(1)
messages = check(target=metric)
if options.push:
addmsg('status', 2)
print(json.dumps(messages))
#push(messages)
else:
if options.debug:
print(json.dumps(messages, indent=2))
else:
print(json.dumps(verbs, indent=2))
#!/bin/env python2.6
#-*- coding:utf8 -*-
import sys
import os
import pymongo
from pymongo import MongoClient
class mongodbMonitor(object):
def mongodb_connect(self, host=None, port=None, user=None, password=None):
try:
            # connection timeout: 1 sec.
conn = MongoClient(host, port, serverSelectionTimeoutMS=1000)
if user and password:
db_admin = conn["admin"]
                db_admin.authenticate(user, password)
conn.server_info()
except:
e = sys.exc_info()[0]
return e, None
return 0, conn
# data node(1): standalone, replset primary, replset secondary. mongos(2),
# mongoConfigSrv(3)
def get_mongo_role(self, conn):
mongo_role = 1
conn.server_info()
if (conn.is_mongos):
mongo_role = 2
        # Is this a config server? It is not a mongos but it has the
        # config.chunks collection, so treat it as a config server.
elif ("chunks" in conn.get_database("config").collection_names()):
mongo_role = 3
return mongo_role
def get_mongo_monitor_data(self, conn):
mongo_monitor_dict = {}
# mongo local alive metric for all nodes.
mongo_monitor_dict["local_alive"] = 1
mongo_role = self.get_mongo_role(conn)
if(mongo_role == 1):
mongodb_role, serverStatus_dict = self.serverStatus(conn)
mongo_monitor_dict.update(serverStatus_dict)
repl_status_dict = {}
if (mongodb_role == "master" or mongodb_role == "secondary"):
repl_status_dict = self.repl_status(conn)
mongo_monitor_dict.update(repl_status_dict)
#else:
# pass
# # print "this is standalone node"
elif(mongo_role == 2): # mongos
shards_dict = self.shard_status(conn)
mongo_monitor_dict.update(shards_dict)
return mongo_monitor_dict
def serverStatus(self, connection):
serverStatus = connection.admin.command(
pymongo.son_manipulator.SON([('serverStatus', 1)]))
mongodb_server_dict = {} # mongodb server status metric for upload to falcon
mongo_version = serverStatus["version"]
# uptime metric
mongodb_server_dict["uptime"] = int(serverStatus["uptime"])
# asserts section metrics
mongo_asserts = serverStatus["asserts"]
for asserts_key in mongo_asserts.keys():
asserts_key_name = "asserts_" + asserts_key
mongodb_server_dict[asserts_key_name] = mongo_asserts[asserts_key]
# "extra_info" section metrics: page_faults. falcon counter type.
if "extra_info" in serverStatus:
mongodb_server_dict["page_faults"] = serverStatus[
"extra_info"]["page_faults"]
# "connections" section metrics
current_conn = serverStatus["connections"]["current"]
available_conn = serverStatus["connections"]["available"]
mongodb_server_dict["connections_current"] = current_conn
mongodb_server_dict["connections_available"] = available_conn
# mongodb connection used percent
mongodb_server_dict["connections_used_percent"] = int(
(current_conn * 100 / (current_conn + available_conn)))
# total created from mongodb started. COUNTER metric
mongodb_server_dict["connections_totalCreated"] = serverStatus[
"connections"]["totalCreated"]
# "globalLock" currentQueue
mongodb_server_dict["globalLock_currentQueue_total"] = serverStatus[
"globalLock"]["currentQueue"]["total"]
mongodb_server_dict["globalLock_currentQueue_readers"] = serverStatus[
"globalLock"]["currentQueue"]["readers"]
mongodb_server_dict["globalLock_currentQueue_writers"] = serverStatus[
"globalLock"]["currentQueue"]["writers"]
# "locks" section, Changed in version 3.0
if "locks" in serverStatus and mongo_version > "3.0":
locks_dict_keys = serverStatus["locks"].keys()
for lock_scope in locks_dict_keys: # Global, Database,Collection,Oplog
for lock_metric in serverStatus["locks"][lock_scope]:
for lock_type in serverStatus[
"locks"][lock_scope][lock_metric]:
if lock_type == "R":
lock_name = "Slock"
elif lock_type == "W":
lock_name = "Xlock"
elif lock_type == "r":
lock_name = "ISlock"
elif lock_type == "w":
lock_name = "IXlock"
lock_metric_key = "locks_" + lock_scope + "_" + lock_metric + "_" + lock_name
mongodb_server_dict[lock_metric_key] = serverStatus[
"locks"][lock_scope][lock_metric][lock_type]
# "network" section metrics: bytesIn, bytesOut, numRequests; counter type
if "network" in serverStatus:
for network_metric in serverStatus["network"].keys():
network_metric_key = "network_" + network_metric # network metric key for upload
mongodb_server_dict[network_metric_key] = serverStatus[
"network"][network_metric]
# "opcounters" section metrics: insert, query, update, delete, getmore, command. couter type
if "opcounters" in serverStatus:
for opcounters_metric in serverStatus["opcounters"].keys():
opcounters_metric_key = "opcounters_" + opcounters_metric
mongodb_server_dict[opcounters_metric_key] = serverStatus[
"opcounters"][opcounters_metric]
# "opcountersRepl" section metrics: insert, query, update, delete, getmore, command. couter type
if "opcountersRepl" in serverStatus:
for opcountersRepl_metric in serverStatus["opcountersRepl"].keys():
opcountersRepl_metric_key = "opcountersRepl_" + opcountersRepl_metric
                mongodb_server_dict[opcountersRepl_metric_key] = serverStatus[
                    "opcountersRepl"][opcountersRepl_metric]
# "mem" section metrics:
if "mem" in serverStatus:
for mem_metric in serverStatus["mem"].keys():
mem_metric_key = "mem_" + mem_metric
if(mem_metric in ["bits", "supported"]):
mongodb_server_dict[mem_metric_key] = serverStatus[
"mem"][mem_metric]
else:
mongodb_server_dict[mem_metric_key] = serverStatus[
"mem"][mem_metric] * 1024 * 1024
# "dur" section metrics:
if "dur" in serverStatus:
mongodb_server_dict["dur_journaledBytes"] = serverStatus[
"dur"]["journaledMB"] * 1024 * 1024
mongodb_server_dict["dur_writeToDataFilesBytes"] = serverStatus[
"dur"]["writeToDataFilesMB"] * 1024 * 1024
mongodb_server_dict["dur_commitsInWriteLock"] = serverStatus[
"dur"]["commitsInWriteLock"]
# "repl" section
mongodb_role = ""
if ("repl" in serverStatus and "secondary" in serverStatus["repl"]):
if serverStatus["repl"]["ismaster"]:
mongodb_role = "master"
if serverStatus["repl"]["secondary"]:
mongodb_role = "secondary"
else: # not Replica sets mode
mongodb_role = "standalone"
# "backgroundFlushing" section metrics, only for MMAPv1
if "backgroundFlushing" in serverStatus:
for bgFlush_metric in serverStatus["backgroundFlushing"].keys():
if bgFlush_metric != "last_finished": # discard last_finished metric
bgFlush_metric_key = "backgroundFlushing_" + bgFlush_metric
mongodb_server_dict[bgFlush_metric_key] = serverStatus[
"backgroundFlushing"][bgFlush_metric]
# cursor from "metrics" section
if "metrics" in serverStatus and "cursor" in serverStatus["metrics"]:
cursor_status = serverStatus["metrics"]["cursor"]
mongodb_server_dict["cursor_timedOut"] = cursor_status["timedOut"]
mongodb_server_dict["cursor_open_noTimeout"] = cursor_status[
"open"]["noTimeout"]
mongodb_server_dict["cursor_open_pinned"] = cursor_status[
"open"]["pinned"]
mongodb_server_dict[
"cursor_open_total"] = cursor_status["open"]["total"]
# "wiredTiger" section
if "wiredTiger" in serverStatus:
serverStatus_wt = serverStatus["wiredTiger"]
# cache
wt_cache = serverStatus_wt["cache"]
mongodb_server_dict["wt_cache_used_total_bytes"] = wt_cache[
"bytes currently in the cache"]
mongodb_server_dict["wt_cache_dirty_bytes"] = wt_cache[
"tracked dirty bytes in the cache"]
mongodb_server_dict["wt_cache_readinto_bytes"] = wt_cache[
"bytes read into cache"]
mongodb_server_dict["wt_cache_writtenfrom_bytes"] = wt_cache[
"bytes written from cache"]
# concurrentTransactions
wt_concurrentTransactions = serverStatus_wt[
"concurrentTransactions"]
mongodb_server_dict["wt_concurrentTransactions_write"] = wt_concurrentTransactions[
"write"]["available"]
mongodb_server_dict["wt_concurrentTransactions_read"] = wt_concurrentTransactions[
"read"]["available"]
#"block-manager" section
wt_block_manager = serverStatus_wt["block-manager"]
mongodb_server_dict[
"wt_bm_bytes_read"] = wt_block_manager["bytes read"]
mongodb_server_dict[
"wt_bm_bytes_written"] = wt_block_manager["bytes written"]
mongodb_server_dict[
"wt_bm_blocks_read"] = wt_block_manager["blocks read"]
mongodb_server_dict["wt_bm_blocks_written"] = wt_block_manager[
"blocks written"]
# "rocksdb" engine
if "rocksdb" in serverStatus:
serverStatus_rocksdb = serverStatus["rocksdb"]
mongodb_server_dict["rocksdb_num_immutable_mem_table"] = serverStatus_rocksdb[
"num-immutable-mem-table"]
mongodb_server_dict["rocksdb_mem_table_flush_pending"] = serverStatus_rocksdb[
"mem-table-flush-pending"]
mongodb_server_dict["rocksdb_compaction_pending"] = serverStatus_rocksdb[
"compaction-pending"]
mongodb_server_dict["rocksdb_background_errors"] = serverStatus_rocksdb[
"background-errors"]
mongodb_server_dict["rocksdb_num_entries_active_mem_table"] = serverStatus_rocksdb[
"num-entries-active-mem-table"]
mongodb_server_dict["rocksdb_num_entries_imm_mem_tables"] = serverStatus_rocksdb[
"num-entries-imm-mem-tables"]
mongodb_server_dict[
"rocksdb_num_snapshots"] = serverStatus_rocksdb["num-snapshots"]
mongodb_server_dict["rocksdb_oldest_snapshot_time"] = serverStatus_rocksdb[
"oldest-snapshot-time"]
mongodb_server_dict["rocksdb_num_live_versions"] = serverStatus_rocksdb[
"num-live-versions"]
mongodb_server_dict["rocksdb_total_live_recovery_units"] = serverStatus_rocksdb[
"total-live-recovery-units"]
# "PerconaFT" engine
if "PerconaFT" in serverStatus:
serverStatus_PerconaFT = serverStatus["PerconaFT"]
mongodb_server_dict[
"PerconaFT_log_count"] = serverStatus_PerconaFT["log"]["count"]
mongodb_server_dict[
"PerconaFT_log_time"] = serverStatus_PerconaFT["log"]["time"]
mongodb_server_dict[
"PerconaFT_log_bytes"] = serverStatus_PerconaFT["log"]["bytes"]
mongodb_server_dict["PerconaFT_fsync_count"] = serverStatus_PerconaFT[
"fsync"]["count"]
mongodb_server_dict[
"PerconaFT_fsync_time"] = serverStatus_PerconaFT["fsync"]["time"]
# cachetable
PerconaFT_cachetable = serverStatus_PerconaFT["cachetable"]
mongodb_server_dict[
"PerconaFT_cachetable_size_current"] = PerconaFT_cachetable["size"]["current"]
mongodb_server_dict[
"PerconaFT_cachetable_size_writing"] = PerconaFT_cachetable["size"]["writing"]
mongodb_server_dict[
"PerconaFT_cachetable_size_limit"] = PerconaFT_cachetable["size"]["limit"]
# PerconaFT checkpoint
PerconaFT_checkpoint = serverStatus_PerconaFT["checkpoint"]
mongodb_server_dict[
"PerconaFT_checkpoint_count"] = PerconaFT_checkpoint["count"]
mongodb_server_dict[
"PerconaFT_checkpoint_time"] = PerconaFT_checkpoint["time"]
mongodb_server_dict["PerconaFT_checkpoint_write_nonleaf_count"] = PerconaFT_checkpoint[
"write"]["nonleaf"]["count"]
mongodb_server_dict["PerconaFT_checkpoint_write_nonleaf_time"] = PerconaFT_checkpoint[
"write"]["nonleaf"]["time"]
mongodb_server_dict["PerconaFT_checkpoint_write_nonleaf_bytes_compressed"] = PerconaFT_checkpoint[
"write"]["nonleaf"]["bytes"]["compressed"]
mongodb_server_dict["PerconaFT_checkpoint_write_nonleaf_bytes_uncompressed"] = PerconaFT_checkpoint[
"write"]["nonleaf"]["bytes"]["uncompressed"]
mongodb_server_dict["PerconaFT_checkpoint_write_leaf_count"] = PerconaFT_checkpoint[
"write"]["leaf"]["count"]
mongodb_server_dict["PerconaFT_checkpoint_write_leaf_time"] = PerconaFT_checkpoint[
"write"]["leaf"]["time"]
mongodb_server_dict["PerconaFT_checkpoint_write_leaf_bytes_compressed"] = PerconaFT_checkpoint[
"write"]["leaf"]["bytes"]["compressed"]
mongodb_server_dict["PerconaFT_checkpoint_write_leaf_bytes_uncompressed"] = PerconaFT_checkpoint[
"write"]["leaf"]["bytes"]["uncompressed"]
# serializeTime
for serializeTime_item in serverStatus_PerconaFT["serializeTime"]:
prefix = "PerconaFT_serializeTime_" + serializeTime_item
for serializeTime_key in serverStatus_PerconaFT[
"serializeTime"][serializeTime_item]:
key_name = prefix + "_" + serializeTime_key
mongodb_server_dict[key_name] = serverStatus_PerconaFT[
"serializeTime"][serializeTime_item][serializeTime_key]
# PerconaFT compressionRatio
for compressionRatio_item in serverStatus_PerconaFT[
"compressionRatio"]:
key_name = "PerconaFT_compressionRatio_" + compressionRatio_item
mongodb_server_dict[key_name] = serverStatus_PerconaFT[
"compressionRatio"][compressionRatio_item]
return (mongodb_role, mongodb_server_dict)
def repl_status(self, connection):
replStatus = connection.admin.command("replSetGetStatus")
repl_status_dict = {} # repl set metric dict
# myState "1" for PRIMARY , "2" for SECONDARY, "3":
repl_status_dict["repl_myState"] = replStatus["myState"]
repl_status_members = replStatus["members"]
master_optime = 0 # Master oplog ops time
myself_optime = 0 # SECONDARY oplog ops time
for repl_member in repl_status_members:
if "self" in repl_member and repl_member["self"]:
repl_status_dict["repl_health"] = repl_member["health"]
#repl_status_dict["repl_optime"] = repl_member["optime"].time
if "repl_electionTime" in repl_member:
repl_status_dict["repl_electionTime"] = repl_member[
"electionTime"].time
if "repl_configVersion" in repl_member:
repl_status_dict["repl_configVersion"] = repl_member[
"configVersion"]
#myself_optime = repl_member["optime"].time
#if (replStatus["myState"] == 2 and repl_member[
# "state"] == 1): # CONDARY ,get repl lag
# master_optime = repl_member["optime"].time
#if replStatus["myState"] == 2:
# repl_status_dict["repl_lag"] = master_optime - myself_optime
# oplog window hours
oplog_collection = connection["local"]["oplog.rs"]
oplog_tFirst = oplog_collection.find({}, {"ts": 1}).sort(
'$natural', pymongo.ASCENDING).limit(1).next()
oplog_tLast = oplog_collection.find({}, {"ts": 1}).sort(
'$natural', pymongo.DESCENDING).limit(1).next()
oplogrs_collstats = connection[
"local"].command("collstats", "oplog.rs")
        window_multiple = 1  # assume the oplog.rs collection is not full yet
        if "maxSize" in oplogrs_collstats:
            window_multiple = float(oplogrs_collstats[
                "maxSize"]) / (oplogrs_collstats["count"] * oplogrs_collstats["avgObjSize"])
        else:
            window_multiple = float(oplogrs_collstats[
                "storageSize"]) / (oplogrs_collstats["count"] * oplogrs_collstats["avgObjSize"])
# oplog_window .xx hours
oplog_window = round(
(oplog_tLast["ts"].time - oplog_tFirst["ts"].time) / 3600.0,
2) * window_multiple # full
repl_status_dict["repl_oplog_window"] = oplog_window
return repl_status_dict
# only for mongos node
def shard_status(self, conn):
config_db = conn["config"]
settings_col = config_db["settings"]
balancer_doc = settings_col.find_one({'_id': 'balancer'})
shards_dict = {}
if balancer_doc is None:
shards_dict["shards_BalancerState"] = 1
elif balancer_doc["stopped"]:
shards_dict["shards_BalancerState"] = 0
else:
shards_dict["shards_BalancerState"] = 1
# shards_activeWindow metric,0: without setting, 1:setting
# shards_activeWindow_start metric, { "start" : "23:30", "stop" : "6:00" } : 23.30 for 23:30
# shards_activeWindow_stop metric
if balancer_doc is None:
shards_dict["shards_activeWindow"] = 0
elif "activeWindow" in balancer_doc:
shards_dict["shards_activeWindow"] = 1
if "start" in balancer_doc["activeWindow"]:
window_start = balancer_doc["activeWindow"]["start"]
shards_dict["shards_activeWindow_start"] = window_start.replace(
":", ".")
if "stop" in balancer_doc["activeWindow"]:
window_stop = balancer_doc["activeWindow"]["stop"]
shards_dict["shards_activeWindow_stop"] = window_stop.replace(
":", ".")
# shards_chunkSize metric
chunksize_doc = settings_col.find_one({"_id": "chunksize"})
if chunksize_doc is not None:
shards_dict["shards_chunkSize"] = chunksize_doc["value"]
# shards_isBalancerRunning metric
locks_col = config_db["locks"]
balancer_lock_doc = locks_col.find_one({'_id': 'balancer'})
if balancer_lock_doc is None:
# print "config.locks collection empty or missing. be sure you are
# connected to a mongos"
shards_dict["shards_isBalancerRunning"] = 0
elif balancer_lock_doc["state"] > 0:
shards_dict["shards_isBalancerRunning"] = 1
else:
shards_dict["shards_isBalancerRunning"] = 0
# shards_size metric
shards_col = config_db["shards"]
shards_dict["shards_size"] = shards_col.count()
# shards_mongosSize metric
mongos_col = config_db["mongos"]
shards_dict["shards_mongosSize"] = mongos_col.count()
return shards_dict
#!/usr/bin/env python
# coding: utf-8
import cx_Oracle
import inspect
import json
import re
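# Collection helper used by the Oracle plugin above: each metric method runs a
# single query against the v$ views and returns a number, so the collector can
# invoke metrics generically via getattr(odb, metric)().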
class OracleTool(object):
def __init__(self, dbconfig):
self.host = dbconfig['host']
self.port = dbconfig['port']
self.user = dbconfig['user']
self.passwd = dbconfig['passwd']
self.dbname = dbconfig['dbname']
self.db_connect()
def db_connect(self):
self.db = cx_Oracle.connect("{0}/{1}@{2}:{3}/{4}".format(self.user, self.passwd, self.host, self.port, self.dbname))
self.cur = self.db.cursor()
def db_close(self):
self.cur.close()
self.db.close()
def check_active(self):
"""Check Intance is active and open"""
sql = "select to_char(case when inst_cnt > 0 then 1 else 0 end, \
'FM99999999999999990') retvalue from (select count(*) inst_cnt \
from v$instance where status = 'OPEN' and logins = 'ALLOWED' \
and database_status = 'ACTIVE')"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def rcachehit(self):
"""Read Cache hit ratio"""
sql = "SELECT nvl(to_char((1 - (phy.value - lob.value - dir.value) / \
ses.value) * 100, 'FM99999990.9999'), '0') retvalue \
FROM v$sysstat ses, v$sysstat lob, \
v$sysstat dir, v$sysstat phy \
WHERE ses.name = 'session logical reads' \
AND dir.name = 'physical reads direct' \
AND lob.name = 'physical reads direct (lob)' \
AND phy.name = 'physical reads'"
self.cur.execute(sql)
res = self.cur.fetchall()
return round(float(res[0][0]),2)
def dsksortratio(self):
"""Disk sorts ratio"""
sql = "SELECT nvl(to_char(d.value/(d.value + m.value)*100, \
'FM99999990.9999'), '0') retvalue \
FROM v$sysstat m, v$sysstat d \
WHERE m.name = 'sorts (memory)' \
AND d.name = 'sorts (disk)'"
self.cur.execute(sql)
res = self.cur.fetchall()
return round(float(res[0][0]),2)
def activeusercount(self):
"""Count of active users"""
sql = "select to_char(count(*)-1, 'FM99999999999999990') retvalue \
from v$session where username is not null \
and status='ACTIVE'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def dbsize(self):
"""Size of user data (without temp)"""
sql = "SELECT to_char(sum( NVL(a.bytes - NVL(f.bytes, 0), 0)), \
'FM99999999999999990') retvalue \
FROM sys.dba_tablespaces d, \
(select tablespace_name, sum(bytes) bytes from dba_data_files \
group by tablespace_name) a, \
(select tablespace_name, sum(bytes) bytes from \
dba_free_space group by tablespace_name) f \
WHERE d.tablespace_name = a.tablespace_name(+) AND \
d.tablespace_name = f.tablespace_name(+) \
AND NOT (d.extent_management like 'LOCAL' AND d.contents \
like 'TEMPORARY')"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def dbfilesize(self):
"""Size of all datafiles"""
sql = "select to_char(sum(bytes), 'FM99999999999999990') retvalue \
from dba_data_files"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def version(self):
"""Oracle version (Banner)"""
sql = "select banner from v$version where rownum=1"
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def uptime(self):
"""Instance Uptime (seconds)"""
sql = "select to_char((sysdate-startup_time)*86400, \
'FM99999999999999990') retvalue from v$instance"
self.cur.execute(sql)
        res = self.cur.fetchall()
return int(res[0][0])
def commits(self):
"""User Commits"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'user commits'"
self.cur.execute(sql)
        res = self.cur.fetchall()
return int(res[0][0])
def rollbacks(self):
"""User Rollbacks"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from " \
"v$sysstat where name = 'user rollbacks'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def deadlocks(self):
"""Deadlocks"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'enqueue deadlocks'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def redowrites(self):
"""Redo Writes"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'redo writes'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def tblscans(self):
"""Table scans (long tables)"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'table scans (long tables)'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def tblrowsscans(self):
"""Table scan rows gotten"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'table scan rows gotten'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def indexffs(self):
"""Index fast full scans (full)"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'index fast full scans (full)'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def hparsratio(self):
"""Hard parse ratio"""
sql = "SELECT nvl(to_char(h.value/t.value*100,'FM99999990.9999'), '0') \
retvalue FROM v$sysstat h, v$sysstat t WHERE h.name = 'parse \
count (hard)' AND t.name = 'parse count (total)'"
self.cur.execute(sql)
res = self.cur.fetchall()
return round(float(res[0][0]),2)
def netsent(self):
"""Bytes sent via SQL*Net to client"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'bytes sent via SQL*Net to client'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def netresv(self):
"""Bytes received via SQL*Net from client"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'bytes received via SQL*Net from client'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def netroundtrips(self):
"""SQL*Net roundtrips to/from client"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'SQL*Net roundtrips to/from client'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def logonscurrent(self):
"""Logons current"""
sql = "select nvl(to_char(value, 'FM99999999999999990'), '0') retvalue from \
v$sysstat where name = 'logons current'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def lastarclog(self):
"""Last archived log sequence"""
sql = "select to_char(max(SEQUENCE#), 'FM99999999999999990') \
retvalue from v$log where archived = 'YES'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def lastapplarclog(self):
"""Last applied archive log (at standby).Next items requires
[timed_statistics = true]"""
sql = "select to_char(max(lh.SEQUENCE#), 'FM99999999999999990') \
retvalue from v$loghist lh, v$archived_log al \
where lh.SEQUENCE# = al.SEQUENCE# and applied='YES'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def freebufwaits(self):
"""Free buffer waits"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en \
where se.event(+) = en.name and en.name = 'free buffer waits'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def bufbusywaits(self):
"""Buffer busy waits"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) = \
en.name and en.name = 'buffer busy waits'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def logswcompletion(self):
"""log file switch completion"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'log file switch completion'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def logfilesync(self):
"""Log file sync"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en \
where se.event(+) = en.name and en.name = 'log file sync'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def logprllwrite(self):
"""Log file parallel write"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'log file parallel write'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def enqueue(self):
"""Enqueue waits"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en \
where se.event(+) = en.name and en.name = 'enqueue'"
self.cur.execute(sql)
res = self.cur.fetchall()
if not res: return 0
return int(res[0][0])
def dbseqread(self):
"""DB file sequential read waits"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'db file sequential read'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def dbscattread(self):
"""DB file scattered read"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'db file scattered read'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def dbsnglwrite(self):
"""DB file single write"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'db file single write'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def dbprllwrite(self):
"""DB file parallel write"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'db file parallel write'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def directread(self):
"""Direct path read"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'direct path read'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def directwrite(self):
"""Direct path write"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'direct path write'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def latchfree(self):
"""latch free"""
sql = "select nvl(to_char(time_waited, 'FM99999999999999990'), '0') retvalue \
from v$system_event se, v$event_name en where se.event(+) \
= en.name and en.name = 'latch free'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def tablespace(self, name):
"""Get tablespace usage"""
sql = '''SELECT tablespace_name,
100-(TRUNC((max_free_mb/max_size_mb) * 100)) AS USED
FROM ( SELECT a.tablespace_name,b.size_mb,a.free_mb,b.max_size_mb,a.free_mb + (b.max_size_mb - b.size_mb) AS max_free_mb
FROM (SELECT tablespace_name,TRUNC(SUM(bytes)/1024/1024) AS free_mb FROM dba_free_space GROUP BY tablespace_name) a,
(SELECT tablespace_name,TRUNC(SUM(bytes)/1024/1024) AS size_mb,TRUNC(SUM(GREATEST(bytes,maxbytes))/1024/1024) AS max_size_mb
FROM dba_data_files GROUP BY tablespace_name) b WHERE a.tablespace_name = b.tablespace_name
) where tablespace_name='{0}' order by 1'''.format(name)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][1]
def tablespace_abs(self, name):
"""Get tablespace in use"""
sql = '''SELECT df.tablespace_name "TABLESPACE", (df.totalspace - \
tu.totalusedspace) "FREEMB" from (select tablespace_name, \
sum(bytes) TotalSpace from dba_data_files group by tablespace_name) \
df ,(select sum(bytes) totalusedspace,tablespace_name from dba_segments \
group by tablespace_name) tu WHERE tu.tablespace_name = \
df.tablespace_name and df.tablespace_name = '{0}' '''.format(name)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def show_tablespaces(self):
"""List tablespace names in a JSON like format for Zabbix use"""
sql = "SELECT tablespace_name FROM dba_tablespaces ORDER BY 1"
self.cur.execute(sql)
res = self.cur.fetchall()
return [row[0] for row in res]
def show_tablespaces_temp(self):
"""List temporary tablespace names in a JSON like
format for Zabbix use"""
sql = "SELECT TABLESPACE_NAME FROM DBA_TABLESPACES WHERE \
CONTENTS='TEMPORARY'"
self.cur.execute(sql)
res = self.cur.fetchall()
return [row[0] for row in res]
def check_archive(self, archive):
"""List archive used"""
sql = "select trunc((total_mb-free_mb)*100/(total_mb)) PCT from \
v$asm_diskgroup_stat where name='{0}' \
ORDER BY 1".format(archive)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def show_asm_volumes(self):
"""List als ASM volumes in a JSON like format for Zabbix use"""
sql = "select NAME from v$asm_diskgroup_stat ORDER BY 1"
self.cur.execute(sql)
res = self.cur.fetchall()
return [row[0] for row in res]
def asm_volume_use(self, name):
"""Get ASM volume usage"""
sql = "select round(((TOTAL_MB-FREE_MB)/TOTAL_MB*100),2) from \
v$asm_diskgroup_stat where name = '{0}'".format(name)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def query_lock(self):
"""Query lock"""
sql = "SELECT count(*) FROM gv$lock l WHERE block=1"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def query_redologs(self):
"""Redo logs"""
sql = "select COUNT(*) from v$LOG WHERE STATUS='ACTIVE'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def query_rollbacks(self):
"""Query Rollback"""
sql = "select nvl(trunc(sum(used_ublk*4096)/1024/1024),0) from \
gv$transaction t,gv$session s where ses_addr = saddr"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def query_sessions(self):
"""Query Sessions"""
sql = "select count(*) from gv$session where username is not null \
and status='ACTIVE'"
self.cur.execute(sql)
res = self.cur.fetchall()
return int(res[0][0])
def tablespace_temp(self, name):
"""Query temporary tablespaces"""
sql = "SELECT round(((TABLESPACE_SIZE-FREE_SPACE)/TABLESPACE_SIZE)*100,2) \
PERCENTUAL FROM dba_temp_free_space where \
tablespace_name='{0}'".format(name)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def query_sysmetrics(self, name):
"""Query v$sysmetric parameters"""
sql = "select value from v$sysmetric where METRIC_NAME ='{0}' and \
rownum <=1 order by INTSIZE_CSEC".format(name.replace('_', ' '))
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
def fra_use(self):
"""Query the Fast Recovery Area usage"""
sql = "select round((SPACE_LIMIT-(SPACE_LIMIT-SPACE_USED))/ \
SPACE_LIMIT*100,2) FROM V$RECOVERY_FILE_DEST"
self.cur.execute(sql)
res = self.cur.fetchall()
return round(float(res[0][0]),2)
def show_users(self):
"""Query the list of users on the instance"""
sql = "SELECT username FROM dba_users ORDER BY 1"
self.cur.execute(sql)
res = self.cur.fetchall()
return [row[0] for row in res]
def user_status(self, dbuser):
"""Determines whether a user is locked or not"""
sql = "SELECT account_status FROM dba_users WHERE username='{0}'" \
.format(dbuser)
self.cur.execute(sql)
res = self.cur.fetchall()
return res[0][0]
if __name__ == "__main__":
dbconfig = {
'host': '127.0.0.1',
'port': 1521,
'user': 'monitor',
'passwd': '3IPSkSxDpiPUtlF',
'dbname': 'orcl11g',
}
odb = OracleTool(dbconfig)
print odb.version()
print odb.tablespace_temp('TEMP')
print odb.show_tablespaces_temp()
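    # Hedged sketch (an assumption, not original behavior): the otherwise unused
    # `inspect` and `json` imports above suggest checks could be dispatched by
    # name and emitted as JSON, e.g.:
    for check in ('uptime', 'commits', 'rollbacks'):
        print json.dumps({check: getattr(odb, check)()})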
odb.db_close()
cluster_state
cluster_slots_assigned
cluster_slots_ok
cluster_slots_pfail
cluster_slots_fail
cluster_known_nodes
cluster_size
cluster_current_epoch
cluster_my_epoch
cluster_stats_messages_sent
cluster_stats_messages_received
cmdstat_get_calls
cmdstat_get_usec
cmdstat_get_usec_per_call
cmdstat_set_calls
cmdstat_set_usec
cmdstat_set_usec_per_call
cmdstat_setex_calls
cmdstat_setex_usec
cmdstat_setex_usec_per_call
cmdstat_del_calls
cmdstat_del_usec
cmdstat_del_usec_per_call
cmdstat_exists_calls
cmdstat_exists_usec
cmdstat_exists_usec_per_call
cmdstat_hset_calls
cmdstat_hset_usec
cmdstat_hset_usec_per_call
cmdstat_hgetall_calls
cmdstat_hgetall_usec
cmdstat_hgetall_usec_per_call
cmdstat_keys_calls
cmdstat_keys_usec
cmdstat_keys_usec_per_call
cmdstat_ping_calls
cmdstat_ping_usec
cmdstat_ping_usec_per_call
cmdstat_info_calls
cmdstat_info_usec
cmdstat_info_usec_per_call
cmdstat_ttl_calls
cmdstat_ttl_usec
cmdstat_ttl_usec_per_call
cmdstat_config_calls
cmdstat_config_usec
cmdstat_config_usec_per_call
cmdstat_cluster_calls
cmdstat_cluster_usec
cmdstat_cluster_usec_per_call
cmdstat_slowlog_calls
cmdstat_slowlog_usec
cmdstat_slowlog_usec_per_call
uptime_in_seconds
uptime_in_days
hz
lru_clock
connected_clients
client_longest_output_list
client_biggest_input_buf
blocked_clients
used_memory
used_memory_rss
used_memory_peak
total_system_memory
used_memory_lua
maxmemory
mem_fragmentation_ratio
loading
rdb_changes_since_last_save
rdb_bgsave_in_progress
rdb_last_save_time
rdb_last_bgsave_status
rdb_last_bgsave_time_sec
rdb_current_bgsave_time_sec
aof_enabled
aof_rewrite_in_progress
aof_rewrite_scheduled
aof_last_rewrite_time_sec
aof_current_rewrite_time_sec
aof_last_bgrewrite_status
aof_last_write_status
total_connections_received
total_commands_processed
instantaneous_ops_per_sec
total_net_input_bytes
total_net_output_bytes
instantaneous_input_kbps
instantaneous_output_kbps
rejected_connections
sync_full
sync_partial_ok
sync_partial_err
expired_keys
evicted_keys
keyspace_hits
keyspace_misses
pubsub_channels
pubsub_patterns
latest_fork_usec
migrate_cached_sockets
role
connected_slaves
master_repl_offset
repl_backlog_active
repl_backlog_size
repl_backlog_first_byte_offset
repl_backlog_histlen
used_cpu_sys
used_cpu_user
used_cpu_sys_children
used_cpu_user_children
cluster_enabled
slave_read_only
maxclients