美文网首页自动化监控Zabbix
Zabbix如何自定义脚本监控RAC11g

Zabbix如何自定义脚本监控RAC11g

作者: 阿乐_822e | 来源:发表于2019-08-27 16:30 被阅读0次

    Zabbix上Rac的模板较少,必须得自己实现。大致思路是:每个node上分别监控CPU、Memory、Disk等指标,再创建一个SCAN_IP主机,主要用于监控RAC相关的指标。

    提示:本文假设数据库版本为oracle11g,2个node,且所有机器的zabbix agent都安装在/usr/local/zabbix-agent目录下。
    前期准备:在每个节点上,把zabbix用户加入到oracle的用户组oinstall,命令为:usermod -a -G oinstall zabbix(如果zabbix用户尚不存在,则用 useradd -G oinstall zabbix 创建),随后用 id zabbix 查看一下:


    1.png

    下面开始安装:
    1、 分别把四个sh文件拷到每个节点的/usr/local/zabbix-agent/scripts/目录下并加上执行权限,再逐一检查文件中的PATH路径是否正确,如不正确要修改好;

    1. SCAN_IP归属地:chk_scanip.sh文件
    #!/bin/bash
    # chk_scanip.sh - report which node the SCAN IP currently belongs to.
    #
    # Output: the 8th whitespace-separated field of every "srvctl status scan"
    # line that contains "on" (presumably the node name in
    # "... is running on node <name>" - confirm against your srvctl version),
    # or the literal word "missed" when no such line is found.
    export PATH=$PATH:/u01/app/11.2.0/grid/bin

    scan_master_node=$(srvctl status scan | grep "on" | awk '{print $8}')

    # Quoted so the emptiness test is well-formed whether the value is empty,
    # a single word, or several words.
    if [ -z "$scan_master_node" ]; then
        # Plain assignment (no leading "$"), so the fallback value is really
        # stored and the template's "missed" trigger can match it.
        scan_master_node="missed"
    fi

    # Intentionally unquoted: word splitting puts multiple node names on a
    # single space-separated line.
    echo $scan_master_node
    
    1. 检查ASM状态:chk_asmstatus.sh
    #!/bin/bash
    # chk_asmstatus.sh - ASM status.
    #
    # Prints the 5th whitespace-separated field of each line of
    # "srvctl status asm". The template's ASM triggers look for the node names
    # inside this value, so the field is presumably the list of nodes ASM is
    # running on - confirm against your srvctl output.
    #
    # The shebang must be the very first line of the script file; otherwise it
    # is treated as an ordinary comment and the interpreter choice is left to
    # the caller.
    export PATH=$PATH:/u01/app/11.2.0/grid/bin
    srvctl status asm | awk '{print $5}'
    
    1. 检查在用实例:chk_instancestatus.sh
    #!/bin/bash
    # chk_instancestatus.sh - list the nodes with a running database instance.
    #
    # Usage:  chk_instancestatus.sh <database name>
    # Output: the node names joined by " , " (for example "rac1 , rac2"), or
    #         the word "none" when no instance is reported as running.
    #
    # Each output line of "srvctl status database" is inspected on its own and
    # the last field is taken from lines reading "... is running on node <name>".
    # Fixed field positions across the joined output are not used because a line
    # such as "... is not running on node <name>" has one extra word and would
    # shift every later field.
    export PATH=$PATH:/u01/app/11.2.0/grid/bin
    dbName=$1
    srvctl status database -d "$dbName" | awk '
        / is running on node / { nodes = nodes sep $NF; sep = " , " }
        END { print (nodes == "" ? "none" : nodes) }'
    
    1. 检查节点状态:chk_nodestatus.sh
    #!/bin/bash
    # chk_nodestatus.sh - print the status of one cluster node.
    #
    # Usage:  chk_nodestatus.sh <node name>
    # Output: the second column of the "olsnodes -s" line whose first column
    #         equals the given node name; nothing when no line matches.
    export PATH=$PATH:/u01/app/11.2.0/grid/bin
    node=$1
    # Exact comparison on the first column: a substring search for "rac1" would
    # also pick up lines for nodes such as "rac10".
    olsnodes -s | awk -v node="$node" '$1 == node {print $2}'
    

    2、分别修改每个节点的zabbix_agentd.conf文件,添加自定义项,再重启agent;

    # Custom agent keys for the RAC checks. Each key runs one script from
    # /usr/local/zabbix-agent/scripts/. All four keys are declared with [*];
    # only the two commands that contain $1 hand the first key parameter to
    # the script.
    # SCAN IP owner: the script is called without arguments.
    UserParameter=rac.cluster.chkscanip[*],/usr/local/zabbix-agent/scripts/chk_scanip.sh
    # Node status: $1 is the node name given in the item key.
    UserParameter=rac.cluster.chknodestatus[*],/usr/local/zabbix-agent/scripts/chk_nodestatus.sh $1
    # Instance status: $1 is the database name given in the item key.
    UserParameter=rac.cluster.chkinstancestatus[*],/usr/local/zabbix-agent/scripts/chk_instancestatus.sh $1
    # ASM status: the script is called without arguments.
    UserParameter=rac.cluster.chkasmstatus[*],/usr/local/zabbix-agent/scripts/chk_asmstatus.sh
    

    3、在zbx监控的网页上,创建一台RAC的SCAN_IP地址的机器,再在此机器上创建三个宏变量

    {$DB_NAME} :填入srvctl config database命令的结果
    {$NODE1_NAME}:填入第一个节点的hostname
    {$NODE2_NAME}:填入第二个节点的hostname
    
    2.png

    4、将附件中的内容保存为RAC_Cluster_Check_11g.xml模板文件,先导入zabbix,再应用到步骤3创建的机器上

    5、最后的图形显示如下:


    3.png

    附件:模板文件RAC_Cluster_Check_11g.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <zabbix_export>
        <version>3.4</version>
        <date>2019-08-27T07:50:37Z</date>
        <groups>
            <group>
                <name>Templates</name>
            </group>
        </groups>
        <templates>
            <template>
                <template>RAC_Cluster_Check_11g</template>
                <name>RAC_Cluster_Check_11g</name>
                <description/>
                <groups>
                    <group>
                        <name>Templates</name>
                    </group>
                </groups>
                <applications>
                    <application>
                        <name>RAC_Cluster_Check</name>
                    </application>
                </applications>
                <items>
                    <!-- ASM status item. Polls the key rac.cluster.chkasmstatus every
                         minute (delay 1m), keeps 90 days of history and no trends.
                         type 0 / value_type 1 should be a Zabbix agent check storing
                         character data (per the Zabbix 3.4 export format). The two
                         ASM triggers of this template search the value for
                         {$NODE1_NAME} and {$NODE2_NAME}. -->
                    <item>
                        <name>检查ASM状态</name>
                        <type>0</type>
                        <snmp_community/>
                        <snmp_oid/>
                        <key>rac.cluster.chkasmstatus</key>
                        <delay>1m</delay>
                        <history>90d</history>
                        <trends>0</trends>
                        <status>0</status>
                        <value_type>1</value_type>
                        <allowed_hosts/>
                        <units/>
                        <snmpv3_contextname/>
                        <snmpv3_securityname/>
                        <snmpv3_securitylevel>0</snmpv3_securitylevel>
                        <snmpv3_authprotocol>0</snmpv3_authprotocol>
                        <snmpv3_authpassphrase/>
                        <snmpv3_privprotocol>0</snmpv3_privprotocol>
                        <snmpv3_privpassphrase/>
                        <params/>
                        <ipmi_sensor/>
                        <authtype>0</authtype>
                        <username/>
                        <password/>
                        <publickey/>
                        <privatekey/>
                        <port/>
                        <description/>
                        <inventory_link>0</inventory_link>
                        <applications>
                            <application>
                                <name>RAC_Cluster_Check</name>
                            </application>
                        </applications>
                        <valuemap/>
                        <logtimefmt/>
                        <preprocessing/>
                        <jmx_endpoint/>
                        <master_item/>
                    </item>
                    <!-- Running-instance item. Polls the key
                         rac.cluster.chkinstancestatus with the host macro {$DB_NAME}
                         as its parameter every minute (delay 1m), keeps 90 days of
                         history and no trends. type 0 / value_type 1 should be a
                         Zabbix agent check storing character data (per the Zabbix 3.4
                         export format). The two instance triggers of this template
                         search the value for {$NODE1_NAME} and {$NODE2_NAME}. -->
                    <item>
                        <name>检查在线实例</name>
                        <type>0</type>
                        <snmp_community/>
                        <snmp_oid/>
                        <key>rac.cluster.chkinstancestatus[{$DB_NAME}]</key>
                        <delay>1m</delay>
                        <history>90d</history>
                        <trends>0</trends>
                        <status>0</status>
                        <value_type>1</value_type>
                        <allowed_hosts/>
                        <units/>
                        <snmpv3_contextname/>
                        <snmpv3_securityname/>
                        <snmpv3_securitylevel>0</snmpv3_securitylevel>
                        <snmpv3_authprotocol>0</snmpv3_authprotocol>
                        <snmpv3_authpassphrase/>
                        <snmpv3_privprotocol>0</snmpv3_privprotocol>
                        <snmpv3_privpassphrase/>
                        <params/>
                        <ipmi_sensor/>
                        <authtype>0</authtype>
                        <username/>
                        <password/>
                        <publickey/>
                        <privatekey/>
                        <port/>
                        <description/>
                        <inventory_link>0</inventory_link>
                        <applications>
                            <application>
                                <name>RAC_Cluster_Check</name>
                            </application>
                        </applications>
                        <valuemap/>
                        <logtimefmt/>
                        <preprocessing/>
                        <jmx_endpoint/>
                        <master_item/>
                    </item>
                    <!-- Node 1 status item. Polls the key rac.cluster.chknodestatus
                         with the host macro {$NODE1_NAME} as its parameter every
                         minute (delay 1m), keeps 90 days of history and no trends.
                         type 0 / value_type 1 should be a Zabbix agent check storing
                         character data (per the Zabbix 3.4 export format). The node 1
                         offline trigger searches the value for "Active". -->
                    <item>
                        <name>检查节点一状态</name>
                        <type>0</type>
                        <snmp_community/>
                        <snmp_oid/>
                        <key>rac.cluster.chknodestatus[{$NODE1_NAME}]</key>
                        <delay>1m</delay>
                        <history>90d</history>
                        <trends>0</trends>
                        <status>0</status>
                        <value_type>1</value_type>
                        <allowed_hosts/>
                        <units/>
                        <snmpv3_contextname/>
                        <snmpv3_securityname/>
                        <snmpv3_securitylevel>0</snmpv3_securitylevel>
                        <snmpv3_authprotocol>0</snmpv3_authprotocol>
                        <snmpv3_authpassphrase/>
                        <snmpv3_privprotocol>0</snmpv3_privprotocol>
                        <snmpv3_privpassphrase/>
                        <params/>
                        <ipmi_sensor/>
                        <authtype>0</authtype>
                        <username/>
                        <password/>
                        <publickey/>
                        <privatekey/>
                        <port/>
                        <description/>
                        <inventory_link>0</inventory_link>
                        <applications>
                            <application>
                                <name>RAC_Cluster_Check</name>
                            </application>
                        </applications>
                        <valuemap/>
                        <logtimefmt/>
                        <preprocessing/>
                        <jmx_endpoint/>
                        <master_item/>
                    </item>
                    <!-- Node 2 status item. Polls the key rac.cluster.chknodestatus
                         with the host macro {$NODE2_NAME} as its parameter every
                         minute (delay 1m), keeps 90 days of history and no trends.
                         type 0 / value_type 1 should be a Zabbix agent check storing
                         character data (per the Zabbix 3.4 export format). The node 2
                         offline trigger searches the value for "Active". -->
                    <item>
                        <name>检查节点二状态</name>
                        <type>0</type>
                        <snmp_community/>
                        <snmp_oid/>
                        <key>rac.cluster.chknodestatus[{$NODE2_NAME}]</key>
                        <delay>1m</delay>
                        <history>90d</history>
                        <trends>0</trends>
                        <status>0</status>
                        <value_type>1</value_type>
                        <allowed_hosts/>
                        <units/>
                        <snmpv3_contextname/>
                        <snmpv3_securityname/>
                        <snmpv3_securitylevel>0</snmpv3_securitylevel>
                        <snmpv3_authprotocol>0</snmpv3_authprotocol>
                        <snmpv3_authpassphrase/>
                        <snmpv3_privprotocol>0</snmpv3_privprotocol>
                        <snmpv3_privpassphrase/>
                        <params/>
                        <ipmi_sensor/>
                        <authtype>0</authtype>
                        <username/>
                        <password/>
                        <publickey/>
                        <privatekey/>
                        <port/>
                        <description/>
                        <inventory_link>0</inventory_link>
                        <applications>
                            <application>
                                <name>RAC_Cluster_Check</name>
                            </application>
                        </applications>
                        <valuemap/>
                        <logtimefmt/>
                        <preprocessing/>
                        <jmx_endpoint/>
                        <master_item/>
                    </item>
                    <!-- SCAN IP owner item. Polls the key rac.cluster.chkscanip every
                         minute (delay 1m), keeps 90 days of history and no trends.
                         type 0 / value_type 1 should be a Zabbix agent check storing
                         character data (per the Zabbix 3.4 export format). The two
                         SCAN IP triggers of this template watch this value for
                         changes and for the word "missed". -->
                    <item>
                        <name>检查scanip归属机器</name>
                        <type>0</type>
                        <snmp_community/>
                        <snmp_oid/>
                        <key>rac.cluster.chkscanip</key>
                        <delay>1m</delay>
                        <history>90d</history>
                        <trends>0</trends>
                        <status>0</status>
                        <value_type>1</value_type>
                        <allowed_hosts/>
                        <units/>
                        <snmpv3_contextname/>
                        <snmpv3_securityname/>
                        <snmpv3_securitylevel>0</snmpv3_securitylevel>
                        <snmpv3_authprotocol>0</snmpv3_authprotocol>
                        <snmpv3_authpassphrase/>
                        <snmpv3_privprotocol>0</snmpv3_privprotocol>
                        <snmpv3_privpassphrase/>
                        <params/>
                        <ipmi_sensor/>
                        <authtype>0</authtype>
                        <username/>
                        <password/>
                        <publickey/>
                        <privatekey/>
                        <port/>
                        <description/>
                        <inventory_link>0</inventory_link>
                        <applications>
                            <application>
                                <name>RAC_Cluster_Check</name>
                            </application>
                        </applications>
                        <valuemap/>
                        <logtimefmt/>
                        <preprocessing/>
                        <jmx_endpoint/>
                        <master_item/>
                    </item>
                </items>
                <discovery_rules/>
                <httptests/>
                <macros/>
                <templates/>
                <screens/>
            </template>
        </templates>
        <triggers>
            <!-- SCAN IP drift. Problem when the rac.cluster.chkscanip value differs
                 from the previous one (change()=1) and the current value does not
                 contain "missed" (str(missed)=0). Priority 2, which should be
                 Warning on the Zabbix severity scale. NOTE(review): change() is
                 re-evaluated on every new value, so the problem presumably
                 resolves on the next unchanged poll; confirm this is intended. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkscanip.change()}=1 and {RAC_Cluster_Check_11g:rac.cluster.chkscanip.str(missed)}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>SCANIP发生漂移</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>2</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- SCAN IP possibly lost. Problem when the rac.cluster.chkscanip value
                 contains the word "missed" (str(missed)=1). Priority 4, which
                 should be High on the Zabbix severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkscanip.str(missed)}=1</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>SCAN_IP可能丢失</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>4</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- ASM not running on node 1. Problem when the
                 rac.cluster.chkasmstatus value does not contain {$NODE1_NAME}
                 (str(...)=0). Priority 2, which should be Warning on the Zabbix
                 severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkasmstatus.str({$NODE1_NAME})}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>{$NODE1_NAME} 上的asm未运行</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>2</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- Database instance not running on node 1. Problem when the
                 rac.cluster.chkinstancestatus[{$DB_NAME}] value does not contain
                 {$NODE1_NAME} (str(...)=0). Priority 2, which should be Warning on
                 the Zabbix severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkinstancestatus[{$DB_NAME}].str({$NODE1_NAME})}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>{$NODE1_NAME} 上的实例未运行</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>2</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- ASM not running on node 2. Problem when the
                 rac.cluster.chkasmstatus value does not contain {$NODE2_NAME}
                 (str(...)=0). Priority 2, which should be Warning on the Zabbix
                 severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkasmstatus.str({$NODE2_NAME})}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>{$NODE2_NAME} 上的asm未运行</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>2</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- Database instance not running on node 2. Problem when the
                 rac.cluster.chkinstancestatus[{$DB_NAME}] value does not contain
                 {$NODE2_NAME} (str(...)=0). Priority 2, which should be Warning on
                 the Zabbix severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chkinstancestatus[{$DB_NAME}].str({$NODE2_NAME})}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>{$NODE2_NAME} 上的实例未运行</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>2</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- Node 1 offline. Problem when the
                 rac.cluster.chknodestatus[{$NODE1_NAME}] value does not contain
                 "Active" (str(Active)=0). Priority 3, which should be Average on
                 the Zabbix severity scale. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chknodestatus[{$NODE1_NAME}].str(Active)}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>节点:{$NODE1_NAME} 下线</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>3</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
            <!-- Node 2 offline. Problem when the
                 rac.cluster.chknodestatus[{$NODE2_NAME}] value does not contain
                 "Active" (str(Active)=0). Priority 3, which should be Average on
                 the Zabbix severity scale. The name carries a space before the
                 trailing word so that it is formatted like the node 1 trigger. -->
            <trigger>
                <expression>{RAC_Cluster_Check_11g:rac.cluster.chknodestatus[{$NODE2_NAME}].str(Active)}=0</expression>
                <recovery_mode>0</recovery_mode>
                <recovery_expression/>
                <name>节点:{$NODE2_NAME} 下线</name>
                <correlation_mode>0</correlation_mode>
                <correlation_tag/>
                <url/>
                <status>0</status>
                <priority>3</priority>
                <description/>
                <type>0</type>
                <manual_close>0</manual_close>
                <dependencies/>
                <tags/>
            </trigger>
        </triggers>
    </zabbix_export>
    
    

    相关文章

      网友评论

        本文标题:Zabbix如何自定义脚本监控RAC11g

        本文链接:https://www.haomeiwen.com/subject/raieectx.html