Introduction
ProxySQL is a high-performance MySQL middleware. In this article we use it as a read/write splitter for MySQL: client writes are sent to the master of the master-slave cluster, while the slaves share the read load among themselves.
MHA is a relatively mature solution for MySQL high availability: an excellent piece of software for failover and master promotion in a MySQL HA environment. During a failover, MHA can complete the database switchover automatically within 0-30 seconds, and while switching it preserves data consistency to the greatest extent possible, achieving high availability in the true sense.
MHA consists of two parts: the manager (management node) and the node (data node). The manager can be deployed on a dedicated server to manage several master-slave clusters, or on one of the slave nodes. A node runs on every MySQL server. The manager continuously monitors the master; when the master fails, it automatically promotes the slave holding the most recent data to be the new master and then repoints all remaining slaves at the new master.
During an automatic failover the manager promotes one node to master, but that node might not have received the latest data yet, which would leave the data incomplete and could cause serious losses. To prevent this, MHA analyzes and merges the binary logs of all nodes onto a single node, and promotes that node to master, guaranteeing data completeness after the switchover.
Environment
4 Linux virtual machines
OS: CentOS 7, MySQL version: 5.7
192.168.3.11(master)
192.168.3.105(slave1)
192.168.3.106(slave2)
192.168.3.107(manager)(proxysql)
MySQL was installed beforehand on the 11, 105, and 106 nodes, and those three machines form a master-slave replication cluster: 11 is the master, 105 and 106 are the slaves. The cluster setup is not repeated here; see https://www.jianshu.com/p/552a83fc2abc
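For reference, pointing the slaves at the master looks roughly like this (a minimal sketch, not a substitute for the linked walkthrough; repluser/replpass are the replication credentials reused in the MHA config further below, and the binlog file/position must come from SHOW MASTER STATUS on your own master):
#on the master: create the replication account
mysql> GRANT REPLICATION SLAVE ON *.* TO 'repluser'@'192.168.3.%' IDENTIFIED BY 'replpass';
mysql> FLUSH PRIVILEGES;
#on each slave: attach to the master and start replicating
mysql> CHANGE MASTER TO MASTER_HOST='192.168.3.11',MASTER_USER='repluser',MASTER_PASSWORD='replpass',MASTER_LOG_FILE='master-log.000001',MASTER_LOG_POS=4;
mysql> START SLAVE;
mysql> SHOW SLAVE STATUS\G #Slave_IO_Running and Slave_SQL_Running should both be Yes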
Installing ProxySQL
On the ProxySQL node (192.168.3.107):
[root@localhost ~]# yum -y install ./proxysql-1.3.6-1-centos7.x86_64.rpm
#install proxysql
[root@localhost ~]# rpm -ql proxysql #list the files installed by the package
/etc/init.d/proxysql #startup script
/etc/proxysql.cnf #configuration file
/usr/bin/proxysql #main program
/usr/share/proxysql/tools/proxysql_galera_checker.sh #two helper scripts
/usr/share/proxysql/tools/proxysql_galera_writer.pl
[root@localhost ~]# cp /etc/proxysql.cnf{,.bak} #back up the config file
Configuring ProxySQL
[root@localhost ~]# vim /etc/proxysql.cnf
datadir="/var/lib/proxysql"
admin_variables=
{
admin_credentials="admin:admin"
mysql_ifaces="127.0.0.1:6032;/tmp/proxysql_admin.sock"
# refresh_interval=2000
# debug=true
}
mysql_variables=
{
threads=4
max_connections=2048
default_query_delay=0
default_query_timeout=36000000
have_compress=true
poll_timeout=2000
interfaces="0.0.0.0:3306;/tmp/proxysql.sock" #change the MySQL-facing port to 3306
default_schema="mydb" #default database for client connections
stacksize=1048576
server_version="5.5.30"
connect_timeout_server=3000
monitor_history=600000
monitor_connect_interval=60000
monitor_ping_interval=10000
monitor_read_only_interval=1500
monitor_read_only_timeout=500
ping_interval_server=120000
ping_timeout_server=500
commands_stats=true
sessions_sort=true
connect_retries_on_failure=10
}
mysql_servers =
#backend MySQL servers: one brace block per server, separated by commas; one master and two slaves are defined here
(
{
address = "192.168.3.11"
port = 3306
hostgroup = 0 # hostgroup 0 is the writer group
status = "ONLINE" # online by default
weight = 1 # weight
compression = 0 # compression
max_connections=200 #maximum connections
},
{
address = "192.168.3.105"
port = 3306
hostgroup = 1 #hostgroup 1 is the reader group
status = "ONLINE"
weight = 1
compression = 0
max_connections=500
},
{
address = "192.168.3.106"
port = 3306
hostgroup = 1
status = "ONLINE"
weight = 1
compression = 0
max_connections=500
}
)
mysql_users:
(
{
username = "myadmin"
password = "mypass"
default_hostgroup = 0 # the user's default hostgroup
active = 1 # account is active
default_schema="mydb" #default database
}
)
mysql_replication_hostgroups=
(
{
writer_hostgroup=0 #hostgroup 0 is the writer group
reader_hostgroup=1 #hostgroup 1 is the reader group
comment="test repl 1"
}
)
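The mysql_replication_hostgroups block is what keeps the grouping dynamic: ProxySQL's monitor module polls each backend's read_only flag (every monitor_read_only_interval ms, per the variables above) and places servers with read_only=0 in the writer group and read_only=1 in the reader group. Once ProxySQL is up, those checks can be inspected from the admin interface introduced further below; a small sketch (monitor.mysql_server_read_only_log is part of ProxySQL's monitor schema):
mysql> SELECT hostname, port, time_start_us, read_only FROM monitor.mysql_server_read_only_log ORDER BY time_start_us DESC LIMIT 6;
#each row records one read_only probe; 192.168.3.11 should report 0 and the slaves 1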
#192.168.3.11(master)
mysql> GRANT ALL ON *.* TO 'myadmin'@'192.168.3.%' IDENTIFIED BY 'mypass';
#grant an account on the master that the read/write splitter will use to log in
mysql> FLUSH PRIVILEGES;
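One caveat: ProxySQL's monitor module logs in to the backends with its own credentials (mysql-monitor_username/mysql-monitor_password, which default to monitor/monitor in this ProxySQL release), so the backends need a matching account for the ping and read_only checks to succeed. A hedged sketch, run on the master so the grant replicates to the slaves:
mysql> GRANT REPLICATION CLIENT ON *.* TO 'monitor'@'192.168.3.%' IDENTIFIED BY 'monitor';
#account for ProxySQL's monitor module; REPLICATION CLIENT also covers the replication-lag check
mysql> FLUSH PRIVILEGES;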
#192.168.3.107(proxysql)
[root@localhost ~]# systemctl start proxysql #start proxysql
[root@localhost ~]# ss -tnl #check listening ports; four 3306 listeners are open
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 127.0.0.1:6032 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 *:3306 *:*
LISTEN 0 128 *:3306 *:*
LISTEN 0 128 *:3306 *:*
LISTEN 0 128 *:3306 *:*
[root@localhost ~]# mysql -h192.168.3.107 -umyadmin -pmypass
#log in through the proxy to test
mysql> SHOW DATABASES;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mydb |
| mydb1 |
| mylsh |
| mysql |
| performance_schema |
| sys |
+--------------------+
mysql> DROP DATABASE mydb1; #drop a database
#Now check every MySQL node: mydb1 is gone on all of them, so writes flow through the proxy to the master and replicate; read/write splitting is in effect
mysql> SHOW DATABASES;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mydb |
| mylsh |
| mysql |
| performance_schema |
| sys |
+--------------------+
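Dropping a database proves that writes reach the master and replicate, and since commands_stats=true is set you can also ask ProxySQL which hostgroup each statement was actually routed to. A sketch, run from the admin interface described below:
mysql> SELECT hostgroup, schemaname, digest_text, count_star FROM stats_mysql_query_digest ORDER BY count_star DESC LIMIT 5;
#hostgroup 0 entries went to the master, hostgroup 1 entries to the slaves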
#192.168.3.107(proxysql)
03:52:28.852969 IP 192.168.3.107.49634 > 192.168.3.106.3306: Flags [F.], seq 201, ack 167, win 229, options [nop,nop,TS val 1269184 ecr 23634899], length 0
03:52:28.853089 IP 192.168.3.105.3306 > 192.168.3.107.60780: Flags [.], ack 202, win 235, options [nop,nop,TS val 23638444 ecr 1269183], length 0
03:52:28.853331 IP 192.168.3.11.3306 > 192.168.3.107.39556: Flags [.], ack 201, win 235, options [nop,nop,TS val 23638521 ecr 1269183], length 0
03:52:28.853361 IP 192.168.3.106.3306 > 192.168.3.107.49634: Flags [.], ack 202, win 235, options [nop,nop,TS val 23634900 ecr 1269184], length 0
03:52:28.853666 IP 192.168.3.11.3306 > 192.168.3.107.39556: Flags [P.], seq 83:166, ack 201, win 235, options [nop,nop,TS val 23638522 ecr 1269183], length 83
03:52:28.853757 IP 192.168.3.11.3306 > 192.168.3.107.39556: Flags [F.], seq 166, ack 201, win 235, options [nop,nop,TS val 23638522 ecr 1269183], length 0
03:52:28.855227 IP 192.168.3.107.39556 > 192.168.3.11.3306: Flags [F.], seq 201, ack 167, win 229, options [nop,nop,TS val 1269186 ecr 23638522], length 0
03:52:28.856037 IP 192.168.3.11.3306 > 192.168.3.107.39556: Flags [.], ack 202, win 235, options [nop,nop,TS val 23638524 ecr 1269186], length 0
#tcpdump on port 3306 shows a constant stream of packets among 11, 105, 106, and 107
[root@localhost ~]# mysql -uadmin -padmin -hlocalhost -S /tmp/proxysql_admin.sock
#on the proxysql host, log in to the ProxySQL admin interface with this command
mysql> SHOW DATABASES; #list the admin interface's databases
+-----+---------+-------------------------------+
| seq | name | file |
+-----+---------+-------------------------------+
| 0 | main | |
| 2 | disk | /var/lib/proxysql/proxysql.db |
| 3 | stats | |
| 4 | monitor | |
+-----+---------+-------------------------------+
mysql> SELECT * FROM mysql_servers\G
*************************** 1. row ***************************
hostgroup_id: 0
hostname: 192.168.3.11
port: 3306
status: ONLINE
weight: 1
compression: 0
max_connections: 200
max_replication_lag: 0
use_ssl: 0
max_latency_ms: 0
comment:
*************************** 2. row ***************************
hostgroup_id: 1
hostname: 192.168.3.105
port: 3306
status: ONLINE
weight: 1
compression: 0
max_connections: 500
max_replication_lag: 0
use_ssl: 0
max_latency_ms: 0
comment:
*************************** 3. row ***************************
hostgroup_id: 1
hostname: 192.168.3.106
port: 3306
status: ONLINE
weight: 1
compression: 0
max_connections: 500
max_replication_lag: 0
use_ssl: 0
max_latency_ms: 0
comment:
3 rows in set (0.00 sec)
#One master and two slaves are registered here. To add another server, just insert a row (see the sketch at the end of this section).
mysql> SELECT * FROM runtime_mysql_replication_hostgroups;
+------------------+------------------+-------------+
| writer_hostgroup | reader_hostgroup | comment |
+------------------+------------------+-------------+
| 0 | 1 | test repl 1 |
+------------------+------------------+-------------+
#The writer hostgroup is 0 and the reader hostgroup is 1; ProxySQL read/write splitting is now configured and working.
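As noted above, registering another backend is just an insert into mysql_servers. A sketch from the admin interface (192.168.3.108 is a hypothetical new slave; the LOAD/SAVE pair applies the change to the running config and persists it to disk):
mysql> INSERT INTO mysql_servers (hostgroup_id, hostname, port, weight, max_connections) VALUES (1, '192.168.3.108', 3306, 1, 500);
mysql> LOAD MYSQL SERVERS TO RUNTIME; #make the new server live
mysql> SAVE MYSQL SERVERS TO DISK; #persist it across restarts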
MHA + ProxySQL: highly available read/write splitting
#Configure the master
~]# vim /usr/local/mysql/etc/my.cnf.d/server.cnf
[mysqld]
innodb_file_per_table=ON
skip_name_resolve=ON
server_id=1
log-bin=master-log
relay_log=relay-log
#Configure slave1
vim /usr/local/mysql/etc/my.cnf.d/server.cnf
[mysqld]
innodb_file_per_table=ON
skip_name_resolve=ON
server_id=11 #node ID
log-bin=master-log #binary log
relay_log=relay-bin #relay log
relay_log_purge=0 #0 disables automatic purging of relay logs
read_only=1 #put this node in read-only mode
#Configure slave2
vim /usr/local/mysql/etc/my.cnf.d/server.cnf
[mysqld]
innodb_file_per_table=ON
skip_name_resolve=ON
server_id=12
log-bin=master-log
relay_log=relay-log
relay_log_purge=0
read_only=1
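After restarting MySQL on the slaves, it is worth confirming that the two safety settings took effect; a quick check (a sketch):
mysql> SHOW VARIABLES WHERE Variable_name IN ('read_only','relay_log_purge');
#expect read_only=ON and relay_log_purge=OFF on both slaves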
Configuring SSH trust
All nodes in an MHA cluster must trust one another over passwordless SSH, so that the manager can perform remote control and data management.
#manager(192.168.3.107)
~]# ssh-keygen -t rsa -P ''
#generate an SSH key pair; run this on every node so the .ssh directory gets created
~]# ssh-copy-id -i .ssh/id_rsa.pub root@192.168.3.107
#copy the key to this machine itself
~]# ssh root@192.168.3.107 'ifconfig'
#test running a command on this machine over SSH
[root@localhost ~]# scp -p .ssh/authorized_keys .ssh/id_rsa{,.pub} root@192.168.3.11:/root/.ssh/
[root@localhost ~]# ssh root@192.168.3.11 'ifconfig'
#copy the keys to 192.168.3.11 and test a remote command
[root@localhost ~]# scp -p .ssh/authorized_keys .ssh/id_rsa{,.pub} root@192.168.3.105:/root/.ssh/
[root@localhost ~]# ssh root@192.168.3.105 'ifconfig'
#copy the keys to 192.168.3.105 and test a remote command
[root@localhost ~]# scp -p .ssh/authorized_keys .ssh/id_rsa{,.pub} root@192.168.3.106:/root/.ssh/
[root@localhost ~]# ssh root@192.168.3.106 'ifconfig'
#copy the keys to 192.168.3.106 and test a remote command
#repeat the SSH test from every node to every other node, as in the loop below
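Rather than testing every pair by hand, a small loop run on each node covers the whole mesh (a sketch; the IP list matches this environment):
~]# for h in 192.168.3.11 192.168.3.105 192.168.3.106 192.168.3.107; do
>     ssh root@$h 'hostname'   #each hop should print a hostname without prompting for a password
> done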
Installing MHA
#manager(192.168.3.107) node
#download the two MHA packages:
1. mha4mysql-manager-0.56-0.el6.noarch.rpm
2. mha4mysql-node-0.56-0.el6.noarch.rpm
~]# yum -y install epel-release #install the EPEL repository
~]# yum repolist #refresh the repository list
~]# yum -y install ./mha4mysql-*.rpm #install both the MHA manager and node packages on the manager
~]# scp mha4mysql-node-0.56-0.el6.noarch.rpm 192.168.3.11:/root
~]# scp mha4mysql-node-0.56-0.el6.noarch.rpm 192.168.3.105:/root
~]# scp mha4mysql-node-0.56-0.el6.noarch.rpm 192.168.3.106:/root
#copy the MHA node package to the other three MySQL nodes
#on the three MySQL nodes
~]# yum -y install ./mha4mysql-node-0.56-0.el6.noarch.rpm
#install the MHA node package on every MySQL node
#on the master node
mysql> GRANT ALL ON *.* TO 'mhaadmin'@'192.168.3.%' IDENTIFIED BY 'mhapass';
#grant the MHA admin account on the master (the grant replicates to the slaves)
mysql> FLUSH PRIVILEGES;
Configuring MHA
#on the manager node
[root@localhost ~]# mkdir /etc/masterha
[root@localhost ~]# vim /etc/masterha/app1.cnf
[server default]
user=mhaadmin
password=mhapass
manager_workdir=/data/masterha/app1
manager_log=/data/masterha/app1/manager.log
remote_workdir=/data/masterha/app1
master_binlog_dir=/data/mysql
ssh_user=root
repl_user=repluser
repl_password=replpass
ping_interval=1
[server1]
hostname=192.168.3.11
candidate_master=1
[server2]
hostname=192.168.3.105
candidate_master=1
[server3]
hostname=192.168.3.106
candidate_master=1
Starting MHA
~]# masterha_check_ssh --conf=/etc/masterha/app1.cnf
#check that SSH communication among all the nodes is OK
~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
................
MySQL Replication Health is OK.
#check the replication topology; the final "OK" means everything is healthy.
[root@localhost ~]# nohup masterha_manager --conf=/etc/masterha/app1.cnf &> /data/masterha/app1/manager.log &
#start masterha_manager in the background; nohup keeps it running after logout
[root@localhost ~]# ps aux | grep masterha
root 1551 0.2 2.1 297140 21624 pts/0 S 23:29 0:00 perl /usr/bin/masterha_manager --conf=/etc/masterha/app1.cnf
#check the background processes; masterha_manager has started successfully
[root@localhost ~]# masterha_check_status --conf=/etc/masterha/app1.cnf
app1 (pid:1551) is running(0:PING_OK), master:192.168.3.11
#check the status of the master node: running, successfully started.
#masterha_stop --conf=/etc/masterha/app1.cnf stops the MHA service
Testing MHA failover
#192.168.3.11(master)
[root@node4 ~]# killall -9 mysqld mysqld_safe
#kill the MySQL processes on the master
#192.168.3.107(manager): inspect the log on the MHA node
[root@localhost ~]# less /data/masterha/app1/manager.log
----- Failover Report -----
app1: MySQL Master failover 192.168.3.11(192.168.3.11:3306) to 192.168.3.105(192.168.3.105:3306) succeeded
Master 192.168.3.11(192.168.3.11:3306) is down!
Check MHA Manager logs at localhost.localdomain:/data/masterha/app1/manager.log for details.
Started automated(non-interactive) failover.
The latest slave 192.168.3.105(192.168.3.105:3306) has all relay logs for recovery.
Selected 192.168.3.105(192.168.3.105:3306) as a new master.
192.168.3.105(192.168.3.105:3306): OK: Applying all logs succeeded.
192.168.3.106(192.168.3.106:3306): This host has the latest relay log events.
Generating relay diff files from the latest slave succeeded.
192.168.3.106(192.168.3.106:3306): OK: Applying all logs succeeded. Slave started, replicating from 192.168.3.105(192.168.3.105:3306)
192.168.3.105(192.168.3.105:3306): Resetting slave info succeeded.
Master failover to 192.168.3.105(192.168.3.105:3306) completed successfully.
##The log reports that master 192.168.3.11 is down; slave 192.168.3.105 applied all the logs and was switched over as the new master
#192.168.3.105 (this node has now been promoted to master)
mysql> SHOW DATABASES;
+--------------------+
| Database |
+--------------------+
| information_schema |
| AAA |
| mydb |
| mysql |
| performance_schema |
| sys |
+--------------------+
mysql> DROP DATABASE AAA;
#drop one of the databases, then check whether the slave2 node replicated the drop
#192.168.3.106(slave2)
mysql> SHOW DATABASES;
+--------------------+
| Database |
+--------------------+
| information_schema |
| mydb |
| mysql |
| performance_schema |
| sys |
+--------------------+
#on the other slave (slave2) the database is gone; the drop replicated successfully
#At this point the MHA + ProxySQL high-availability architecture is fully configured.
Repairing 192.168.3.11 and adding it back into the cluster
#run on 192.168.3.105, which is currently the master
mysql> SHOW MASTER STATUS;
+-------------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+-------------------+----------+--------------+------------------+-------------------+
| master-log.000001 | 304 | | | |
+-------------------+----------+--------------+------------------+-------------------+
#check which binary log file and position the master has reached
~]# mysqldump -x -R -E --triggers --master-data=2 --all-databases > alldb.sql
#take a full backup
~]# scp alldb.sql 192.168.3.11:/root/
#copy the backup to the 192.168.3.11 node
#on the 192.168.3.11 node
~]# vim /usr/local/mysql/etc/my.cnf.d/server.cnf
relay_log_purge=0
read_only=1
#this node is being redeployed as a slave; add the two lines above to its configuration
~]# rm -rf /data/mysql/*
#wipe the data directory
~]# systemctl start mysql
~]# mysql < alldb.sql
~]# head -30 alldb.sql #find the replication coordinates that --master-data=2 recorded in the dump
-- CHANGE MASTER TO MASTER_LOG_FILE='master-log.000001', MASTER_LOG_POS=304;
mysql> CHANGE MASTER TO MASTER_HOST='192.168.3.105',MASTER_USER='repluser',MASTER_PASSWORD='replpass',MASTER_LOG_FILE='master-log.000001',MASTER_LOG_POS=304;
mysql> START SLAVE;
mysql> SHOW SLAVE STATUS\G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.3.105
Master_User: repluser
Master_Port: 3306
Connect_Retry: 60
Master_Log_File: master-log.000001
Read_Master_Log_Pos: 304
Relay_Log_File: relay-log.000002
Relay_Log_Pos: 321
Relay_Master_Log_File: master-log.000001
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
............
#192.168.3.11 has been successfully redeployed as a slave in the cluster
#on 192.168.3.107(manager)
~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
...................
MySQL Replication Health is OK.
#replication check passes; everything is normal
~]# nohup masterha_manager --conf=/etc/masterha/app1.cnf &> /data/masterha/app1/manager.log &
[1] 7459
#start MHA in the background again
~]# ps aux | grep masterha
root 7459 0.3 4.4 297140 21616 pts/0 S 04:16 0:00 perl /usr/bin/masterha_manager --conf=/etc/masterha/app1.cnf
#check the background processes; MHA has started successfully in the background
~]# exit
#type exit to leave the pseudo-terminal; closing the window directly would terminate the MHA service
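If you want the manager to be independent of the login session altogether, one option beyond nohup (a sketch of my own, not part of the original setup) is a minimal systemd unit; note that masterha_manager exits by design after completing a failover, so automatic restart is deliberately left off:
[root@localhost ~]# cat /etc/systemd/system/masterha.service
[Unit]
Description=MHA manager for app1
After=network.target

[Service]
Type=simple
ExecStart=/usr/bin/masterha_manager --conf=/etc/masterha/app1.cnf
Restart=no

[Install]
WantedBy=multi-user.target
[root@localhost ~]# systemctl daemon-reload && systemctl start masterha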
Pitfalls encountered while installing MHA
[root@localhost ~]# masterha_check_repl --conf=/etc/masterha/app1.cnf
Tue May 1 04:44:12 2018 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue May 1 04:44:12 2018 - [info] Reading application default configuration from /etc/masterha/app1.cnf..
Tue May 1 04:44:12 2018 - [info] Reading server configuration from /etc/masterha/app1.cnf..
Tue May 1 04:44:12 2018 - [info] MHA::MasterMonitor version 0.56.
Tue May 1 04:44:13 2018 - [info] GTID failover mode = 0
Tue May 1 04:44:13 2018 - [info] Dead Servers:
Tue May 1 04:44:13 2018 - [info] Alive Servers:
Tue May 1 04:44:13 2018 - [info] 192.168.152.50(192.168.152.50:3306)
Tue May 1 04:44:13 2018 - [info] 192.168.152.60(192.168.152.60:3306)
Tue May 1 04:44:13 2018 - [info] 192.168.152.40(192.168.152.40:3306)
Tue May 1 04:44:13 2018 - [info] Alive Slaves:
Tue May 1 04:44:13 2018 - [info] 192.168.152.50(192.168.152.50:3306) Version=5.7.18-log (oldest major version between slaves) log-bin:enabled
Tue May 1 04:44:13 2018 - [info] Replicating from 192.168.152.40(192.168.152.40:3306)
Tue May 1 04:44:13 2018 - [info] Primary candidate for the new Master (candidate_master is set)
Tue May 1 04:44:13 2018 - [info] 192.168.152.60(192.168.152.60:3306) Version=5.7.18-log (oldest major version between slaves) log-bin:enabled
Tue May 1 04:44:13 2018 - [info] Replicating from 192.168.152.40(192.168.152.40:3306)
Tue May 1 04:44:13 2018 - [info] Primary candidate for the new Master (candidate_master is set)
Tue May 1 04:44:13 2018 - [info] Current Alive Master: 192.168.152.40(192.168.152.40:3306)
Tue May 1 04:44:13 2018 - [info] Checking slave configurations..
Tue May 1 04:44:13 2018 - [warning] relay_log_purge=0 is not set on slave 192.168.152.60(192.168.152.60:3306).
Tue May 1 04:44:13 2018 - [info] Checking replication filtering settings..
Tue May 1 04:44:13 2018 - [info] binlog_do_db= , binlog_ignore_db=
Tue May 1 04:44:13 2018 - [info] Replication filtering check ok.
Tue May 1 04:44:13 2018 - [info] GTID (with auto-pos) is not supported
Tue May 1 04:44:13 2018 - [info] Starting SSH connection tests..
Tue May 1 04:44:16 2018 - [info] All SSH connection tests passed successfully.
Tue May 1 04:44:16 2018 - [info] Checking MHA Node version..
Tue May 1 04:44:17 2018 - [info] Version check ok.
Tue May 1 04:44:17 2018 - [info] Checking SSH publickey authentication settings on the current master..
Tue May 1 04:44:17 2018 - [info] HealthCheck: SSH to 192.168.152.40 is reachable.
Tue May 1 04:44:18 2018 - [info] Master MHA Node version is 0.56.
Tue May 1 04:44:18 2018 - [info] Checking recovery script configurations on 192.168.152.40(192.168.152.40:3306)..
Tue May 1 04:44:18 2018 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/var/lib/mysql,/var/log/mysql --output_file=/data/masterha/app1/save_binary_logs_test --manager_version=0.56 --start_file=master-log.000007
Tue May 1 04:44:18 2018 - [info] Connecting to root@192.168.152.40(192.168.152.40:22)..
Failed to save binary log: Binlog not found from /var/lib/mysql,/var/log/mysql! If you got this error at MHA Manager, please set "master_binlog_dir=/path/to/binlog_directory_of_the_master" correctly in the MHA Manager's configuration file and try again.
at /usr/bin/save_binary_logs line 123.
eval {...} called at /usr/bin/save_binary_logs line 70
main::main() called at /usr/bin/save_binary_logs line 66
Tue May 1 04:44:18 2018 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln158] Binlog setting check failed!
Tue May 1 04:44:18 2018 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln405] Master configuration failed.
Tue May 1 04:44:18 2018 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln424] Error happened on checking configurations. at /usr/bin/masterha_check_repl line 48.
Tue May 1 04:44:18 2018 - [error][/usr/share/perl5/vendor_perl/MHA/MasterMonitor.pm, ln523] Error happened on monitoring servers.
Tue May 1 04:44:18 2018 - [info] Got exit code 1 (Not master dead).
MySQL Replication Health is NOT OK!
[root@localhost ~]# vim /etc/masterha/app1.cnf
master_binlog_dir=/data/mysql #add this line to tell MHA where the master's binary logs live; this fixes the error