#cat /etc/masterha/conf/cls_new.cnf
[server default]
#workdir on the management server
manager_workdir=/masterha/cls_new/
manager_log=/masterha/cls_new/manager.log
#workdir on the node for mysql server
master_binlog_dir=/data/mysql_3358/data/
Fri Oct 9 11:54:48 2020 - [info] Checking master_ip_failover_script status: Fri Oct 9 11:54:48 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Fri Oct 9 11:54:48 2020 - [info] OK. Fri Oct 9 11:54:48 2020 - [warning] shutdown_script is not defined. Fri Oct 9 11:54:48 2020 - [info] Setmaster ping interval3 seconds. Fri Oct911:54:482020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Fri Oct911:54:482020 - [info] Starting ping health checkon172.16.120.10(172.16.120.10:3358).. Fri Oct911:54:482020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond.. Fri Oct 9 11:56:42 2020 - [warning] Got error on MySQL insert ping: 2006 (MySQL server has gone away) Fri Oct 9 11:56:42 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=INSERT Fri Oct 9 11:56:42 2020 - [info] Executing SSH check script: exit 0 Fri Oct 9 11:56:43 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. ERROR 1040 (HY000): Too many connections Monitoring server 172.16.120.11 is reachable, Master is not writable from 172.16.120.11. OK. ERROR 1040 (HY000): Too many connections Monitoring server 172.16.120.12 is reachable, Master is not writable from 172.16.120.12. OK. Fri Oct 9 11:56:43 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Fri Oct 9 11:56:45 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections) Fri Oct 9 11:56:45 2020 - [info] Got MySQL error 1040, but this is not a MySQL crash. Continue health check.. 
Fri Oct 9 11:56:48 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections) Fri Oct 9 11:56:48 2020 - [info] Got MySQL error 1040, but this is not a MySQL crash. Continue health check.. Fri Oct 9 11:56:51 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections) Fri Oct 9 11:56:51 2020 - [info] Got MySQL error 1040, but this is not a MySQL crash. Continue health check.. Fri Oct 9 11:56:54 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections) Fri Oct 9 11:56:54 2020 - [info] Got MySQL error 1040, but this is not a MySQL crash. Continue health check.. Fri Oct 9 11:56:57 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections) Fri Oct 9 11:56:57 2020 - [info] Got MySQL error 1040, but this is not a MySQL crash. Continue health check.. Fri Oct 9 11:57:00 2020 - [warning] Got error on MySQL connect: 1040 (Too many connections)
# MySQL error numbers that can only be produced by a server process that is
# still running (connection limits, authentication and privilege failures).
# The monitor log for error 1040 reports "this is not a MySQL crash" and the
# health check continues instead of starting a failover.
# NOTE(review): how the health checker consumes this list is not shown here;
# confirm against MHA::HealthCheck before relying on it.
our @ALIVE_ERROR_CODES = (
  1040,    # ER_CON_COUNT_ERROR -- Too many connections
  1042,    # ER_BAD_HOST_ERROR -- Can't get hostname for your address
  1043,    # ER_HANDSHAKE_ERROR -- Bad handshake
  1044,    # ER_DBACCESS_DENIED_ERROR -- Access denied for user '%s'@'%s' to database '%s'
  1045,    # ER_ACCESS_DENIED_ERROR -- Access denied for user '%s'@'%s' (using password: %s)
  1129,    # ER_HOST_IS_BLOCKED -- Host '%s' is blocked because of many connection errors; unblock with 'mysqladmin flush-hosts'
  1130,    # ER_HOST_NOT_PRIVILEGED -- Host '%s' is not allowed to connect to this MySQL server
  1203,    # ER_TOO_MANY_USER_CONNECTIONS -- User %s already has more than 'max_user_connections' active connections
  1226,    # ER_USER_LIMIT_REACHED -- User '%s' has exceeded the '%s' resource (current value: %ld)
  1251,    # ER_NOT_SUPPORTED_AUTH_MODE -- Client does not support authentication protocol requested by server; consider upgrading MySQL client
  1275,    # ER_SERVER_IS_IN_SECURE_AUTH_MODE -- Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format
);
Fri Oct 9 15:48:03 2020 - [info] MHA::MasterMonitor version 0.58. Fri Oct 9 15:48:05 2020 - [info] GTID failover mode = 1 Fri Oct 9 15:48:05 2020 - [info] Dead Servers: Fri Oct 9 15:48:05 2020 - [info] Alive Servers: Fri Oct 9 15:48:05 2020 - [info] 172.16.120.10(172.16.120.10:3358) Fri Oct 9 15:48:05 2020 - [info] 172.16.120.11(172.16.120.11:3358) Fri Oct 9 15:48:05 2020 - [info] 172.16.120.12(172.16.120.12:3358) Fri Oct 9 15:48:05 2020 - [info] Alive Slaves: Fri Oct 9 15:48:05 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Fri Oct 9 15:48:05 2020 - [info] GTID ON Fri Oct 9 15:48:05 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Fri Oct 9 15:48:05 2020 - [info] Primary candidate for the new Master (candidate_master is set) Fri Oct915:48:052020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Fri Oct915:48:052020 - [info] GTID ON Fri Oct915:48:052020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Fri Oct915:48:052020 - [info] Primary candidate for the newMaster (candidate_master isset) Fri Oct915:48:052020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Fri Oct915:48:052020 - [info] Checking slave configurations.. Fri Oct915:48:052020 - [info] Checking replication filtering settings.. Fri Oct915:48:052020 - [info] binlog_do_db= , binlog_ignore_db= Fri Oct915:48:052020 - [info] Replication filtering check ok. Fri Oct915:48:052020 - [info] GTID (withauto-pos) is supported. Skipping all SSH and Node package checking. Fri Oct915:48:052020 - [info] Checking SSH publickey authenticationsettingson the current master.. Fri Oct915:48:052020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Fri Oct915:48:052020 - [info] 172.16.120.10(172.16.120.10:3358) (currentmaster) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Fri Oct915:48:052020 - [info] Checking master_ip_failover_script status: Fri Oct915:48:052020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Fri Oct915:48:052020 - [info] OK. Fri Oct915:48:052020 - [warning] shutdown_script isnot defined. Fri Oct915:48:052020 - [info] Setmaster ping interval3 seconds. Fri Oct915:48:052020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Fri Oct915:48:052020 - [info] Starting ping health checkon172.16.120.10(172.16.120.10:3358).. Fri Oct915:48:052020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond.. Fri Oct 9 15:50:40 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (4) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 15:50:40 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct915:50:402020 - [info] Executing SSH check script: exit0 Fri Oct915:50:442020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 15:50:44 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 15:50:45 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. ssh: connect to host 172.16.120.11 port 22: Connection timed out Monitoring server 172.16.120.11 is NOT reachable! Fri Oct 9 15:50:45 2020 - [warning] At least one of monitoring servers is not reachable from this script. 
This is likely a network problem. Failover should not happen. Fri Oct 9 15:50:47 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct915:50:472020 - [warning] Connectionfailed3time(s).. Fri Oct915:50:502020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 15:50:50 2020 - [warning] Connection failed 4 time(s).. Fri Oct 9 15:50:50 2020 - [warning] Secondary network check script returned errors. Failover should not start so checking server status again. Check network settings for details. Fri Oct 9 15:50:53 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct915:50:532020 - [warning] Connectionfailed1time(s).. Fri Oct915:50:532020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct915:50:532020 - [info] Executing SSH check script: exit0 Fri Oct915:50:562020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 15:50:56 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 15:50:58 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. ssh: connect to host 172.16.120.11 port 22: Connection timed out Monitoring server 172.16.120.11 is NOT reachable! Fri Oct 9 15:50:58 2020 - [warning] At least one of monitoring servers is not reachable from this script. This is likely a network problem. Failover should not happen. Fri Oct 9 15:50:59 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct915:50:592020 - [warning] Connectionfailed3time(s).. 
Fri Oct915:51:022020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 15:51:02 2020 - [warning] Connection failed 4 time(s).. Fri Oct 9 15:51:02 2020 - [warning] Secondary network check script returned errors. Failover should not start so checking server status again. Check network settings for details. Fri Oct 9 15:51:05 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct915:51:052020 - [warning] Connectionfailed1time(s).. Fri Oct915:51:052020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct915:51:052020 - [info] Executing SSH check script: exit0 Fri Oct915:51:052020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond.. Fri Oct 9 15:51:05 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Fri Oct 9 15:51:09 2020 - [warning] Got timeout on Secondary Check child process and killed it! at /usr/local/share/perl5/MHA/HealthCheck.pm line 435. ssh: connect to host 172.16.120.11 port 22: Connection timed out Monitoring server 172.16.120.11 is NOT reachable!
ping_type=INSERT
slave-1
1 2 3 4 5 6 7
# Firewall rules used for the network-partition test on slave-1.
# Effect: ICMP stays open (ping still works), TCP from the three MySQL nodes is
# accepted, and every other new inbound TCP connection is dropped.
# Each command must be on its own line: written on a single line, the
# assignment becomes a temporary environment prefix and "$IPTABLES" expands to
# an empty string, so nothing is applied.
IPTABLES="/sbin/iptables"

# Start from an empty rule set.
"$IPTABLES" -F

# Keep ICMP reachable so plain ping continues to succeed.
"$IPTABLES" -A INPUT -p icmp --icmp-type any -j ACCEPT

# Allow TCP traffic from the replication cluster members.
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.10 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.11 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.12 -j ACCEPT

# Drop connection attempts (SYN) from every other source.
"$IPTABLES" -A INPUT -p tcp --syn -j DROP
此时manager已经无法连通slave-1
1 2 3 4 5 6 7 8 9 10 11 12
#ping centos-2 PING centos-2 (172.16.120.11) 56(84) bytes of data. 64 bytes from centos-2 (172.16.120.11): icmp_seq=1 ttl=64 time=0.349 ms 64 bytes from centos-2 (172.16.120.11): icmp_seq=2 ttl=64 time=0.651 ms ^C --- centos-2 ping statistics --- 2 packets transmitted, 2 received, 0% packet loss, time 1001ms rtt min/avg/max/mdev = 0.349/0.500/0.651/0.151 ms
Fri Oct 9 16:43:25 2020 - [info] MHA::MasterMonitor version 0.58. Fri Oct 9 16:43:26 2020 - [info] GTID failover mode = 1 Fri Oct 9 16:43:26 2020 - [info] Dead Servers: Fri Oct 9 16:43:26 2020 - [info] Alive Servers: Fri Oct 9 16:43:26 2020 - [info] 172.16.120.10(172.16.120.10:3358) Fri Oct 9 16:43:26 2020 - [info] 172.16.120.11(172.16.120.11:3358) Fri Oct 9 16:43:26 2020 - [info] 172.16.120.12(172.16.120.12:3358) Fri Oct 9 16:43:26 2020 - [info] Alive Slaves: Fri Oct 9 16:43:26 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Fri Oct 9 16:43:26 2020 - [info] GTID ON Fri Oct 9 16:43:26 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Fri Oct 9 16:43:26 2020 - [info] Primary candidate for the new Master (candidate_master is set) Fri Oct916:43:262020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Fri Oct916:43:262020 - [info] GTID ON Fri Oct916:43:262020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Fri Oct916:43:262020 - [info] Primary candidate for the newMaster (candidate_master isset) Fri Oct916:43:262020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Fri Oct916:43:262020 - [info] Checking slave configurations.. Fri Oct916:43:262020 - [info] Checking replication filtering settings.. Fri Oct916:43:262020 - [info] binlog_do_db= , binlog_ignore_db= Fri Oct916:43:262020 - [info] Replication filtering check ok. Fri Oct916:43:262020 - [info] GTID (withauto-pos) is supported. Skipping all SSH and Node package checking. Fri Oct916:43:262020 - [info] Checking SSH publickey authenticationsettingson the current master.. Fri Oct916:43:262020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Fri Oct916:43:262020 - [info] 172.16.120.10(172.16.120.10:3358) (currentmaster) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Fri Oct916:43:262020 - [info] Checking master_ip_failover_script status: Fri Oct916:43:262020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Fri Oct916:43:262020 - [info] OK. Fri Oct916:43:262020 - [warning] shutdown_script isnot defined. Fri Oct916:43:262020 - [info] Setmaster ping interval3 seconds. Fri Oct916:43:262020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Fri Oct916:43:262020 - [info] Starting ping health checkon172.16.120.10(172.16.120.10:3358).. Fri Oct916:43:262020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond.. Fri Oct 9 16:45:55 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (4) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:45:55 2020 - [info] Executing SSH check script: exit0 Fri Oct916:45:552020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct916:45:592020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:45:59 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 16:46:00 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. ssh: connect to host 172.16.120.11 port 22: Connection timed out Monitoring server 172.16.120.11 is NOT reachable! Fri Oct 9 16:46:00 2020 - [warning] At least one of monitoring servers is not reachable from this script. 
This is likely a network problem. Failover should not happen. Fri Oct 9 16:46:02 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:46:022020 - [warning] Connectionfailed3time(s).. Fri Oct916:46:052020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:46:05 2020 - [warning] Connection failed 4 time(s).. Fri Oct 9 16:46:05 2020 - [warning] Secondary network check script returned errors. Failover should not start so checking server status again. Check network settings for details. Fri Oct 9 16:46:08 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:46:082020 - [warning] Connectionfailed1time(s).. Fri Oct916:46:082020 - [info] Executing SSH check script: exit0 Fri Oct916:46:082020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct916:46:112020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:46:11 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 16:46:13 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. ssh: connect to host 172.16.120.11 port 22: Connection timed out Monitoring server 172.16.120.11 is NOT reachable! Fri Oct 9 16:46:13 2020 - [warning] At least one of monitoring servers is not reachable from this script. This is likely a network problem. Failover should not happen. Fri Oct 9 16:46:14 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:46:142020 - [warning] Connectionfailed3time(s).. 
Fri Oct 9 16:46:15 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
ping_type=INSERT
slave-1,slave-2
1 2 3 4 5 6 7
# Firewall rules used for the network-partition test on slave-1 and slave-2.
# Effect: ICMP stays open (ping still works), TCP from the three MySQL nodes is
# accepted, and every other new inbound TCP connection is dropped.
# Each command must be on its own line: written on a single line, the
# assignment becomes a temporary environment prefix and "$IPTABLES" expands to
# an empty string, so nothing is applied.
IPTABLES="/sbin/iptables"

# Start from an empty rule set.
"$IPTABLES" -F

# Keep ICMP reachable so plain ping continues to succeed.
"$IPTABLES" -A INPUT -p icmp --icmp-type any -j ACCEPT

# Allow TCP traffic from the replication cluster members.
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.10 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.11 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.12 -j ACCEPT

# Drop connection attempts (SYN) from every other source.
"$IPTABLES" -A INPUT -p tcp --syn -j DROP
Fri Oct 9 16:05:55 2020 - [info] MHA::MasterMonitor version 0.58. Fri Oct 9 16:05:56 2020 - [info] GTID failover mode = 1 Fri Oct 9 16:05:56 2020 - [info] Dead Servers: Fri Oct 9 16:05:56 2020 - [info] Alive Servers: Fri Oct 9 16:05:56 2020 - [info] 172.16.120.10(172.16.120.10:3358) Fri Oct 9 16:05:56 2020 - [info] 172.16.120.11(172.16.120.11:3358) Fri Oct 9 16:05:56 2020 - [info] 172.16.120.12(172.16.120.12:3358) Fri Oct 9 16:05:56 2020 - [info] Alive Slaves: Fri Oct 9 16:05:56 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Fri Oct 9 16:05:56 2020 - [info] GTID ON Fri Oct 9 16:05:56 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Fri Oct 9 16:05:56 2020 - [info] Primary candidate for the new Master (candidate_master is set) Fri Oct916:05:562020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Fri Oct916:05:562020 - [info] GTID ON Fri Oct916:05:562020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Fri Oct916:05:562020 - [info] Primary candidate for the newMaster (candidate_master isset) Fri Oct916:05:562020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Fri Oct916:05:562020 - [info] Checking slave configurations.. Fri Oct916:05:562020 - [info] Checking replication filtering settings.. Fri Oct916:05:562020 - [info] binlog_do_db= , binlog_ignore_db= Fri Oct916:05:562020 - [info] Replication filtering check ok. Fri Oct916:05:562020 - [info] GTID (withauto-pos) is supported. Skipping all SSH and Node package checking. Fri Oct916:05:562020 - [info] Checking SSH publickey authenticationsettingson the current master.. Fri Oct916:05:562020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Fri Oct916:05:562020 - [info] 172.16.120.10(172.16.120.10:3358) (currentmaster) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Fri Oct916:05:562020 - [info] Checking master_ip_failover_script status: Fri Oct916:05:562020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Fri Oct916:05:562020 - [info] OK. Fri Oct916:05:562020 - [warning] shutdown_script isnot defined. Fri Oct916:05:562020 - [info] Setmaster ping interval3 seconds. Fri Oct916:05:562020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Fri Oct916:05:562020 - [info] Starting ping health checkon172.16.120.10(172.16.120.10:3358).. Fri Oct916:05:562020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond.. Fri Oct 9 16:06:43 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (4) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:06:43 2020 - [info] Executing SSH check script: exit0 Fri Oct916:06:432020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct916:06:472020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:06:47 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 16:06:48 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Master is reachable from 172.16.120.12! 
Fri Oct 9 16:06:48 2020 - [warning] Master is reachable from at least one of other monitoring servers. Failover should not happen. Fri Oct 9 16:06:50 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:06:502020 - [warning] Connectionfailed3time(s).. Fri Oct916:06:532020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:06:53 2020 - [warning] Connection failed 4 time(s).. Fri Oct 9 16:06:53 2020 - [warning] Secondary network check script returned errors. Failover should not start so checking server status again. Check network settings for details. Fri Oct 9 16:06:56 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:06:562020 - [warning] Connectionfailed1time(s).. Fri Oct916:06:562020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Fri Oct916:06:562020 - [info] Executing SSH check script: exit0 Fri Oct916:06:592020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4)) Fri Oct 9 16:06:59 2020 - [warning] Connection failed 2 time(s).. Fri Oct 9 16:07:01 2020 - [warning] HealthCheck: Got timeout on checking SSH connection to 172.16.120.10! at /usr/local/share/perl5/MHA/HealthCheck.pm line 344. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Master is reachable from 172.16.120.12! Fri Oct 9 16:07:01 2020 - [warning] Master is reachable from at least one of other monitoring servers. Failover should not happen. Fri Oct 9 16:07:02 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (4)) Fri Oct916:07:022020 - [warning] Connectionfailed3time(s).. 
Fri Oct 9 16:07:05 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (4))
Fri Oct 9 16:07:05 2020 - [warning] Connection failed 4 time(s)..
Fri Oct 9 16:07:05 2020 - [warning] Secondary network check script returned errors. Failover should not start so checking server status again. Check network settings for details.
Fri Oct 9 16:07:05 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
ping_type=INSERT
master
1 2 3 4 5 6
# Firewall rules used for the network-partition test on the master.
# Effect: ICMP stays open (ping still works), TCP from 172.16.120.10 and
# 172.16.120.12 is accepted, and every other new inbound TCP connection is
# dropped. Note that 172.16.120.11 is not in the accept list here.
# Each command must be on its own line: written on a single line, the
# assignment becomes a temporary environment prefix and "$IPTABLES" expands to
# an empty string, so nothing is applied.
IPTABLES="/sbin/iptables"

# Start from an empty rule set.
"$IPTABLES" -F

# Keep ICMP reachable so plain ping continues to succeed.
"$IPTABLES" -A INPUT -p icmp --icmp-type any -j ACCEPT

# Allow TCP traffic only from the master itself and from 172.16.120.12.
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.10 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.12 -j ACCEPT

# Drop connection attempts (SYN) from every other source.
"$IPTABLES" -A INPUT -p tcp --syn -j DROP
Sat Oct 10 10:28:35 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 10:28:37 2020 - [info] GTID failover mode = 1 Sat Oct 10 10:28:37 2020 - [info] Dead Servers: Sat Oct 10 10:28:37 2020 - [info] Alive Servers: Sat Oct 10 10:28:37 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:28:37 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 10:28:37 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 10:28:37 2020 - [info] Alive Slaves: Sat Oct 10 10:28:37 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 10:28:37 2020 - [info] GTID ON Sat Oct 10 10:28:37 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:28:37 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct1010:28:372020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1010:28:372020 - [info] GTID ON Sat Oct1010:28:372020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1010:28:372020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1010:28:372020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct1010:28:372020 - [info] Checking slave configurations.. Sat Oct1010:28:372020 - [info] Checking replication filtering settings.. Sat Oct1010:28:372020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct1010:28:372020 - [info] Replication filtering check ok. Sat Oct1010:28:372020 - [info] GTID (withauto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct1010:28:372020 - [info] Checking SSH publickey authenticationsettingson the current master.. Sat Oct1010:28:372020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Sat Oct1010:28:372020 - [info] 172.16.120.10(172.16.120.10:3358) (currentmaster) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 10:28:37 2020 - [info] Checking master_ip_failover_script status:
Sat Oct 10 10:28:37 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358
Sat Oct 10 10:28:37 2020 - [info] OK.
Sat Oct 10 10:28:37 2020 - [warning] shutdown_script is not defined.
Sat Oct 10 10:28:37 2020 - [info] Set master ping interval 3 seconds.
Sat Oct 10 10:28:37 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12
Sat Oct 10 10:28:37 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358)..
Sat Oct 10 10:28:37 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
关闭slave-1
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
[root@centos-2 10:19:38 ~] #mysql -uroot -p -S /data/mysql_3358/run/mysql.sock dbms_monitor mysql: [Warning] Using a password on the command line interface can be insecure. Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 2118 Server version: 5.7.31-34-log Percona Server (GPL), Release 34, Revision 2e68637
Copyright (c) 2009-2020 Percona LLC and/or its affiliates Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
[root@centos-1 10:20:35 ~] #mysql -uroot -p -S /data/mysql_3358/run/mysql.sock dbms_monitor mysql: [Warning] Using a password on the command line interface can be insecure. Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 3413 Server version: 5.7.31-34-log Percona Server (GPL), Release 34, Revision 2e68637
Copyright (c) 2009-2020 Percona LLC and/or its affiliates Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
Sat Oct 10 10:50:38 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (111) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 10:50:38 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12--user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Sat Oct1010:50:382020 - [info] Executing SSH check script: exit0 Monitoringserver172.16.120.11is reachable, Masterisnot reachable from172.16.120.11. OK. Sat Oct1010:50:382020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Monitoringserver172.16.120.12is reachable, Masterisnot reachable from172.16.120.12. OK. Sat Oct1010:50:382020 - [info] Masterisnot reachable fromall other monitoring servers. Failover should start. Sat Oct1010:50:412020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 10:50:41 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 10:50:44 2020 - [warning] Got error on MySQL connect: 2003 (Can't connectto MySQL serveron'172.16.120.10' (111)) Sat Oct1010:50:442020 - [warning] Connectionfailed3time(s).. Sat Oct1010:50:472020 - [warning] Got erroron MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 10:50:47 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 10:50:47 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 10:50:47 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 10:50:47 2020 - [warning] SSH is reachable. Sat Oct 10 10:50:47 2020 - [info] Connecting to a master server failed. 
Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 10:50:47 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 10:50:47 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 10:50:47 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 10:50:48 2020 - [info] GTID failover mode = 1 Sat Oct 10 10:50:48 2020 - [info] Dead Servers: Sat Oct 10 10:50:48 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:50:48 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 10:50:48 2020 - [info] Alive Servers: Sat Oct 10 10:50:48 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 10:50:48 2020 - [info] Alive Slaves: Sat Oct 10 10:50:48 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 10:50:48 2020 - [info] GTID ON Sat Oct 10 10:50:48 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:50:48 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 10:50:48 2020 - [info] Checking slave configurations.. Sat Oct 10 10:50:48 2020 - [info] Checking replication filtering settings.. Sat Oct 10 10:50:48 2020 - [info] Replication filtering check ok. Sat Oct 10 10:50:48 2020 - [info] Master is down! Sat Oct 10 10:50:48 2020 - [info] Terminating monitoring script. Sat Oct 10 10:50:48 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 10:50:48 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 10:50:48 2020 - [info] Starting master failover. Sat Oct 10 10:50:48 2020 - [info] Sat Oct 10 10:50:48 2020 - [info] * Phase 1: Configuration Check Phase.. 
Sat Oct 10 10:50:48 2020 - [info] Sat Oct 10 10:50:49 2020 - [info] GTID failover mode = 1 Sat Oct 10 10:50:49 2020 - [info] Dead Servers: Sat Oct 10 10:50:49 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:50:49 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 10:50:49 2020 - [info] Checking master reachability via MySQL(double check)... Sat Oct 10 10:50:49 2020 - [info] ok. Sat Oct 10 10:50:49 2020 - [info] Alive Servers: Sat Oct 10 10:50:49 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 10:50:49 2020 - [info] Alive Slaves: Sat Oct 10 10:50:49 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 10:50:49 2020 - [info] GTID ON Sat Oct 10 10:50:49 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:50:49 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 10:50:49 2020 - [error][/usr/local/share/perl5/MHA/ServerManager.pm, ln492] Server 172.16.120.11(172.16.120.11:3358) is dead, but must be alive! Check server settings. Sat Oct 10 10:50:49 2020 - [error][/usr/local/share/perl5/MHA/ManagerUtil.pm, ln177] Got ERROR: at /usr/local/share/perl5/MHA/MasterFailover.pm line 269.
Sat Oct 10 10:59:07 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 10:59:09 2020 - [info] GTID failover mode = 1 Sat Oct 10 10:59:09 2020 - [info] Dead Servers: Sat Oct 10 10:59:09 2020 - [info] Alive Servers: Sat Oct 10 10:59:09 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:59:09 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 10:59:09 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 10:59:09 2020 - [info] Alive Slaves: Sat Oct 10 10:59:09 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 10:59:09 2020 - [info] GTID ON Sat Oct 10 10:59:09 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:59:09 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 10:59:09 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 10:59:09 2020 - [info] GTID ON Sat Oct 10 10:59:09 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:59:09 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 10:59:09 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 10:59:09 2020 - [info] Checking slave configurations.. Sat Oct 10 10:59:09 2020 - [info] Checking replication filtering settings.. Sat Oct 10 10:59:09 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 10:59:09 2020 - [info] Replication filtering check ok. Sat Oct 10 10:59:09 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 10:59:09 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 10:59:09 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 10:59:09 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 10:59:09 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 10:59:09 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 10:59:09 2020 - [info] OK. Sat Oct 10 10:59:09 2020 - [warning] shutdown_script is not defined. Sat Oct 10 10:59:09 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 10:59:09 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 10:59:09 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 10:59:09 2020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond..
[root@centos-1 11:07:08 ~] #mysql -uroot -p -S /data/mysql_3358/run/mysql.sock dbms_monitor mysql: [Warning] Using a password on the command line interface can be insecure. Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 35 Server version: 5.7.29-32-log Percona Server (GPL), Release 32, Revision 56bce88
Copyright (c) 2009-2020 Percona LLC and/or its affiliates Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
Sat Oct 10 11:07:15 2020 - [warning] Got error on MySQL insert ping: 2006 (MySQL server has gone away) Sat Oct 10 11:07:15 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 11:07:15 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=INSERT Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Sat Oct 10 11:07:16 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 11:07:16 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 11:07:18 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:07:18 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 11:07:21 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:07:21 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 11:07:24 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:07:24 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 11:07:24 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 11:07:24 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 11:07:24 2020 - [warning] SSH is reachable. Sat Oct 10 11:07:24 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. 
Sat Oct 10 11:07:24 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 11:07:24 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:07:24 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:07:25 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:07:25 2020 - [info] Dead Servers: Sat Oct 10 11:07:25 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:07:25 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:07:25 2020 - [info] Alive Servers: Sat Oct 10 11:07:25 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:07:25 2020 - [info] Alive Slaves: Sat Oct 10 11:07:25 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:07:25 2020 - [info] GTID ON Sat Oct 10 11:07:25 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:07:25 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:07:25 2020 - [info] Checking slave configurations.. Sat Oct 10 11:07:25 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:07:25 2020 - [info] Replication filtering check ok. Sat Oct 10 11:07:25 2020 - [info] Master is down! Sat Oct 10 11:07:25 2020 - [info] Terminating monitoring script. Sat Oct 10 11:07:25 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 11:07:25 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 11:07:25 2020 - [info] Starting master failover. Sat Oct 10 11:07:25 2020 - [info] Sat Oct 10 11:07:25 2020 - [info] * Phase 1: Configuration Check Phase.. 
Sat Oct 10 11:07:25 2020 - [info] Sat Oct 10 11:07:26 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:07:26 2020 - [info] Dead Servers: Sat Oct 10 11:07:26 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:07:26 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:07:26 2020 - [info] Alive Servers: Sat Oct 10 11:07:26 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:07:26 2020 - [info] Alive Slaves: Sat Oct 10 11:07:26 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:07:26 2020 - [info] GTID ON Sat Oct 10 11:07:26 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:07:26 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:07:26 2020 - [error][/usr/local/share/perl5/MHA/ServerManager.pm, ln492] Server 172.16.120.11(172.16.120.11:3358) is dead, but must be alive! Check server settings. Sat Oct 10 11:07:26 2020 - [error][/usr/local/share/perl5/MHA/ManagerUtil.pm, ln177] Got ERROR: at /usr/local/share/perl5/MHA/MasterFailover.pm line 269.
Sat Oct 10 11:13:58 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 11:13:59 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:13:59 2020 - [info] Dead Servers: Sat Oct 10 11:13:59 2020 - [info] Alive Servers: Sat Oct 10 11:13:59 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:13:59 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:13:59 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:13:59 2020 - [info] Alive Slaves: Sat Oct 10 11:13:59 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:13:59 2020 - [info] GTID ON Sat Oct 10 11:13:59 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:13:59 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:13:59 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:13:59 2020 - [info] GTID ON Sat Oct 10 11:13:59 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:13:59 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:13:59 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:13:59 2020 - [info] Checking slave configurations.. Sat Oct 10 11:13:59 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:13:59 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 11:13:59 2020 - [info] Replication filtering check ok. Sat Oct 10 11:13:59 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 11:13:59 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 11:13:59 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 11:13:59 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 11:13:59 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 11:13:59 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 11:14:00 2020 - [info] OK. Sat Oct 10 11:14:00 2020 - [warning] shutdown_script is not defined. Sat Oct 10 11:14:00 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 11:14:00 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 11:14:00 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 11:14:00 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
Sat Oct 10 11:22:12 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (111) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:22:12 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Sat Oct 10 11:22:12 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 11:22:12 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 11:22:12 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 11:22:15 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:22:15 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 11:22:18 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:22:18 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 11:22:21 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:22:21 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 11:22:21 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 11:22:21 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 11:22:21 2020 - [warning] SSH is reachable. Sat Oct 10 11:22:21 2020 - [info] Connecting to a master server failed. 
Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 11:22:21 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 11:22:21 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:22:21 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:22:22 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:22:22 2020 - [info] Dead Servers: Sat Oct 10 11:22:22 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:22 2020 - [info] Alive Servers: Sat Oct 10 11:22:22 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:22:22 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:22:22 2020 - [info] Alive Slaves: Sat Oct 10 11:22:22 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:22 2020 - [info] GTID ON Sat Oct 10 11:22:22 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:22 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:22 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:22 2020 - [info] GTID ON Sat Oct 10 11:22:22 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:22 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:22 2020 - [info] Checking slave configurations.. Sat Oct 10 11:22:22 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:22:22 2020 - [info] Replication filtering check ok. Sat Oct 10 11:22:22 2020 - [info] Master is down! Sat Oct 10 11:22:22 2020 - [info] Terminating monitoring script. 
Sat Oct 10 11:22:22 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 11:22:22 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 11:22:22 2020 - [info] Starting master failover. Sat Oct 10 11:22:22 2020 - [info] Sat Oct 10 11:22:22 2020 - [info] * Phase 1: Configuration Check Phase.. Sat Oct 10 11:22:22 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:22:23 2020 - [info] Dead Servers: Sat Oct 10 11:22:23 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Checking master reachability via MySQL(double check)... Sat Oct 10 11:22:23 2020 - [info] ok. Sat Oct 10 11:22:23 2020 - [info] Alive Servers: Sat Oct 10 11:22:23 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:22:23 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:22:23 2020 - [info] Alive Slaves: Sat Oct 10 11:22:23 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] Starting GTID based failover. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. 
Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] Forcing shutdown so that applications never connect to the current master.. Sat Oct 10 11:22:23 2020 - [info] Executing master IP deactivation script: Sat Oct 10 11:22:23 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 11:22:23 2020 - [info] done. Sat Oct 10 11:22:23 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 11:22:23 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. 
Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000011:486 Sat Oct 10 11:22:23 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20042-20531 Sat Oct 10 11:22:23 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 11:22:23 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000011:194 Sat Oct 10 11:22:23 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20143-20530 Sat Oct 10 11:22:23 2020 - [info] Oldest slaves: Sat Oct 10 11:22:23 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] Searching new master from slaves.. 
Sat Oct 10 11:22:23 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 11:22:23 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:22:23 2020 - [info] GTID ON Sat Oct 10 11:22:23 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:22:23 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:22:23 2020 - [info] Non-candidate masters: Sat Oct 10 11:22:23 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 11:22:23 2020 - [info] New master is 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:22:23 2020 - [info] Starting master failover.. Sat Oct 10 11:22:23 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358) To: 172.16.120.12(172.16.120.12:3358) (new master) +--172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] * Phase 3.3: New Master Recovery Phase.. Sat Oct 10 11:22:23 2020 - [info] Sat Oct 10 11:22:23 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 11:22:23 2020 - [info] done. Sat Oct 10 11:22:23 2020 - [info] Getting new master's binlognameand position.. Sat Oct1011:22:232020 - [info] mysql-bin.000007:3182161 Sat Oct1011:22:232020 - [info] All other slaves should startreplicationfrom here. 
Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.12', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 11:22:23 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000007, 3182161, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20531, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 11:22:23 2020 - [info] Executing master IP activate script: Sat Oct 10 11:22:23 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.12 --new_master_ip=172.16.120.12 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.12 Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. Creating app user on the new master.. Sat Oct 10 11:22:24 2020 - [info] OK. Sat Oct 10 11:22:24 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 11:22:24 2020 - [info] * Phase 3: Master Recovery Phase completed. Sat Oct 10 11:22:24 2020 - [info] Sat Oct 10 11:22:24 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sat Oct 10 11:22:24 2020 - [info] Sat Oct 10 11:22:24 2020 - [info] Sat Oct 10 11:22:24 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 11:22:24 2020 - [info] Sat Oct 10 11:22:24 2020 - [info] -- Slave recovery on host 172.16.120.11(172.16.120.11:3358) started, pid: 77208. Check tmp log /masterha/cls_new//172.16.120.11_3358_20201010112222.log if it takes time.. Sat Oct 10 11:22:25 2020 - [info] Sat Oct 10 11:22:25 2020 - [info] Log messages from 172.16.120.11 ... Sat Oct 10 11:22:25 2020 - [info] Sat Oct 10 11:22:24 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358).. Sat Oct 10 11:22:24 2020 - [info] Executed CHANGE MASTER. 
Sat Oct 10 11:22:24 2020 - [info] Slave started. Sat Oct 10 11:22:24 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20531, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.11(172.16.120.11:3358). Executed 2 events. Sat Oct 10 11:22:25 2020 - [info] End of log messages from 172.16.120.11. Sat Oct 10 11:22:25 2020 - [info] -- Slave on host 172.16.120.11(172.16.120.11:3358) started. Sat Oct 10 11:22:25 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 11:22:25 2020 - [info] Sat Oct 10 11:22:25 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 11:22:25 2020 - [info] Sat Oct 10 11:22:25 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 11:22:25 2020 - [info] 172.16.120.12: Resetting slave info succeeded. Sat Oct 10 11:22:25 2020 - [info] Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 11:22:25 2020 - [info]
----- Failover Report -----
cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.12(172.16.120.12:3358) succeeded
Master 172.16.120.10(172.16.120.10:3358) is down!
Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details.
Started automated(non-interactive) failover. Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.12(172.16.120.12:3358) as a new master. 172.16.120.12(172.16.120.12:3358): OK: Applying all logs succeeded. 172.16.120.12(172.16.120.12:3358): OK: Activated master IP address. 172.16.120.11(172.16.120.11:3358): OK: Slave started, replicating from 172.16.120.12(172.16.120.12:3358) 172.16.120.12(172.16.120.12:3358): Resetting slave info succeeded. Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 11:22:25 2020 - [info] Sending mail..
Sat Oct 10 11:29:28 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 11:29:30 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:29:30 2020 - [info] Dead Servers: Sat Oct 10 11:29:30 2020 - [info] Alive Servers: Sat Oct 10 11:29:30 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:29:30 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:29:30 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:29:30 2020 - [info] Alive Slaves: Sat Oct 10 11:29:30 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:29:30 2020 - [info] GTID ON Sat Oct 10 11:29:30 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:29:30 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct1011:29:302020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1011:29:302020 - [info] GTID ON Sat Oct1011:29:302020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1011:29:302020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1011:29:302020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct1011:29:302020 - [info] Checking slave configurations.. Sat Oct1011:29:302020 - [info] Checking replication filtering settings.. Sat Oct1011:29:302020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct1011:29:302020 - [info] Replication filtering check ok. Sat Oct1011:29:302020 - [info] GTID (withauto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct1011:29:302020 - [info] Checking SSH publickey authenticationsettingson the current master.. Sat Oct1011:29:302020 - [info] HealthCheck: SSH to172.16.120.10is reachable. Sat Oct1011:29:302020 - [info] 172.16.120.10(172.16.120.10:3358) (currentmaster) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 11:29:30 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 11:29:30 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 11:29:30 2020 - [info] OK. Sat Oct 10 11:29:30 2020 - [warning] shutdown_script is not defined. Sat Oct 10 11:29:30 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 11:29:30 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 11:29:30 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 11:29:30 2020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond..
Sat Oct 10 11:33:09 2020 - [warning] SSH is reachable. Sat Oct 10 11:33:09 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to checkserver status.. Sat Oct1011:33:092020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct1011:33:092020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct1011:33:092020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct1011:33:102020 - [info] GTID failovermode = 1 Sat Oct1011:33:102020 - [info] Dead Servers: Sat Oct1011:33:102020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:102020 - [info] Alive Servers: Sat Oct1011:33:102020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct1011:33:102020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct1011:33:102020 - [info] Alive Slaves: Sat Oct1011:33:102020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1011:33:102020 - [info] GTID ON Sat Oct1011:33:102020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:102020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1011:33:102020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1011:33:102020 - [info] GTID ON Sat Oct1011:33:102020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:102020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1011:33:102020 - [info] Checking slave configurations.. Sat Oct1011:33:102020 - [info] Checking replication filtering settings.. Sat Oct1011:33:102020 - [info] Replication filtering check ok. Sat Oct1011:33:102020 - [info] Masteris down! 
Sat Oct1011:33:102020 - [info] Terminating monitoring script. Sat Oct1011:33:102020 - [info] Got exit code 20 (Master dead). Sat Oct1011:33:102020 - [info] MHA::MasterFailover version0.58. Sat Oct1011:33:102020 - [info] Startingmaster failover. Sat Oct1011:33:102020 - [info] Sat Oct1011:33:102020 - [info] * Phase 1: Configuration Check Phase.. Sat Oct1011:33:102020 - [info] Sat Oct1011:33:112020 - [info] GTID failovermode = 1 Sat Oct1011:33:112020 - [info] Dead Servers: Sat Oct1011:33:112020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:112020 - [info] Alive Servers: Sat Oct1011:33:112020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct1011:33:112020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct1011:33:112020 - [info] Alive Slaves: Sat Oct1011:33:112020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1011:33:112020 - [info] GTID ON Sat Oct1011:33:112020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:112020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1011:33:112020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major versionbetween slaves) log-bin:enabled Sat Oct1011:33:112020 - [info] GTID ON Sat Oct1011:33:112020 - [info] Replicating from172.16.120.10(172.16.120.10:3358) Sat Oct1011:33:112020 - [info] Primary candidate for the newMaster (candidate_master isset) Sat Oct1011:33:112020 - [info] Starting GTID based failover. Sat Oct1011:33:112020 - [info] Sat Oct1011:33:112020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct1011:33:112020 - [info] Sat Oct1011:33:112020 - [info] * Phase 2: Dead MasterShutdown Phase.. Sat Oct1011:33:112020 - [info] Sat Oct1011:33:112020 - [info] Forcing shutdown so that applications neverconnectto the current master.. 
Sat Oct 10 11:33:11 2020 - [info] Executing master IP deactivation script: Sat Oct 10 11:33:11 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 RTNETLINK answers: Cannot assign requested address Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 11:33:11 2020 - [info] done. Sat Oct 10 11:33:11 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 11:33:11 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000012:50590 Sat Oct 10 11:33:11 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20532-20745 Sat Oct 10 11:33:11 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 11:33:11 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:33:11 2020 - [info] GTID ON Sat Oct 10 11:33:11 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:33:11 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:33:11 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000012:18307 Sat Oct 10 11:33:11 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20532-20608 Sat Oct 10 11:33:11 2020 - [info] Oldest slaves: Sat Oct 10 11:33:11 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:33:11 2020 - [info] GTID
ON Sat Oct 10 11:33:11 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:33:11 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] Searching new master from slaves.. Sat Oct 10 11:33:11 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 11:33:11 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:33:11 2020 - [info] GTID ON Sat Oct 10 11:33:11 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:33:11 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:33:11 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:33:11 2020 - [info] GTID ON Sat Oct 10 11:33:11 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:33:11 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:33:11 2020 - [info] Non-candidate masters: Sat Oct 10 11:33:11 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 11:33:11 2020 - [info] New master is 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:33:11 2020 - [info] Starting master failover.. Sat Oct 10 11:33:11 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
To: 172.16.120.12(172.16.120.12:3358) (new master) +--172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 3.3: New Master Recovery Phase.. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 11:33:11 2020 - [info] done. Sat Oct 10 11:33:11 2020 - [info] Getting new master's binlog name and position.. Sat Oct 10 11:33:11 2020 - [info] mysql-bin.000007:3232182 Sat Oct 10 11:33:11 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.12', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 11:33:11 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000007, 3232182, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20745, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 11:33:11 2020 - [info] Executing master IP activate script: Sat Oct 10 11:33:11 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.12 --new_master_ip=172.16.120.12 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.12 RTNETLINK answers: File exists Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. Creating app user on the new master.. Sat Oct 10 11:33:11 2020 - [info] OK. Sat Oct 10 11:33:11 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 11:33:11 2020 - [info] * Phase 3: Master Recovery Phase completed. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 4: Slaves Recovery Phase..
Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 11:33:11 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] -- Slave recovery on host 172.16.120.11(172.16.120.11:3358) started, pid: 78319. Check tmp log /masterha/cls_new//172.16.120.11_3358_20201010113310.log if it takes time.. Sat Oct 10 11:33:12 2020 - [info] Sat Oct 10 11:33:12 2020 - [info] Log messages from 172.16.120.11 ... Sat Oct 10 11:33:12 2020 - [info] Sat Oct 10 11:33:11 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358).. Sat Oct 10 11:33:11 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 11:33:11 2020 - [info] Slave started. Sat Oct 10 11:33:12 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20745, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.11(172.16.120.11:3358). Executed 2 events. Sat Oct 10 11:33:12 2020 - [info] End of log messages from 172.16.120.11. Sat Oct 10 11:33:12 2020 - [info] -- Slave on host 172.16.120.11(172.16.120.11:3358) started. Sat Oct 10 11:33:12 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 11:33:12 2020 - [info] Sat Oct 10 11:33:12 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 11:33:12 2020 - [info] Sat Oct 10 11:33:12 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 11:33:13 2020 - [info] 172.16.120.12: Resetting slave info succeeded. Sat Oct 10 11:33:13 2020 - [info] Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 11:33:13 2020 - [info] ----- Failover Report ----- cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.12(172.16.120.12:3358) succeeded Master 172.16.120.10(172.16.120.10:3358) is down! Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details. Started automated(non-interactive) failover. 
Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.12(172.16.120.12:3358) as a new master. 172.16.120.12(172.16.120.12:3358): OK: Applying all logs succeeded. 172.16.120.12(172.16.120.12:3358): OK: Activated master IP address. 172.16.120.11(172.16.120.11:3358): OK: Slave started, replicating from 172.16.120.12(172.16.120.12:3358) 172.16.120.12(172.16.120.12:3358): Resetting slave info succeeded. Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 11:33:13 2020 - [info] Sending mail..
Sat Oct 10 11:58:40 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 11:58:41 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:58:41 2020 - [info] Dead Servers: Sat Oct 10 11:58:41 2020 - [info] Alive Servers: Sat Oct 10 11:58:41 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:58:41 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:58:41 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:58:41 2020 - [info] Alive Slaves: Sat Oct 10 11:58:41 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:58:41 2020 - [info] GTID ON Sat Oct 10 11:58:41 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:58:41 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:58:41 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:58:41 2020 - [info] GTID ON Sat Oct 10 11:58:41 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:58:41 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:58:41 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:58:41 2020 - [info] Checking slave configurations.. Sat Oct 10 11:58:41 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:58:41 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 11:58:41 2020 - [info] Replication filtering check ok. Sat Oct 10 11:58:41 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 11:58:41 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 11:58:41 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 11:58:41 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 11:58:41 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 11:58:41 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 11:58:41 2020 - [info] OK. Sat Oct 10 11:58:41 2020 - [warning] shutdown_script is not defined. Sat Oct 10 11:58:41 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 11:58:41 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 11:58:41 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 11:58:41 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
调整master防火墙, 禁止slave-1访问
1 2 3 4 5 6 7
# Firewall rules applied on the master for this failover test.
# Per the accompanying note the goal is to cut off slave-1; presumably that is
# 172.16.120.11, the one cluster address with no ACCEPT rule below -- confirm
# against the actual topology.
IPTABLES="/sbin/iptables"

# Start from a clean slate: with no chain given, -F flushes every chain.
"$IPTABLES" -F

# Keep ICMP open so ping-based reachability checks still succeed.
"$IPTABLES" -A INPUT -p icmp --icmp-type any -j ACCEPT

# Hosts that stay allowed to talk TCP to the master.
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.10 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.13 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.12 -j ACCEPT

# Everyone else: drop new inbound TCP connection attempts (SYN packets only).
"$IPTABLES" -A INPUT -p tcp --syn -j DROP
Sat Oct 10 12:11:18 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (111) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:11:18 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Sat Oct 10 12:11:18 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 12:11:19 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Sat Oct 10 12:11:21 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:11:21 2020 - [warning] Connection failed 2 time(s).. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 12:11:24 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 12:11:24 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:11:24 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 12:11:27 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:11:27 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 12:11:27 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 12:11:27 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 12:11:27 2020 - [warning] SSH is reachable. Sat Oct 10 12:11:27 2020 - [info] Connecting to a master server failed.
Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 12:11:27 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 12:11:27 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 12:11:27 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 12:11:28 2020 - [info] GTID failover mode = 1 Sat Oct 10 12:11:28 2020 - [info] Dead Servers: Sat Oct 10 12:11:28 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:28 2020 - [info] Alive Servers: Sat Oct 10 12:11:28 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:11:28 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:11:28 2020 - [info] Alive Slaves: Sat Oct 10 12:11:28 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:28 2020 - [info] GTID ON Sat Oct 10 12:11:28 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:28 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:28 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:28 2020 - [info] GTID ON Sat Oct 10 12:11:28 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:28 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:28 2020 - [info] Checking slave configurations.. Sat Oct 10 12:11:28 2020 - [info] Checking replication filtering settings.. Sat Oct 10 12:11:28 2020 - [info] Replication filtering check ok. Sat Oct 10 12:11:28 2020 - [info] Master is down! Sat Oct 10 12:11:28 2020 - [info] Terminating monitoring script. 
Sat Oct 10 12:11:28 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 12:11:28 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 12:11:28 2020 - [info] Starting master failover. Sat Oct 10 12:11:28 2020 - [info] Sat Oct 10 12:11:28 2020 - [info] * Phase 1: Configuration Check Phase.. Sat Oct 10 12:11:28 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] GTID failover mode = 1 Sat Oct 10 12:11:29 2020 - [info] Dead Servers: Sat Oct 10 12:11:29 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Checking master reachability via MySQL(double check)... Sat Oct 10 12:11:29 2020 - [info] ok. Sat Oct 10 12:11:29 2020 - [info] Alive Servers: Sat Oct 10 12:11:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:11:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:11:29 2020 - [info] Alive Slaves: Sat Oct 10 12:11:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] Starting GTID based failover. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. 
Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] Forcing shutdown so that applications never connect to the current master.. Sat Oct 10 12:11:29 2020 - [info] Executing master IP deactivation script: Sat Oct 10 12:11:29 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 12:11:29 2020 - [info] done. Sat Oct 10 12:11:29 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 12:11:29 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. 
Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000014:1070 Sat Oct 10 12:11:29 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20747-20749 Sat Oct 10 12:11:29 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 12:11:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000014:778 Sat Oct 10 12:11:29 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20747-20748 Sat Oct 10 12:11:29 2020 - [info] Oldest slaves: Sat Oct 10 12:11:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] Searching new master from slaves.. 
Sat Oct 10 12:11:29 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 12:11:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:11:29 2020 - [info] GTID ON Sat Oct 10 12:11:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:11:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:11:29 2020 - [info] Non-candidate masters: Sat Oct 10 12:11:29 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 12:11:29 2020 - [info] New master is 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:11:29 2020 - [info] Starting master failover.. Sat Oct 10 12:11:29 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358) To: 172.16.120.12(172.16.120.12:3358) (new master) +--172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] * Phase 3.3: New Master Recovery Phase.. Sat Oct 10 12:11:29 2020 - [info] Sat Oct 10 12:11:29 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 12:11:29 2020 - [info] done. Sat Oct 10 12:11:29 2020 - [info] Getting new master's binlog name and position.. Sat Oct 10 12:11:29 2020 - [info] mysql-bin.000007:3233250 Sat Oct 10 12:11:29 2020 - [info] All other slaves should start replication from here.
Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.12', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 12:11:29 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000007, 3233250, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20749, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 12:11:29 2020 - [info] Executing master IP activate script: Sat Oct 10 12:11:29 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.12 --new_master_ip=172.16.120.12 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.12 RTNETLINK answers: File exists Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. Creating app user on the new master.. Sat Oct 10 12:11:30 2020 - [info] OK. Sat Oct 10 12:11:30 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 12:11:30 2020 - [info] * Phase 3: Master Recovery Phase completed. Sat Oct 10 12:11:30 2020 - [info] Sat Oct 10 12:11:30 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sat Oct 10 12:11:30 2020 - [info] Sat Oct 10 12:11:30 2020 - [info] Sat Oct 10 12:11:30 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 12:11:30 2020 - [info] Sat Oct 10 12:11:30 2020 - [info] -- Slave recovery on host 172.16.120.11(172.16.120.11:3358) started, pid: 81557. Check tmp log /masterha/cls_new//172.16.120.11_3358_20201010121128.log if it takes time.. Sat Oct 10 12:11:31 2020 - [info] Sat Oct 10 12:11:31 2020 - [info] Log messages from 172.16.120.11 ... Sat Oct 10 12:11:31 2020 - [info] Sat Oct 10 12:11:30 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358)..
Sat Oct 10 12:11:30 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 12:11:30 2020 - [info] Slave started. Sat Oct 10 12:11:30 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20749, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.11(172.16.120.11:3358). Executed 2 events. Sat Oct 10 12:11:31 2020 - [info] End of log messages from 172.16.120.11. Sat Oct 10 12:11:31 2020 - [info] -- Slave on host 172.16.120.11(172.16.120.11:3358) started. Sat Oct 10 12:11:31 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 12:11:31 2020 - [info] Sat Oct 10 12:11:31 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 12:11:31 2020 - [info] Sat Oct 10 12:11:31 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 12:11:31 2020 - [info] 172.16.120.12: Resetting slave info succeeded. Sat Oct 10 12:11:31 2020 - [info] Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 12:11:31 2020 - [info]
----- Failover Report -----
cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.12(172.16.120.12:3358) succeeded
Master 172.16.120.10(172.16.120.10:3358) is down!
Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details.
Started automated(non-interactive) failover. Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.12(172.16.120.12:3358) as a new master. 172.16.120.12(172.16.120.12:3358): OK: Applying all logs succeeded. 172.16.120.12(172.16.120.12:3358): OK: Activated master IP address. 172.16.120.11(172.16.120.11:3358): OK: Slave started, replicating from 172.16.120.12(172.16.120.12:3358) 172.16.120.12(172.16.120.12:3358): Resetting slave info succeeded. Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 12:11:31 2020 - [info] Sending mail..
Sat Oct 10 12:14:59 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 12:15:00 2020 - [info] GTID failover mode = 1 Sat Oct 10 12:15:00 2020 - [info] Dead Servers: Sat Oct 10 12:15:00 2020 - [info] Alive Servers: Sat Oct 10 12:15:00 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:15:00 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:15:00 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:15:00 2020 - [info] Alive Slaves: Sat Oct 10 12:15:00 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:15:00 2020 - [info] GTID ON Sat Oct 10 12:15:00 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:15:00 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:15:00 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:15:00 2020 - [info] GTID ON Sat Oct 10 12:15:00 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:15:00 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:15:00 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:15:00 2020 - [info] Checking slave configurations.. Sat Oct 10 12:15:00 2020 - [info] Checking replication filtering settings.. Sat Oct 10 12:15:00 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 12:15:00 2020 - [info] Replication filtering check ok. Sat Oct 10 12:15:00 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 12:15:00 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 12:15:00 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 12:15:00 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 12:15:00 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 12:15:00 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 12:15:00 2020 - [info] OK. Sat Oct 10 12:15:00 2020 - [warning] shutdown_script is not defined. Sat Oct 10 12:15:00 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 12:15:00 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 12:15:00 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 12:15:01 2020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond..
调整master防火墙, 禁止slave-1访问
1 2 3 4 5 6 7
# Firewall rules applied on the master for this failover test.
# Per the accompanying note the goal is to cut off slave-1; presumably that is
# 172.16.120.11, the one cluster address with no ACCEPT rule below -- confirm
# against the actual topology.
IPTABLES="/sbin/iptables"

# Start from a clean slate: with no chain given, -F flushes every chain.
"$IPTABLES" -F

# Keep ICMP open so ping-based reachability checks still succeed.
"$IPTABLES" -A INPUT -p icmp --icmp-type any -j ACCEPT

# Hosts that stay allowed to talk TCP to the master.
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.10 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.13 -j ACCEPT
"$IPTABLES" -A INPUT -p tcp -s 172.16.120.12 -j ACCEPT

# Everyone else: drop new inbound TCP connection attempts (SYN packets only).
"$IPTABLES" -A INPUT -p tcp --syn -j DROP
Sat Oct 10 12:22:43 2020 - [warning] Got error on MySQL insert ping: 2006 (MySQL server has gone away) Sat Oct 10 12:22:43 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 12:22:43 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=INSERT Sat Oct 10 12:22:43 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Sat Oct 10 12:22:46 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:22:46 2020 - [warning] Connection failed 2 time(s).. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 12:22:48 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 12:22:49 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:22:49 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 12:22:52 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 12:22:52 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 12:22:52 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 12:22:52 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 12:22:52 2020 - [warning] SSH is reachable. Sat Oct 10 12:22:52 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 12:22:52 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf..
Sat Oct 10 12:22:52 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 12:22:52 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 12:22:53 2020 - [info] GTID failover mode = 1 Sat Oct 10 12:22:53 2020 - [info] Dead Servers: Sat Oct 10 12:22:53 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:53 2020 - [info] Alive Servers: Sat Oct 10 12:22:53 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:22:53 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:22:53 2020 - [info] Alive Slaves: Sat Oct 10 12:22:53 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:53 2020 - [info] GTID ON Sat Oct 10 12:22:53 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:53 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:53 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:53 2020 - [info] GTID ON Sat Oct 10 12:22:53 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:53 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:53 2020 - [info] Checking slave configurations.. Sat Oct 10 12:22:53 2020 - [info] Checking replication filtering settings.. Sat Oct 10 12:22:53 2020 - [info] Replication filtering check ok. Sat Oct 10 12:22:53 2020 - [info] Master is down! Sat Oct 10 12:22:53 2020 - [info] Terminating monitoring script. Sat Oct 10 12:22:53 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 12:22:53 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 12:22:53 2020 - [info] Starting master failover. Sat Oct 10 12:22:53 2020 - [info] Sat Oct 10 12:22:53 2020 - [info] * Phase 1: Configuration Check Phase.. 
Sat Oct 10 12:22:53 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] GTID failover mode = 1 Sat Oct 10 12:22:54 2020 - [info] Dead Servers: Sat Oct 10 12:22:54 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Alive Servers: Sat Oct 10 12:22:54 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:22:54 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:22:54 2020 - [info] Alive Slaves: Sat Oct 10 12:22:54 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] Starting GTID based failover. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] Forcing shutdown so that applications never connect to the current master.. 
Sat Oct 10 12:22:54 2020 - [info] Executing master IP deactivation script: Sat Oct 10 12:22:54 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 RTNETLINK answers: Cannot assign requested address Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 12:22:54 2020 - [info] done. Sat Oct 10 12:22:54 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 12:22:54 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000015:110146 Sat Oct 10 12:22:54 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20750-21216 Sat Oct 10 12:22:54 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 12:22:54 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000015:85472 Sat Oct 10 12:22:54 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20750-21111 Sat Oct 10 12:22:54 2020 - [info] Oldest slaves: Sat Oct 10 12:22:54 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest 
major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] Searching new master from slaves.. Sat Oct 10 12:22:54 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 12:22:54 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 12:22:54 2020 - [info] GTID ON Sat Oct 10 12:22:54 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 12:22:54 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 12:22:54 2020 - [info] Non-candidate masters: Sat Oct 10 12:22:54 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 12:22:54 2020 - [info] New master is 172.16.120.12(172.16.120.12:3358) Sat Oct 10 12:22:54 2020 - [info] Starting master failover.. Sat Oct 10 12:22:54 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358) To: 172.16.120.12(172.16.120.12:3358) (new master) +--172.16.120.11(172.16.120.11:3358) Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 3.3: New Master Recovery Phase.. 
Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 12:22:54 2020 - [info] done. Sat Oct 10 12:22:54 2020 - [info] Getting new master's binlog name and position.. Sat Oct 10 12:22:54 2020 - [info] mysql-bin.000007:3342407 Sat Oct 10 12:22:54 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.12', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 12:22:54 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000007, 3342407, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-21216, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 12:22:54 2020 - [info] Executing master IP activate script: Sat Oct 10 12:22:54 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.12 --new_master_ip=172.16.120.12 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.12 RTNETLINK answers: File exists Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. Creating app user on the new master.. Sat Oct 10 12:22:54 2020 - [info] OK. Sat Oct 10 12:22:54 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 12:22:54 2020 - [info] * Phase 3: Master Recovery Phase completed. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 12:22:54 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] -- Slave recovery on host 172.16.120.11(172.16.120.11:3358) started, pid: 82756. 
Check tmp log /masterha/cls_new//172.16.120.11_3358_20201010122253.log if it takes time.. Sat Oct 10 12:22:55 2020 - [info] Sat Oct 10 12:22:55 2020 - [info] Log messages from 172.16.120.11 ... Sat Oct 10 12:22:55 2020 - [info] Sat Oct 10 12:22:54 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358).. Sat Oct 10 12:22:54 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 12:22:54 2020 - [info] Slave started. Sat Oct 10 12:22:55 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-21216, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.11(172.16.120.11:3358). Executed 2 events. Sat Oct 10 12:22:55 2020 - [info] End of log messages from 172.16.120.11. Sat Oct 10 12:22:55 2020 - [info] -- Slave on host 172.16.120.11(172.16.120.11:3358) started. Sat Oct 10 12:22:55 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 12:22:55 2020 - [info] Sat Oct 10 12:22:55 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 12:22:55 2020 - [info] Sat Oct 10 12:22:55 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 12:22:55 2020 - [info] 172.16.120.12: Resetting slave info succeeded. Sat Oct 10 12:22:55 2020 - [info] Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 12:22:55 2020 - [info]
----- Failover Report -----
cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.12(172.16.120.12:3358) succeeded
Master 172.16.120.10(172.16.120.10:3358) is down!
Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details.
Started automated(non-interactive) failover. Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.12(172.16.120.12:3358) as a new master. 172.16.120.12(172.16.120.12:3358): OK: Applying all logs succeeded. 172.16.120.12(172.16.120.12:3358): OK: Activated master IP address. 172.16.120.11(172.16.120.11:3358): OK: Slave started, replicating from 172.16.120.12(172.16.120.12:3358) 172.16.120.12(172.16.120.12:3358): Resetting slave info succeeded. Master failover to 172.16.120.12(172.16.120.12:3358) completed successfully. Sat Oct 10 12:22:55 2020 - [info] Sending mail..
Sat Oct 10 11:43:34 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 11:43:35 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:43:35 2020 - [info] Dead Servers: Sat Oct 10 11:43:35 2020 - [info] Alive Servers: Sat Oct 10 11:43:35 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:43:35 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:43:35 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:43:35 2020 - [info] Alive Slaves: Sat Oct 10 11:43:35 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:43:35 2020 - [info] GTID ON Sat Oct 10 11:43:35 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:43:35 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:43:35 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:43:35 2020 - [info] GTID ON Sat Oct 10 11:43:35 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:43:35 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:43:35 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:43:35 2020 - [info] Checking slave configurations.. Sat Oct 10 11:43:35 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:43:35 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 11:43:35 2020 - [info] Replication filtering check ok. Sat Oct 10 11:43:35 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 11:43:35 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 11:43:35 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 11:43:35 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 11:43:35 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 11:43:35 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 11:43:35 2020 - [info] OK. Sat Oct 10 11:43:35 2020 - [warning] shutdown_script is not defined. Sat Oct 10 11:43:35 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 11:43:35 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 11:43:35 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 11:43:35 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
Sat Oct 10 11:51:18 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (111) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:51:18 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Sat Oct 10 11:51:18 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 11:51:18 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 11:51:18 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 11:51:21 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:51:21 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 11:51:24 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:51:24 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 11:51:27 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 11:51:27 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 11:51:27 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 11:51:27 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 11:51:27 2020 - [warning] SSH is reachable. Sat Oct 10 11:51:27 2020 - [info] Connecting to a master server failed. 
Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 11:51:27 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 11:51:27 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:51:27 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 11:51:28 2020 - [warning] SQL Thread is stopped(no error) on 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:51:28 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:51:28 2020 - [info] Dead Servers: Sat Oct 10 11:51:28 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:28 2020 - [info] Alive Servers: Sat Oct 10 11:51:28 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:51:28 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:51:28 2020 - [info] Alive Slaves: Sat Oct 10 11:51:28 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:28 2020 - [info] GTID ON Sat Oct 10 11:51:28 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:28 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:28 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:28 2020 - [info] GTID ON Sat Oct 10 11:51:28 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:28 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:28 2020 - [info] Checking slave configurations.. Sat Oct 10 11:51:28 2020 - [info] Checking replication filtering settings.. Sat Oct 10 11:51:28 2020 - [info] Replication filtering check ok. 
Sat Oct 10 11:51:28 2020 - [info] Master is down! Sat Oct 10 11:51:28 2020 - [info] Terminating monitoring script. Sat Oct 10 11:51:28 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 11:51:28 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 11:51:28 2020 - [info] Starting master failover. Sat Oct 10 11:51:28 2020 - [info] Sat Oct 10 11:51:28 2020 - [info] * Phase 1: Configuration Check Phase.. Sat Oct 10 11:51:28 2020 - [info] Sat Oct 10 11:51:29 2020 - [warning] SQL Thread is stopped(no error) on 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:51:29 2020 - [info] GTID failover mode = 1 Sat Oct 10 11:51:29 2020 - [info] Dead Servers: Sat Oct 10 11:51:29 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Checking master reachability via MySQL(double check)... Sat Oct 10 11:51:29 2020 - [info] ok. Sat Oct 10 11:51:29 2020 - [info] Alive Servers: Sat Oct 10 11:51:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:51:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:51:29 2020 - [info] Alive Slaves: Sat Oct 10 11:51:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] Starting SQL thread on 172.16.120.11(172.16.120.11:3358) .. Sat Oct 10 11:51:29 2020 - [info] done. 
Sat Oct 10 11:51:29 2020 - [info] Starting GTID based failover. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] Forcing shutdown so that applications never connect to the current master.. Sat Oct 10 11:51:29 2020 - [info] Executing master IP deactivation script: Sat Oct 10 11:51:29 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 RTNETLINK answers: Cannot assign requested address Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 11:51:29 2020 - [info] done. Sat Oct 10 11:51:29 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 11:51:29 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. 
Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000013:486 Sat Oct 10 11:51:29 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20746 Sat Oct 10 11:51:29 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 11:51:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000013:486 Sat Oct 10 11:51:29 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:20746 Sat Oct 10 11:51:29 2020 - [info] Oldest slaves: Sat Oct 10 11:51:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - 
[info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] Searching new master from slaves.. Sat Oct 10 11:51:29 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 11:51:29 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 11:51:29 2020 - [info] GTID ON Sat Oct 10 11:51:29 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 11:51:29 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 11:51:29 2020 - [info] Non-candidate masters: Sat Oct 10 11:51:29 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 11:51:29 2020 - [info] New master is 172.16.120.11(172.16.120.11:3358) Sat Oct 10 11:51:29 2020 - [info] Starting master failover.. Sat Oct 10 11:51:29 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358) To: 172.16.120.11(172.16.120.11:3358) (new master) +--172.16.120.12(172.16.120.12:3358) Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 3.3: New Master Recovery Phase.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 11:51:29 2020 - [info] done. 
Sat Oct 10 11:51:29 2020 - [info] Replicating from the latest slave 172.16.120.12(172.16.120.12:3358) and waiting to apply.. Sat Oct 10 11:51:29 2020 - [info] Waiting all logs to be applied on the latest slave.. Sat Oct 10 11:51:29 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358).. Sat Oct 10 11:51:29 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 11:51:29 2020 - [info] Slave started. Sat Oct 10 11:51:29 2020 - [info] Waiting to execute all relay logs on 172.16.120.11(172.16.120.11:3358).. Sat Oct 10 11:51:29 2020 - [info] master_pos_wait(mysql-bin.000007:3232449) completed on 172.16.120.11(172.16.120.11:3358). Executed 1 events. Sat Oct 10 11:51:29 2020 - [info] done. Sat Oct 10 11:51:29 2020 - [info] done. Sat Oct 10 11:51:29 2020 - [info] Getting new master's binlog name and position.. Sat Oct 10 11:51:29 2020 - [info] mysql-bin.000010:141523 Sat Oct 10 11:51:29 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.11', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 11:51:29 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000010, 141523, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20746, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 11:51:29 2020 - [info] Executing master IP activate script: Sat Oct 10 11:51:29 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.11 --new_master_ip=172.16.120.11 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.11 Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. 
Creating app user on the new master.. Sat Oct 10 11:51:29 2020 - [info] OK. Sat Oct 10 11:51:29 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 11:51:29 2020 - [info] * Phase 3: Master Recovery Phase completed. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 11:51:29 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] -- Slave recovery on host 172.16.120.12(172.16.120.12:3358) started, pid: 79937. Check tmp log /masterha/cls_new//172.16.120.12_3358_20201010115128.log if it takes time.. Sat Oct 10 11:51:30 2020 - [info] Sat Oct 10 11:51:30 2020 - [info] Log messages from 172.16.120.12 ... Sat Oct 10 11:51:30 2020 - [info] Sat Oct 10 11:51:29 2020 - [info] Resetting slave 172.16.120.12(172.16.120.12:3358) and starting replication from the new master 172.16.120.11(172.16.120.11:3358).. Sat Oct 10 11:51:29 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 11:51:29 2020 - [info] Slave started. Sat Oct 10 11:51:29 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-20746, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.12(172.16.120.12:3358). Executed 0 events. Sat Oct 10 11:51:30 2020 - [info] End of log messages from 172.16.120.12. Sat Oct 10 11:51:30 2020 - [info] -- Slave on host 172.16.120.12(172.16.120.12:3358) started. Sat Oct 10 11:51:30 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 11:51:30 2020 - [info] Sat Oct 10 11:51:30 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 11:51:30 2020 - [info] Sat Oct 10 11:51:30 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 11:51:30 2020 - [info] 172.16.120.11: Resetting slave info succeeded. Sat Oct 10 11:51:30 2020 - [info] Master failover to 172.16.120.11(172.16.120.11:3358) completed successfully. Sat Oct 10 11:51:30 2020 - [info]
----- Failover Report -----
cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.11(172.16.120.11:3358) succeeded
Master 172.16.120.10(172.16.120.10:3358) is down!
Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details.
Started automated(non-interactive) failover. Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.11(172.16.120.11:3358) as a new master. 172.16.120.11(172.16.120.11:3358): OK: Applying all logs succeeded. 172.16.120.11(172.16.120.11:3358): OK: Activated master IP address. 172.16.120.12(172.16.120.12:3358): OK: Slave started, replicating from 172.16.120.11(172.16.120.11:3358) 172.16.120.11(172.16.120.11:3358): Resetting slave info succeeded. Master failover to 172.16.120.11(172.16.120.11:3358) completed successfully. Sat Oct 10 11:51:30 2020 - [info] Sending mail..
slave-1成了new master
1 2 3 4 5 6 7 8 9 10 11 12
root@localhost 11:51:06 [dbms_monitor]> select * from monitor_delay; +----+---------------------+ | id | ctime | +----+---------------------+ | 1 | 2020-10-10 11:21:39 | | 2 | 2020-10-10 11:31:45 | | 3 | 2020-10-10 11:51:01 | +----+---------------------+ 3 rows in set (0.00 sec)
root@localhost 11:54:53 [dbms_monitor]> show slave status; Empty set (0.00 sec)
Sat Oct 10 14:06:09 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 14:06:10 2020 - [info] GTID failover mode = 1 Sat Oct 10 14:06:10 2020 - [info] Dead Servers: Sat Oct 10 14:06:10 2020 - [info] Alive Servers: Sat Oct 10 14:06:10 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:06:10 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:06:10 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 14:06:10 2020 - [info] Alive Slaves: Sat Oct 10 14:06:10 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:06:10 2020 - [info] GTID ON Sat Oct 10 14:06:10 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:06:10 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:06:10 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:06:10 2020 - [info] GTID ON Sat Oct 10 14:06:10 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:06:10 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:06:10 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:06:10 2020 - [info] Checking slave configurations.. Sat Oct 10 14:06:10 2020 - [info] Checking replication filtering settings.. Sat Oct 10 14:06:10 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 14:06:10 2020 - [info] Replication filtering check ok. Sat Oct 10 14:06:10 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 14:06:10 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 14:06:10 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 14:06:10 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 14:06:10 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 14:06:10 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 14:06:10 2020 - [info] OK. Sat Oct 10 14:06:10 2020 - [warning] shutdown_script is not defined. Sat Oct 10 14:06:10 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 14:06:10 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 14:06:10 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 14:06:10 2020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond..
Sat Oct 10 14:25:05 2020 - [warning] Got error on MySQL insert ping: 2006 (MySQL server has gone away) Sat Oct 10 14:25:05 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=INSERT Sat Oct 10 14:25:05 2020 - [info] Executing SSH check script: exit 0 Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Sat Oct 10 14:25:06 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 14:25:06 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 14:25:08 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:25:08 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 14:25:11 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:25:11 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 14:25:14 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:25:14 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 14:25:14 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 14:25:14 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 14:25:14 2020 - [warning] SSH is reachable. Sat Oct 10 14:25:14 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 14:25:14 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. 
Sat Oct 10 14:25:14 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 14:25:14 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 14:25:15 2020 - [warning] SQL Thread is stopped(no error) on 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:25:15 2020 - [info] GTID failover mode = 1 Sat Oct 10 14:25:15 2020 - [info] Dead Servers: Sat Oct 10 14:25:15 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:15 2020 - [info] Alive Servers: Sat Oct 10 14:25:15 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:25:15 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 14:25:15 2020 - [info] Alive Slaves: Sat Oct 10 14:25:15 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:15 2020 - [info] GTID ON Sat Oct 10 14:25:15 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:15 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:15 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:15 2020 - [info] GTID ON Sat Oct 10 14:25:15 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:15 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:15 2020 - [info] Checking slave configurations.. Sat Oct 10 14:25:15 2020 - [info] Checking replication filtering settings.. Sat Oct 10 14:25:15 2020 - [info] Replication filtering check ok. Sat Oct 10 14:25:15 2020 - [info] Master is down! Sat Oct 10 14:25:15 2020 - [info] Terminating monitoring script. Sat Oct 10 14:25:15 2020 - [info] Got exit code 20 (Master dead). Sat Oct 10 14:25:15 2020 - [info] MHA::MasterFailover version 0.58. Sat Oct 10 14:25:15 2020 - [info] Starting master failover. 
Sat Oct 10 14:25:15 2020 - [info] Sat Oct 10 14:25:15 2020 - [info] * Phase 1: Configuration Check Phase.. Sat Oct 10 14:25:15 2020 - [info] Sat Oct 10 14:25:16 2020 - [warning] SQL Thread is stopped(no error) on 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:25:16 2020 - [info] GTID failover mode = 1 Sat Oct 10 14:25:16 2020 - [info] Dead Servers: Sat Oct 10 14:25:16 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Alive Servers: Sat Oct 10 14:25:16 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:25:16 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 14:25:16 2020 - [info] Alive Slaves: Sat Oct 10 14:25:16 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] Starting SQL thread on 172.16.120.11(172.16.120.11:3358) .. Sat Oct 10 14:25:16 2020 - [info] done. Sat Oct 10 14:25:16 2020 - [info] Starting GTID based failover. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] Forcing shutdown so that applications never connect to the current master.. 
Sat Oct 10 14:25:16 2020 - [info] Executing master IP deactivation script: Sat Oct 10 14:25:16 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --command=stopssh --ssh_user=root Disabling the VIP on old master: 172.16.120.10 RTNETLINK answers: Cannot assign requested address Fake!!! 原主库 rpl_semi_sync_master_enabled=0 rpl_semi_sync_slave_enabled=1 Sat Oct 10 14:25:16 2020 - [info] done. Sat Oct 10 14:25:16 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sat Oct 10 14:25:16 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] * Phase 3: Master Recovery Phase.. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000016:268346 Sat Oct 10 14:25:16 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:21217-22354 Sat Oct 10 14:25:16 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sat Oct 10 14:25:16 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master 
is set) Sat Oct 10 14:25:16 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000016:268346 Sat Oct 10 14:25:16 2020 - [info] Retrieved Gtid Set: 44a4ea53-fcad-11ea-bd16-0050563b7b42:21217-22354 Sat Oct 10 14:25:16 2020 - [info] Oldest slaves: Sat Oct 10 14:25:16 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] Searching new master from slaves.. 
Sat Oct 10 14:25:16 2020 - [info] Candidate masters from the configuration file: Sat Oct 10 14:25:16 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:25:16 2020 - [info] GTID ON Sat Oct 10 14:25:16 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:25:16 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:25:16 2020 - [info] Non-candidate masters: Sat Oct 10 14:25:16 2020 - [info] Searching from candidate_master slaves which have received the latest relay log events.. Sat Oct 10 14:25:16 2020 - [info] New master is 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:25:16 2020 - [info] Starting master failover.. Sat Oct 10 14:25:16 2020 - [info] From: 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358) To: 172.16.120.11(172.16.120.11:3358) (new master) +--172.16.120.12(172.16.120.12:3358) Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] * Phase 3.3: New Master Recovery Phase.. Sat Oct 10 14:25:16 2020 - [info] Sat Oct 10 14:25:16 2020 - [info] Waiting all logs to be applied.. Sat Oct 10 14:25:17 2020 - [info] done. Sat Oct 10 14:25:17 2020 - [info] Replicating from the latest slave 172.16.120.12(172.16.120.12:3358) and waiting to apply.. Sat Oct 10 14:25:17 2020 - [info] Waiting all logs to be applied on the latest slave.. 
Sat Oct 10 14:25:17 2020 - [info] Resetting slave 172.16.120.11(172.16.120.11:3358) and starting replication from the new master 172.16.120.12(172.16.120.12:3358).. Sat Oct 10 14:25:17 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 14:25:17 2020 - [info] Slave started. Sat Oct 10 14:25:17 2020 - [info] Waiting to execute all relay logs on 172.16.120.11(172.16.120.11:3358).. Sat Oct 10 14:25:17 2020 - [info] master_pos_wait(mysql-bin.000007:3608644) completed on 172.16.120.11(172.16.120.11:3358). Executed 1 events. Sat Oct 10 14:25:17 2020 - [info] done. Sat Oct 10 14:25:17 2020 - [info] done. Sat Oct 10 14:25:17 2020 - [info] Getting new master's binlog name and position.. Sat Oct 10 14:25:17 2020 - [info] mysql-bin.000010:517718 Sat Oct 10 14:25:17 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='172.16.120.11', MASTER_PORT=3358, MASTER_AUTO_POSITION=1, MASTER_USER='repler', MASTER_PASSWORD='xxx'; Sat Oct 10 14:25:17 2020 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: mysql-bin.000010, 517718, 44a4ea53-fcad-11ea-bd16-0050563b7b42:1-22354, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27 Sat Oct 10 14:25:17 2020 - [info] Executing master IP activate script: Sat Oct 10 14:25:17 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=start --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 --new_master_host=172.16.120.11 --new_master_ip=172.16.120.11 --new_master_port=3358 --new_master_user='mha' --new_master_password=xxx Enabling the VIP - 172.16.120.128 on the new master - 172.16.120.11 Fake!!! 新主库 rpl_semi_sync_master_enabled=1 rpl_semi_sync_slave_enabled=0 Set read_only=0 on the new master. Creating app user on the new master.. Sat Oct 10 14:25:18 2020 - [info] OK. Sat Oct 10 14:25:18 2020 - [info] ** Finished master recovery successfully. Sat Oct 10 14:25:18 2020 - [info] * Phase 3: Master Recovery Phase completed.
Sat Oct 10 14:25:18 2020 - [info] Sat Oct 10 14:25:18 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sat Oct 10 14:25:18 2020 - [info] Sat Oct 10 14:25:18 2020 - [info] Sat Oct 10 14:25:18 2020 - [info] * Phase 4.1: Starting Slaves in parallel.. Sat Oct 10 14:25:18 2020 - [info] Sat Oct 10 14:25:18 2020 - [info] -- Slave recovery on host 172.16.120.12(172.16.120.12:3358) started, pid: 89417. Check tmp log /masterha/cls_new//172.16.120.12_3358_20201010142515.log if it takes time.. Sat Oct 10 14:25:19 2020 - [info] Sat Oct 10 14:25:19 2020 - [info] Log messages from 172.16.120.12 ... Sat Oct 10 14:25:19 2020 - [info] Sat Oct 10 14:25:18 2020 - [info] Resetting slave 172.16.120.12(172.16.120.12:3358) and starting replication from the new master 172.16.120.11(172.16.120.11:3358).. Sat Oct 10 14:25:18 2020 - [info] Executed CHANGE MASTER. Sat Oct 10 14:25:18 2020 - [info] Slave started. Sat Oct 10 14:25:18 2020 - [info] gtid_wait(44a4ea53-fcad-11ea-bd16-0050563b7b42:1-22354, 45d1f02a-fcad-11ea-8a44-0050562f2198:1-27) completed on 172.16.120.12(172.16.120.12:3358). Executed 0 events. Sat Oct 10 14:25:19 2020 - [info] End of log messages from 172.16.120.12. Sat Oct 10 14:25:19 2020 - [info] -- Slave on host 172.16.120.12(172.16.120.12:3358) started. Sat Oct 10 14:25:19 2020 - [info] All new slave servers recovered successfully. Sat Oct 10 14:25:19 2020 - [info] Sat Oct 10 14:25:19 2020 - [info] * Phase 5: New master cleanup phase.. Sat Oct 10 14:25:19 2020 - [info] Sat Oct 10 14:25:19 2020 - [info] Resetting slave info on the new master.. Sat Oct 10 14:25:19 2020 - [info] 172.16.120.11: Resetting slave info succeeded. Sat Oct 10 14:25:19 2020 - [info] Master failover to 172.16.120.11(172.16.120.11:3358) completed successfully. Sat Oct 10 14:25:19 2020 - [info]
----- Failover Report -----
cls_new: MySQL Master failover 172.16.120.10(172.16.120.10:3358) to 172.16.120.11(172.16.120.11:3358) succeeded
Master 172.16.120.10(172.16.120.10:3358) is down!
Check MHA Manager logs at centos-4:/masterha/cls_new/manager.log for details.
Started automated(non-interactive) failover. Invalidated master IP address on 172.16.120.10(172.16.120.10:3358) Selected 172.16.120.11(172.16.120.11:3358) as a new master. 172.16.120.11(172.16.120.11:3358): OK: Applying all logs succeeded. 172.16.120.11(172.16.120.11:3358): OK: Activated master IP address. 172.16.120.12(172.16.120.12:3358): OK: Slave started, replicating from 172.16.120.11(172.16.120.11:3358) 172.16.120.11(172.16.120.11:3358): Resetting slave info succeeded. Master failover to 172.16.120.11(172.16.120.11:3358) completed successfully. Sat Oct 10 14:25:19 2020 - [info] Sending mail..
slave-1
1 2 3 4 5 6 7 8 9
root@localhost 14:23:56 [dbms_monitor]> select * from monitor_delay; +----+---------------------+ | id | ctime | +----+---------------------+ | 88 | 2020-10-10 12:21:17 | | 90 | 2020-10-10 14:22:29 | +----+---------------------+ 2 rows in set (0.00 sec)
Sat Oct 10 14:34:42 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 14:34:43 2020 - [info] GTID failover mode = 1 Sat Oct 10 14:34:43 2020 - [info] Dead Servers: Sat Oct 10 14:34:43 2020 - [info] Alive Servers: Sat Oct 10 14:34:43 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:34:43 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 14:34:43 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 14:34:43 2020 - [info] Alive Slaves: Sat Oct 10 14:34:43 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:34:43 2020 - [info] GTID ON Sat Oct 10 14:34:43 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:34:43 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:34:43 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.31-34-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 14:34:43 2020 - [info] GTID ON Sat Oct 10 14:34:43 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:34:43 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 14:34:43 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 14:34:43 2020 - [info] Checking slave configurations.. Sat Oct 10 14:34:43 2020 - [info] Checking replication filtering settings.. Sat Oct 10 14:34:43 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 14:34:43 2020 - [info] Replication filtering check ok. Sat Oct 10 14:34:43 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 14:34:43 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 14:34:43 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 14:34:43 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 14:34:43 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 14:34:43 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 14:34:43 2020 - [info] OK. Sat Oct 10 14:34:43 2020 - [warning] shutdown_script is not defined. Sat Oct 10 14:34:43 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 14:34:43 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 14:34:43 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 14:34:43 2020 - [info] Ping(CONNECT) succeeded, waiting until MySQL doesn't respond..
制造slave-1 sql_thread error
在master创建表
1 2
root@localhost 14:35:55 [dbms_monitor]> create table make_error(id int not null auto_increment primary key); Query OK, 0 rows affected (0.02 sec)
在slave-1删除make_error表
1 2 3 4 5 6 7 8 9 10 11
root@localhost 14:38:10 [dbms_monitor]> set global super_read_only=0; Query OK, 0 rows affected (0.00 sec)
Sat Oct 10 14:40:59 2020 - [warning] Got error on MySQL connect ping: DBI connect(';host=172.16.120.10;port=3358;mysql_connect_timeout=1','mha',...) failed: Can't connect to MySQL server on '172.16.120.10' (111) at /usr/local/share/perl5/MHA/HealthCheck.pm line 98. 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:40:59 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=CONNECT Sat Oct 10 14:40:59 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 14:40:59 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 14:40:59 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 14:41:02 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:41:02 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 14:41:05 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:41:05 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 14:41:08 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 14:41:08 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 14:41:08 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 14:41:08 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 14:41:08 2020 - [warning] SSH is reachable. Sat Oct 10 14:41:08 2020 - [info] Connecting to a master server failed.
Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 14:41:08 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf.. Sat Oct 10 14:41:08 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 14:41:08 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 14:41:09 2020 - [error][/usr/local/share/perl5/MHA/Server.pm, ln935] SQL Thread is stopped(error) on 172.16.120.11(172.16.120.11:3358)! Errno:1051, Error:Coordinator stopped because there were error(s) in the worker(s). The most recent failure being: Worker 1 failed executing transaction '44a4ea53-fcad-11ea-bd16-0050563b7b42:22356' at master log mysql-bin.000017, end_log_pos 620. See error log and/or performance_schema.replication_applier_status_by_worker table for more details about this failure or others, if any. Sat Oct 10 14:41:09 2020 - [error][/usr/local/share/perl5/MHA/ServerManager.pm, ln703] Server 172.16.120.11(172.16.120.11:3358) is alive, but does not work as a slave! Sat Oct 10 14:41:09 2020 - [warning] Got Error: at /usr/local/share/perl5/MHA/MasterMonitor.pm line 560. Sat Oct 10 14:41:09 2020 - [info] Got exit code 1 (Not master dead).
Sat Oct 10 15:54:20 2020 - [info] MHA::MasterMonitor version 0.58. Sat Oct 10 15:54:21 2020 - [info] GTID failover mode = 1 Sat Oct 10 15:54:21 2020 - [info] Dead Servers: Sat Oct 10 15:54:21 2020 - [info] Alive Servers: Sat Oct 10 15:54:21 2020 - [info] 172.16.120.10(172.16.120.10:3358) Sat Oct 10 15:54:21 2020 - [info] 172.16.120.11(172.16.120.11:3358) Sat Oct 10 15:54:21 2020 - [info] 172.16.120.12(172.16.120.12:3358) Sat Oct 10 15:54:21 2020 - [info] Alive Slaves: Sat Oct 10 15:54:21 2020 - [info] 172.16.120.11(172.16.120.11:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 15:54:21 2020 - [info] GTID ON Sat Oct 10 15:54:21 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 15:54:21 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 15:54:21 2020 - [info] 172.16.120.12(172.16.120.12:3358) Version=5.7.29-32-log (oldest major version between slaves) log-bin:enabled Sat Oct 10 15:54:21 2020 - [info] GTID ON Sat Oct 10 15:54:21 2020 - [info] Replicating from 172.16.120.10(172.16.120.10:3358) Sat Oct 10 15:54:21 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sat Oct 10 15:54:21 2020 - [info] Current Alive Master: 172.16.120.10(172.16.120.10:3358) Sat Oct 10 15:54:21 2020 - [info] Checking slave configurations.. Sat Oct 10 15:54:21 2020 - [info] Checking replication filtering settings.. Sat Oct 10 15:54:21 2020 - [info] binlog_do_db= , binlog_ignore_db= Sat Oct 10 15:54:21 2020 - [info] Replication filtering check ok. Sat Oct 10 15:54:21 2020 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking. Sat Oct 10 15:54:21 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sat Oct 10 15:54:21 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. 
Sat Oct 10 15:54:21 2020 - [info] 172.16.120.10(172.16.120.10:3358) (current master) +--172.16.120.11(172.16.120.11:3358) +--172.16.120.12(172.16.120.12:3358)
Sat Oct 10 15:54:21 2020 - [info] Checking master_ip_failover_script status: Sat Oct 10 15:54:21 2020 - [info] /etc/masterha/scripts/master_ip_failover_vip --vip=172.16.120.128 --command=status --ssh_user=root --orig_master_host=172.16.120.10 --orig_master_ip=172.16.120.10 --orig_master_port=3358 Sat Oct 10 15:54:21 2020 - [info] OK. Sat Oct 10 15:54:21 2020 - [warning] shutdown_script is not defined. Sat Oct 10 15:54:21 2020 - [info] Set master ping interval 3 seconds. Sat Oct 10 15:54:21 2020 - [info] Set secondary check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 Sat Oct 10 15:54:21 2020 - [info] Starting ping health check on 172.16.120.10(172.16.120.10:3358).. Sat Oct 10 15:54:21 2020 - [info] Ping(INSERT) succeeded, waiting until MySQL doesn't respond..
Sat Oct 10 15:56:03 2020 - [warning] Got error on MySQL insert ping: 2006 (MySQL server has gone away) Sat Oct 10 15:56:03 2020 - [info] Executing secondary network check script: masterha_secondary_check -s 172.16.120.11 -s 172.16.120.12 --user=root --master_host=172.16.120.10 --master_ip=172.16.120.10 --master_port=3358 --master_user=mha --master_password=xxx --ping_type=INSERT Sat Oct 10 15:56:03 2020 - [info] Executing SSH check script: exit 0 Sat Oct 10 15:56:04 2020 - [info] HealthCheck: SSH to 172.16.120.10 is reachable. Monitoring server 172.16.120.11 is reachable, Master is not reachable from 172.16.120.11. OK. Monitoring server 172.16.120.12 is reachable, Master is not reachable from 172.16.120.12. OK. Sat Oct 10 15:56:04 2020 - [info] Master is not reachable from all other monitoring servers. Failover should start. Sat Oct 10 15:56:06 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 15:56:06 2020 - [warning] Connection failed 2 time(s).. Sat Oct 10 15:56:09 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 15:56:09 2020 - [warning] Connection failed 3 time(s).. Sat Oct 10 15:56:12 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '172.16.120.10' (111)) Sat Oct 10 15:56:12 2020 - [warning] Connection failed 4 time(s).. Sat Oct 10 15:56:12 2020 - [warning] Master is not reachable from health checker! Sat Oct 10 15:56:12 2020 - [warning] Master 172.16.120.10(172.16.120.10:3358) is not reachable! Sat Oct 10 15:56:12 2020 - [warning] SSH is reachable. Sat Oct 10 15:56:12 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha/conf/masterha_default.cnf and /etc/masterha/conf/cls_new.cnf again, and trying to connect to all servers to check server status.. Sat Oct 10 15:56:12 2020 - [info] Reading default configuration from /etc/masterha/conf/masterha_default.cnf..
Sat Oct 10 15:56:12 2020 - [info] Reading application default configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 15:56:12 2020 - [info] Reading server configuration from /etc/masterha/conf/cls_new.cnf.. Sat Oct 10 15:56:13 2020 - [error][/usr/local/share/perl5/MHA/Server.pm, ln935] SQL Thread is stopped(error) on 172.16.120.11(172.16.120.11:3358)! Errno:1051, Error:Coordinator stopped because there were error(s) in the worker(s). The most recent failure being: Worker 1 failed executing transaction '44a4ea53-fcad-11ea-bd16-0050563b7b42:22419' at master log mysql-bin.000019, end_log_pos 14917. See error log and/or performance_schema.replication_applier_status_by_worker table for more details about this failure or others, if any. Sat Oct 10 15:56:13 2020 - [error][/usr/local/share/perl5/MHA/ServerManager.pm, ln703] Server 172.16.120.11(172.16.120.11:3358) is alive, but does not work as a slave! Sat Oct 10 15:56:13 2020 - [warning] Got Error: at /usr/local/share/perl5/MHA/MasterMonitor.pm line 560. Sat Oct 10 15:56:13 2020 - [info] Got exit code 1 (Not master dead).