MongoDB复制集概述
MongoDB复制集实现了冗余备份和故障转移两大功能,这样能保证数据库的高可用性。
在生产环境中,复制集至少包括三个节点:一个主节点(primary)、一个从节点(secondary)和一个仲裁节点(arbiter)。
其中每一个节点都是mongod进程对应的实例,节点间通过心跳检查对方的状态。
primary节点:负责数据库的读写操作。
secondary节点:备份primary节点上的数据,可以有多个。
arbiter节点:不存储数据,只在主节点故障时参与投票,帮助从复制集剩余节点中选举出一个新的primary节点。
准备
centos7 1804
网络NAT+仅主机
3台主机
192.168.25.11 主节点
192.168.25.12 从节点
192.168.25.13 仲裁节点
/etc/hosts
192.168.25.11 node1.fgq.com node1
192.168.25.12 node2.fgq.com node2
192.168.25.13 node3.fgq.com node3
192.168.25.14 node4.fgq.com node4
192.168.25.15 node5.fgq.com node5
下载mongodb包
https://www.mongodb.com/download-center/community
mongodb-linux-x86_64-rhel70-4.0.6.tgz
## 3个节点都操作,以node1为例
[root@node1 ~]# mkdir -p /fgq/base-env/
[root@node1 ~]# cd /fgq/base-env
[root@node1 base-env]# rz 上传mongodb包
[root@node1 base-env]# tar zxf mongodb-linux-x86_64-rhel70-4.0.6.tgz
[root@node1 base-env]# ln -s mongodb-linux-x86_64-rhel70-4.0.6 mongodb
[root@node1 ~]# mkdir -p /fgq/data/mongodb/logs
[root@node1 ~]# cd /fgq/data/mongodb/
[root@node1 mongodb]# vi mongodb.conf
#端口号
port = 27017
#数据目录
dbpath=/fgq/data/mongodb
#日志目录
logpath=/fgq/data/mongodb/logs/mongodb.log
#日志持久化,单实例需要开启
#journal = true
#最大同时连接数
maxConns = 5000
#设置后台运行
fork=true
#日志输出方式
logappend = true
#开启认证
#auth = true
bind_ip=0.0.0.0
#副本集名称
replSet = rs1
[root@node1 ~]# vim /etc/profile.d/mongodb.sh
export PATH=$PATH:/fgq/base-env/mongodb/bin
[root@node1 ~]# source /etc/profile.d/mongodb.sh
[root@node1 ~]# mongo -h
[root@node3 ~]# nohup /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf > mongodlog 2>&1 &
[1] 1465
[root@node3 ~]# ss -ntl|grep 27017
LISTEN 0 128 *:27017 *:*
[root@node1 ~]# ps aux |grep mongod
root 1503 1.5 3.2 1111320 60372 ? Sl 15:23 0:01 /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf
root 1551 0.0 0.0 112704 972 pts/0 S+ 15:25 0:00 grep --color=auto mongod
初始化复制集主节点、添加从节点和仲裁节点
node1 主节点操作
##进入shell交互式
[root@node1 ~]# mongo
MongoDB shell version v4.0.6
connecting to: mongodb://127.0.0.1:27017/?gssapiServiceName=mongodb
Implicit session: session { "id" : UUID("a6b8d8c6-79ee-4a18-95b0-7d065bd336e9") }
MongoDB server version: 4.0.6
Welcome to the MongoDB shell.
## 初始化复制集primary节点
> rs.initiate({_id:'rs1',members:[{_id:1,host:'node1:27017'}]})
{
"ok" : 1,
"operationTime" : Timestamp(1552030156, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552030156, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
rs1:SECONDARY> rs.conf()
{
"_id" : "rs1",
"version" : 1,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 1,
"host" : "node1:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("5c8219cca795d643beefd1c7")
}
}
## 添加secondary节点和arbiter节点
rs1:PRIMARY> rs.add("node2:27017")
{
"ok" : 1,
"operationTime" : Timestamp(1552030573, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552030573, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
rs1:PRIMARY> rs.addArb("node3:27017")
{
"ok" : 1,
"operationTime" : Timestamp(1552030656, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552030656, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
检查各个节点local库信息
node1操作
rs1:PRIMARY> use local
switched to db local
rs1:PRIMARY> show collections
oplog.rs
replset.election
replset.minvalid
replset.oplogTruncateAfterPoint
startup_log
node2操作
[root@node2 ~]# mongo
rs1:SECONDARY> use local
switched to db local
rs1:SECONDARY> show collections
oplog.rs
replset.minvalid
replset.oplogTruncateAfterPoint
startup_log
node3操作
rs1:ARBITER> use local
switched to db local
rs1:ARBITER> show collections
replset.minvalid
replset.oplogTruncateAfterPoint
startup_log
检查复制集状态
node1操作
rs1:PRIMARY> rs.status()
{
"set" : "rs1",
"date" : ISODate("2019-03-08T07:45:32.161Z"),
"myState" : 1,
"term" : NumberLong(1),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"appliedOpTime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"durableOpTime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552031119, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1315,
"optime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2019-03-08T07:45:29Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552030156, 2),
"electionDate" : ISODate("2019-03-08T07:29:16Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 559,
"optime" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"optimeDurable" : {
"ts" : Timestamp(1552031129, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2019-03-08T07:45:29Z"),
"optimeDurableDate" : ISODate("2019-03-08T07:45:29Z"),
"lastHeartbeat" : ISODate("2019-03-08T07:45:31.621Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T07:45:30.657Z"),
"pingMs" : NumberLong(2),
"lastHeartbeatMessage" : "",
"syncingTo" : "node1:27017",
"syncSourceHost" : "node1:27017",
"syncSourceId" : 1,
"infoMessage" : "",
"configVersion" : 3
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 475,
"lastHeartbeat" : ISODate("2019-03-08T07:45:31.621Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T07:45:31.611Z"),
"pingMs" : NumberLong(1),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552031129, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552031129, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
测试复制集
## 主节点插入数据
[root@node1 ~]# mongo
rs1:PRIMARY> use testdb
switched to db testdb
rs1:PRIMARY> db
testdb
rs1:PRIMARY> for (i=1;i<=100;i++) db.students.insert({name:"student"+i,age:(i%10),address:"#25 Lianyun Road,Zhengzhou,China"})
WriteResult({ "nInserted" : 1 })
rs1:PRIMARY> show collections
students
rs1:PRIMARY> db.students.find()
{ "_id" : ObjectId("5c82215ba2f532712480afda"), "name" : "student1", "age" : 1, "address" : "#25 Lianyun Road,Zhengzhou,China" }
{ "_id" : ObjectId("5c82215ba2f532712480afdb"), "name" : "student2", "age" : 2, "address" : "#25 Lianyun Road,Zhengzhou,China" }
{ "_id" : ObjectId("5c82215ba2f532712480afdc"), "name" : "student3", "age" : 3, "address" : "#25 Lianyun Road,Zhengzhou,China" }
... ...
## 从节点检查复制情况
[root@node2 ~]# mongo
rs1:SECONDARY> db
testdb
rs1:SECONDARY> show collections
2019-03-08T16:10:45.196+0800 E QUERY [js] Error: listCollections failed: {
"operationTime" : Timestamp(1552032639, 1),
"ok" : 0,
"errmsg" : "not master and slaveOk=false", //因为secondary默认不允许读取数据(写操作只能在primary上执行),如需在secondary上读取,则执行:rs.slaveOk()
"code" : 13435,
"codeName" : "NotMasterNoSlaveOk",
"$clusterTime" : {
"clusterTime" : Timestamp(1552032639, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
} :
_getErrorWithCode@src/mongo/shell/utils.js:25:13
DB.prototype._getCollectionInfosCommand@src/mongo/shell/db.js:943:1
DB.prototype.getCollectionInfos@src/mongo/shell/db.js:993:20
DB.prototype.getCollectionNames@src/mongo/shell/db.js:1031:16
shellHelper.show@src/mongo/shell/utils.js:869:9
shellHelper@src/mongo/shell/utils.js:766:15
@(shellhelp2):1:1
rs1:SECONDARY> rs.slaveOk()
rs1:SECONDARY> show collections
students
rs1:SECONDARY> db.students.find()
{ "_id" : ObjectId("5c82215ba2f532712480afda"), "name" : "student1", "age" : 1, "address" : "#25 Lianyun Road,Zhengzhou,China" }
{ "_id" : ObjectId("5c82215ba2f532712480afdd"), "name" : "student4", "age" : 4, "address" : "#25 Lianyun Road,Zhengzhou,China" }
{ "_id" : ObjectId("5c82215ba2f532712480afe6"), "name" : "student13", "age" : 3, "address" : "#25 Lianyun Road,Zhengzhou,China" }
通过db.students.find()查询到和主节点上一样的数据,表示数据同步成功!
## arbiter节点检查数据情况
rs1:ARBITER> use testdb
switched to db testdb
rs1:ARBITER> show collections
2019-03-08T16:12:39.844+0800 E QUERY [js] Error: listCollections failed: {
"ok" : 0,
"errmsg" : "not master and slaveOk=false",
"code" : 13435,
"codeName" : "NotMasterNoSlaveOk"
} :
_getErrorWithCode@src/mongo/shell/utils.js:25:13
DB.prototype._getCollectionInfosCommand@src/mongo/shell/db.js:943:1
DB.prototype.getCollectionInfos@src/mongo/shell/db.js:993:20
DB.prototype.getCollectionNames@src/mongo/shell/db.js:1031:16
shellHelper.show@src/mongo/shell/utils.js:869:9
shellHelper@src/mongo/shell/utils.js:766:15
@(shellhelp2):1:1
rs1:ARBITER> rs.slaveOk()
rs1:ARBITER> show collections
rs1:ARBITER>
arbiter并没有进行数据同步,因为仲裁节点只参与投票,不接收数据!
复制集工作原理
复制集通过local库下的oplog.rs集合进行数据同步
测试MongoDB复制集自动故障转移功能
MongoDB复制集搭建完成后,相当于具备了备份的功能,上文已展示
下面我们主要测试一下MongoDB的另一大功能,自动故障转移的功能
测试时,先测试kill掉从节点,OK后,再测试kill掉主节点
--------------------------------------------------------------------------
--------------------------------------------------------------------------
## MongoDB的primary节点宕机
通过kill primary节点进行测试
### 查看集群状态,所有节点运行正常
node1操作
rs1:PRIMARY> rs.status()
{
"set" : "rs1",
"date" : ISODate("2019-03-08T08:38:37.523Z"),
"myState" : 1,
"term" : NumberLong(1),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"appliedOpTime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"durableOpTime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552034299, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 4500,
"optime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2019-03-08T08:38:29Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552030156, 2),
"electionDate" : ISODate("2019-03-08T07:29:16Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 3744,
"optime" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"optimeDurable" : {
"ts" : Timestamp(1552034309, 1),
"t" : NumberLong(1)
},
"optimeDate" : ISODate("2019-03-08T08:38:29Z"),
"optimeDurableDate" : ISODate("2019-03-08T08:38:29Z"),
"lastHeartbeat" : ISODate("2019-03-08T08:38:36.447Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:38:37.485Z"),
"pingMs" : NumberLong(1),
"lastHeartbeatMessage" : "",
"syncingTo" : "node1:27017",
"syncSourceHost" : "node1:27017",
"syncSourceId" : 1,
"infoMessage" : "",
"configVersion" : 3
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 3660,
"lastHeartbeat" : ISODate("2019-03-08T08:38:36.445Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:38:36.445Z"),
"pingMs" : NumberLong(1),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552034309, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552034309, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
### kill掉主节点
[root@node1 ~]# ps -ef|grep mongo
root 1503 1 1 15:23 ? 00:01:02 /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf
root 4085 1284 0 15:54 pts/0 00:00:00 mongo
root 4239 4221 0 16:40 pts/1 00:00:00 grep --color=auto mongo
[root@node1 ~]# kill -9 1503 ## 生产中不要这样操作,详情请看最下方
[root@node1 ~]# ps aux|grep mongo
root 4085 0.0 1.4 786668 26140 pts/0 Sl+ 15:54 0:00 mongo
root 4243 0.0 0.0 112708 968 pts/1 S+ 16:42 0:00 grep --color=auto mongo
### 再次检查集群运行状态
node2操作
rs1:SECONDARY>
rs1:PRIMARY>
rs1:PRIMARY> rs.status() ## node2的shell没有关闭,当node1宕机,此时从节点自动切换为主节点
{
"set" : "rs1",
"date" : ISODate("2019-03-08T08:45:37.005Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552034509, 1),
"t" : NumberLong(1)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552034509, 1),
"t" : NumberLong(1)
},
"appliedOpTime" : {
"ts" : Timestamp(1552034730, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1552034730, 1),
"t" : NumberLong(2)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552034509, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 0,
"state" : 8, ## 主节点宕机
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2019-03-08T08:45:35.958Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:41:56.704Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to node1:27017 (192.168.25.11:27017) :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY", ## 原secondary节点已经变为主节点
"uptime" : 4896,
"optime" : {
"ts" : Timestamp(1552034730, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T08:45:30Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552034529, 1),
"electionDate" : ISODate("2019-03-08T08:42:09Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 4080,
"lastHeartbeat" : ISODate("2019-03-08T08:45:35.485Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:45:36.980Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552034730, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552034730, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
## 新主节点插入测试数据,一切操作正常
node2操作
rs1:PRIMARY> db
testdb
rs1:PRIMARY> show collections
students
rs1:PRIMARY> db.scores.insert({stuid:1,subobject:"math",score:99})
WriteResult({ "nInserted" : 1 })
rs1:PRIMARY> db.scores.find()
{ "_id" : ObjectId("5c822de50c4a1756741ee9f4"), "stuid" : 1, "subobject" : "math", "score" : 99 }
rs1:PRIMARY>
### 重新启动原主节点,查看集群状态
node1操作
[root@node1 ~]# nohup /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf > mongodlog 2>&1 &
[1] 4275
[root@node1 ~]# ps aux|grep mongo
root 4085 0.0 1.4 786668 26140 pts/0 Sl+ 15:54 0:00 mongo
root 4277 39.2 3.8 1392028 72024 ? Sl 16:58 0:01 /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf
root 4362 0.0 0.0 112708 968 pts/1 S+ 16:58 0:00 grep --color=auto mongo
node2查看
rs1:PRIMARY> rs.status()
{
"set" : "rs1",
"date" : ISODate("2019-03-08T08:59:54.163Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"appliedOpTime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552035542, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY", ## 原主节点已经变为从节点
"uptime" : 97,
"optime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"optimeDurable" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T08:59:50Z"),
"optimeDurableDate" : ISODate("2019-03-08T08:59:50Z"),
"lastHeartbeat" : ISODate("2019-03-08T08:59:52.821Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:59:52.978Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "node2:27017",
"syncSourceHost" : "node2:27017",
"syncSourceId" : 2,
"infoMessage" : "",
"configVersion" : 3
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 5753,
"optime" : {
"ts" : Timestamp(1552035590, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T08:59:50Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552034529, 1),
"electionDate" : ISODate("2019-03-08T08:42:09Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 4937,
"lastHeartbeat" : ISODate("2019-03-08T08:59:52.609Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T08:59:54.090Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552035590, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552035590, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
### 在node1上查看node2刚才插入的测试数据信息
此时node1已经变为从节点了,由于shell没有关闭,重连后提示符会自动切换为SECONDARY
rs1:PRIMARY>
2019-03-08T16:59:28.412+0800 I NETWORK [js] trying reconnect to 127.0.0.1:27017 failed
2019-03-08T16:59:28.413+0800 I NETWORK [js] reconnect 127.0.0.1:27017 ok
rs1:SECONDARY>
rs1:SECONDARY> db
testdb
rs1:SECONDARY> show collections
2019-03-08T17:03:02.207+0800 E QUERY [js] Error: listCollections failed: {
"operationTime" : Timestamp(1552035780, 1),
"ok" : 0,
"errmsg" : "not master and slaveOk=false",
"code" : 13435,
"codeName" : "NotMasterNoSlaveOk",
"$clusterTime" : {
"clusterTime" : Timestamp(1552035780, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
} :
_getErrorWithCode@src/mongo/shell/utils.js:25:13
DB.prototype._getCollectionInfosCommand@src/mongo/shell/db.js:943:1
DB.prototype.getCollectionInfos@src/mongo/shell/db.js:993:20
DB.prototype.getCollectionNames@src/mongo/shell/db.js:1031:16
shellHelper.show@src/mongo/shell/utils.js:869:9
shellHelper@src/mongo/shell/utils.js:766:15
@(shellhelp2):1:1
rs1:SECONDARY> rs.slaveOk()
rs1:SECONDARY> show collections
scores
students
rs1:SECONDARY> db.scores.find()
{ "_id" : ObjectId("5c822de50c4a1756741ee9f4"), "stuid" : 1, "subobject" : "math", "score" : 99 }
集群运转正常,数据已经正常同步过来!
--------------------------------------------------------------------------
--------------------------------------------------------------------------
## MongoDB的secondary节点宕机
通过kill secondary节点进行测试,此时从节点是node1
### 查看集群状态,所有节点运行正常
rs1:PRIMARY> rs.status()
{
"set" : "rs1",
"date" : ISODate("2019-03-08T09:08:00.829Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"appliedOpTime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552036030, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 584,
"optime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"optimeDurable" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T09:07:50Z"),
"optimeDurableDate" : ISODate("2019-03-08T09:07:50Z"),
"lastHeartbeat" : ISODate("2019-03-08T09:07:59.417Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T09:07:59.642Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "node2:27017",
"syncSourceHost" : "node2:27017",
"syncSourceId" : 2,
"infoMessage" : "",
"configVersion" : 3
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 6239,
"optime" : {
"ts" : Timestamp(1552036070, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T09:07:50Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552034529, 1),
"electionDate" : ISODate("2019-03-08T08:42:09Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 5423,
"lastHeartbeat" : ISODate("2019-03-08T09:07:59.227Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T09:08:00.730Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552036070, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552036070, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
### kill secondary节点进程
[root@node1 ~]# ps aux|grep mongo
root 4085 0.0 1.4 786668 26668 pts/0 Sl+ 15:54 0:00 mongo
root 4277 1.7 5.5 1403308 104272 ? Sl 16:58 0:12 /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf
root 4410 0.0 0.0 112708 968 pts/1 S+ 17:09 0:00 grep --color=auto mongo
You have new mail in /var/spool/mail/root
[root@node1 ~]# kill -9 4277
[root@node1 ~]# ps aux|grep mongo
root 4085 0.0 1.4 786668 26668 pts/0 Sl+ 15:54 0:00 mongo
root 4425 0.0 0.0 112708 964 pts/1 S+ 17:13 0:00 grep --color=auto mongo
### node2上再次查看节点状态,发现secondary节点已经宕机
rs1:PRIMARY> rs.status()
{
"set" : "rs1",
"date" : ISODate("2019-03-08T09:16:00.469Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1552036430, 1),
"t" : NumberLong(2)
},
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1552036430, 1),
"t" : NumberLong(2)
},
"appliedOpTime" : {
"ts" : Timestamp(1552036550, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1552036550, 1),
"t" : NumberLong(2)
}
},
"lastStableCheckpointTimestamp" : Timestamp(1552036430, 1),
"members" : [
{
"_id" : 1,
"name" : "node1:27017",
"health" : 0,
"state" : 8, ## secondary节点宕机
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2019-03-08T09:16:00.428Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T09:13:54.171Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to node1:27017 (192.168.25.11:27017) :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 2,
"name" : "node2:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 6719,
"optime" : {
"ts" : Timestamp(1552036550, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2019-03-08T09:15:50Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1552034529, 1),
"electionDate" : ISODate("2019-03-08T08:42:09Z"),
"configVersion" : 3,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 3,
"name" : "node3:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 5903,
"lastHeartbeat" : ISODate("2019-03-08T09:15:59.886Z"),
"lastHeartbeatRecv" : ISODate("2019-03-08T09:15:59.412Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 3
}
],
"ok" : 1,
"operationTime" : Timestamp(1552036550, 1),
"$clusterTime" : {
"clusterTime" : Timestamp(1552036550, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
### 主节点(node2)向测试表插入一条记录,一切操作正常
rs1:PRIMARY> db.scores.insert({stuid:2,subobject:"math",score:88})
WriteResult({ "nInserted" : 1 })
rs1:PRIMARY> db.scores.find()
{ "_id" : ObjectId("5c822de50c4a1756741ee9f4"), "stuid" : 1, "subobject" : "math", "score" : 99 }
{ "_id" : ObjectId("5c8233950c4a1756741ee9f5"), "stuid" : 2, "subobject" : "math", "score" : 88 }
### 再次启动从节点,检查上一步的node2主节点操作插入的数据是否同步,发现数据正常同步到从节点node1
node1上启动mongodb进程
[root@node1 ~]# nohup /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf > mongodlog 2>&1 &
[1] 4447
[root@node1 ~]# ps aux|grep mongo
root 4085 0.0 1.4 786668 26668 pts/0 Sl+ 15:54 0:00 mongo
root 4449 25.6 3.1 1374996 59604 ? Sl 17:21 0:00 /fgq/base-env/mongodb/bin/mongod -f /fgq/data/mongodb/mongodb.conf
root 4534 0.0 0.0 112708 968 pts/1 S+ 17:21 0:00 grep --color=auto mongo
[root@node1 ~]# mongo
rs1:SECONDARY> use testdb
switched to db testdb
rs1:SECONDARY> rs.slaveOk()
rs1:SECONDARY> show collections
scores
students
rs1:SECONDARY> db.scores.find()
{ "_id" : ObjectId("5c822de50c4a1756741ee9f4"), "stuid" : 1, "subobject" : "math", "score" : 99 }
{ "_id" : ObjectId("5c8233950c4a1756741ee9f5"), "stuid" : 2, "subobject" : "math", "score" : 88 }
mongod进程收到SIGINT信号或者SIGTERM信号,会做一些处理
> 关闭所有打开的连接
> 将内存数据强制刷新到磁盘
> 当前的操作执行完毕
> 安全停止
切忌kill -9
kill -9会使数据库直接关闭,可能导致数据丢失、数据文件损坏,之后需要修复数据库(成本高,有风险)
使用kill命令关闭进程
$ kill -2 PID
原理:-2表示向mongod进程发送SIGINT信号
或
$ kill -15 PID
原理:-15表示向mongod进程发送SIGTERM信号(信号4是SIGILL,不是SIGTERM)
参考文章:http://www.cnblogs.com/clsn/p/8214194.html#_label3
网友评论