一、环境说明
192.168.5.65 master(redis、sentinel)
192.168.5.66 slave1(redis、sentinel)
192.168.5.71 slave2(redis、sentinel)
#hosts文件配置
cat>>/etc/hosts<二、修改系统参数
#修改最大可打开文件数
cat>>/etc/security/limits.conf<#TCP监听队列大小
echo "net.core.somaxconn=32767" >> /etc/sysctl.conf
sysctl -p
#OOM相关:vm.overcommit_memory
echo "vm.overcommit_memory=1" >> /etc/sysctl.conf
sysctl -p
#关闭内核的“Transparent Huge Pages (THP)”特性
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.local
chmod +x /etc/rc.local
三、安装redis
[root@redis65 /]# cd /opt
[root@redis65 opt]# wget http://download.redis.io/releases/redis-5.0.0.tar.gz
[root@redis65 opt]# tar -zxvf redis-5.0.0.tar.gz
[root@redis65 opt]# cd redis-5.0.0/
[root@redis65 redis-5.0.0]# make
[root@redis65 redis-5.0.0]# make install PREFIX=/usr/local/redis
#创建实例目录
[root@redis65 /]# mkdir -p /data/redis/redis_7001
##redis65 7001配置文件(master)
cat>>/data/redis/redis_7001/redis_7001.conf<##redis66 7001配置文件(slave)
cat>>/data/redis/redis_7001/redis_7001.conf<##redis启动文件
cat>>/etc/init.d/redis_7001</dev/null sleep2 while[-x$PIDFILE] do echo"WaitingforRedistoshutdown..." sleep1 done echo"Redisstopped" fi ;; restart|force-reload) ${0}stop ${0}start ;; *) echo"Usage:/etc/init.d/redis_7001{start|stop|restart|force-reload}">&2 exit1 esac EOF chmod+x/etc/init.d/redis_7001 /etc/init.d/redis_7001start chkconfigredis_7001on# 在redis-cli -h 127.0.0.1 -a 'password'这里会有一个问题,当在shell中输入以上命令时,控制台总会输出一串“Warning: Using a password with '-a' option on the command line interface may not be safe.”
#解决办法:在命令末尾加上 2>/dev/null,将标准错误丢弃即可,如:redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
# 同步确认
[root@redis65 data]# redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.65:7001> info replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1
slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1
slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0
master_replid:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:14919
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:14919
[root@redis66 /]# redis-cli -h 192.168.5.66 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.66:7001> INFO replication
# Replication
role:slave
master_host:192.168.5.65
master_port:7001
master_link_status:up
master_last_io_seconds_ago:9
master_sync_in_progress:0
slave_repl_offset:112
slave_priority:100
slave_read_only:1
connected_slaves:0
master_replid:c8cbd4ae635da25193060cacaaebf8a696227476
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:112
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:1
repl_backlog_histlen:112
同步正常时:
master_link_status:up
master_repl_offset #和slave_repl_offset相等
master_last_io_seconds_ago #在10秒内
#shell脚本随机插入数据测试
cat>>/data/11.sh</dev/null done EOF chmod+x/data/11.sh timesh/data/11.sh# slave只读不允许写
192.168.5.66:7001> set yaya 465464654
(error) READONLY You can't write against a read only replica.
#停止slave后往master里写数据
cat>>/data/11.sh</dev/null done EOF #master写完数据再开启slave,看slave是否有同步数据
192.168.5.65:7001>infokeyspace #Keyspace db0:keys=149,expires=0,avg_ttl=0 192.168.5.65:7001>DBSIZE (integer)149 192.168.5.65:7001>getname112 "64we64f54f4qwe464cvw6e4" 192.168.5.65:7001>INFOreplication #Replication role:master connected_slaves:0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14549 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:14549 192.168.5.65:7001>INFOreplication #Replication role:master connected_slaves:3 slave0:ip=192.168.5.66,port=7001,state=online,offset=14563,lag=1 slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1 slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14563 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:14563 192.168.5.66:7001>infokeyspace #Keyspace db0:keys=149,expires=0,avg_ttl=0 192.168.5.66:7001>DBSIZE (integer)149 192.168.5.66:7001>getname112 "64we64f54f4qwe464cvw6e4" 192.168.5.66:7001>INFOreplication #Replication role:slave master_host:192.168.5.65 master_port:7001 master_link_status:up master_last_io_seconds_ago:1 master_sync_in_progress:0 slave_repl_offset:14633 slave_priority:100 slave_read_only:1 connected_slaves:0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14633 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:14550 repl_backlog_histlen:84# 从上面可以看到数据已同步
四、redis哨兵模式
Redis哨兵为Redis提供了高可用性。实际上这意味着你可以使用哨兵模式创建一个可以不用人为干预而应对各种故障的Redis部署,哨兵模式还提供了其他的附加功能,如监控,通知,为客户端提供配置。
监控:哨兵不断的检查master和slave是否正常的运行。
通知:当监控的某台Redis实例发生问题时,可以通过API通知系统管理员和其他的应用程序。
自动故障转移:如果一个master不能正常运行,哨兵可以启动故障转移进程,将一个slave升级为master,其他slave被重新配置为使用新的master,并且使用Redis服务端的应用程序会被告知连接时应使用的新地址。
配置提供者:哨兵是客户端服务发现的权威来源:客户端连接到哨兵,请求当前可用的master地址。如果发生故障转移,哨兵将报告新地址。
1、sentinel.conf配置文件(主从都一样)
[root@redis65 data]# mkdir -p /data/redis/sentinel_27001
[root@redis65 data]# cat >> /data/redis/sentinel_27001/sentinel_27001.conf <<EOF
port 27001
#sentinel monitor最后的2表示在sentinel集群中只要有两个节点检测到redis主节点出故障就进行切换
#如果3s内mymaster无响应,则认为mymaster宕机了
#如果10秒后,mymaster仍没活过来,则启动failover
sentinel monitor mymaster 192.168.5.65 7001 2
sentinel down-after-milliseconds mymaster 3000
sentinel failover-timeout mymaster 10000
daemonize yes
#指定工作目录
dir "/data/redis/sentinel_27001/"
protected-mode no
logfile "/data/redis/sentinel_27001/sentinel_27001.log"
#redis主节点密码
sentinel auth-pass mymaster zxc789
# Generated by CONFIG REWRITE
EOF
2、启动所有的sentinel实例
[root@redis65data]#redis-sentinel/data/redis/sentinel_27001/sentinel_27001.conf [root@redis65data]#netstat-tnlp|grep27001 tcp000.0.0.0:270010.0.0.0:*LISTEN17731/redis-sentine tcp600:::27001:::*LISTEN17731/redis-sentine[root@redis66redis_7001]#netstat-tnlp|grep27001 tcp000.0.0.0:270010.0.0.0:*LISTEN15100/redis-sentine tcp600:::27001:::*LISTEN15100/redis-sentine[root@db71/]#netstat-tnlp|grep27001 tcp000.0.0.0:270010.0.0.0:*LISTEN13831/redis-sentine tcp600:::27001:::*LISTEN13831/redis-sentine#sentinel监控日志,通过日志可以看到一个master、三个slave
15100:X11Jan201916:33:19.813*+slaveslave192.168.5.66:7001192.168.5.667001@mymaster192.168.5.657001 15100:X11Jan201916:33:19.998*+slaveslave192.168.5.65:7002192.168.5.657002@mymaster192.168.5.657001 15100:X11Jan201916:33:20.042*+slaveslave192.168.5.71:7001192.168.5.717001@mymaster192.168.5.657001 15100:X11Jan201916:33:21.797*+sentinelsentinelbd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1192.168.5.6527001@mymaster192.168.5.657001 15100:X11Jan201916:34:00.915*+sentinelsentinel8437221283819083fba5f2f07f9fb3ec98169af4192.168.5.7127001@mymaster192.168.5.6570013、sentinel常用命令
[root@db71aa]#redis-cli-p27001 127.0.0.1:27001>sentinelmasters 1)1)"name" 2)"mymaster" 3)"ip" 4)"192.168.5.65" 5)"port" 6)"7001" 7)"runid" 8)"29541866b0235fb9d8b11ca9e8e0a334cb547e47" 127.0.0.1:27001>sentinelslavesmymaster 1)1)"name" 2)"192.168.5.71:7001" 3)"ip" 4)"192.168.5.71" 5)"port" 6)"7001" 2)1)"name" 2)"192.168.5.65:7002" 3)"ip" 4)"192.168.5.65" 5)"port" 6)"7002" 7)"runid" 8)"a27281370ab282263e9e38766d3743112b52ae99" 9)"flags" 10)"slave" 3)1)"name" 2)"192.168.5.66:7001" 3)"ip" 4)"192.168.5.66" 5)"port" 6)"7001" 7)"runid" 8)"496bc3511d0ecc4e8d4965d699f851f053c94f37" 9)"flags" 10)"slave"sentinel masters 列出所有监视的主节点
sentinel slaves <master-name> 获取某主节点的所有从节点信息
sentinel get-master-addr-by-name <master-name> 根据主节点名称来获取其对应的ip地址
sentinel reset <pattern> 清除所有名称匹配的主节点的操作状态,包括故障转移
sentinel failover <master-name> 手动触发故障转移,将主节点切换到某个从节点
4、master宕机,故障转移
[root@redis65data]#redis-cli-h192.168.5.65-p7001-c-a'zxc789'2>/dev/null 192.168.5.65:7001>inforeplication #Replication role:master connected_slaves:3 slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1 slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1 slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0#关闭master
[root@redis65 /]# /etc/init.d/redis_7001 stop
#通过sentinel日志查看sentinel选举新的master的过程
cat/data/redis/sentinel_27001/sentinel_27001.log 17799:X11Jan201922:51:51.383#+sdownmastermymaster192.168.5.657001 17799:X11Jan201922:51:51.383#+odownmastermymaster192.168.5.657001#quorum1/1 17799:X11Jan201922:51:51.383#+new-epoch1 17799:X11Jan201922:51:51.383#+try-failovermastermymaster192.168.5.657001 17799:X11Jan201922:51:51.423#+vote-for-leaderbd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee11 17799:X11Jan201922:51:51.443#0d39de811fec35f16192a0143b4099822837b34bvotedfor0d39de811fec35f16192a0143b4099822837b34b1 17799:X11Jan201922:51:51.462#8437221283819083fba5f2f07f9fb3ec98169af4votedforbd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee11 17799:X11Jan201922:51:51.489#+elected-leadermastermymaster192.168.5.657001 17799:X11Jan201922:51:51.489#+failover-state-select-slavemastermymaster192.168.5.657001 17799:X11Jan201922:51:51.579#+selected-slaveslave192.168.5.71:7001192.168.5.717001@mymaster192.168.5.657001 17799:X11Jan201922:51:51.579*+failover-state-send-slaveof-nooneslave192.168.5.71:7001192.168.5.717001@mymaster192.168.5.657001 17799:X11Jan201922:51:51.650*+failover-state-wait-promotionslave192.168.5.71:7001192.168.5.717001@mymaster192.168.5.657001 17799:X11Jan201922:51:52.701#+promoted-slaveslave192.168.5.71:7001192.168.5.717001@mymaster192.168.5.657001 17799:X11Jan201922:51:52.701#+failover-state-reconf-slavesmastermymaster192.168.5.657001 17799:X11Jan201922:51:52.702*+slave-reconf-sentslave192.168.5.65:7002192.168.5.657002@mymaster192.168.5.657001 17799:X11Jan201922:51:53.462*+slave-reconf-inprogslave192.168.5.65:7002192.168.5.657002@mymaster192.168.5.657001 17799:X11Jan201922:51:53.462*+slave-reconf-doneslave192.168.5.65:7002192.168.5.657002@mymaster192.168.5.657001 17799:X11Jan201922:51:53.520*+slave-reconf-sentslave192.168.5.66:7001192.168.5.667001@mymaster192.168.5.657001 17799:X11Jan201922:51:54.469*+slave-reconf-inprogslave192.168.5.66:7001192.168.5.667001@mymaster192.168.5.657001 
17799:X 11 Jan 2019 22:51:54.469 * +slave-reconf-done slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.568 # +failover-end master mymaster 192.168.5.65 7001
17799:X 11 Jan 2019 22:51:54.568 # +switch-master mymaster 192.168.5.65 7001 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
17799:X 11 Jan 2019 22:51:57.648 # +sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
// 通过日志可以看到选举192.168.5.71 7001为新的master。为什么是192.168.5.71 7001成为新的master呢?那是因为192.168.5.71 7001这个配置文件中replica-priority参数设的最小(数值越小优先级越高,但0表示永远不会被提升为master)
#查看新的主从信息
[root@db71 aa]# redis-cli -h 192.168.5.71 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.71:7001> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=192.168.5.65,port=7002,state=online,offset=3341911,lag=0
slave1:ip=192.168.5.66,port=7001,state=online,offset=3341911,lag=1
master_replid:b9e65b745379e49560ae5647a2b35f59417e6451
master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0
master_repl_offset:3341911
second_repl_offset:3305650
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2293336
repl_backlog_histlen:1048576
#查看其它slave的配置文件,发现自动修改为同步新的master
#192.168.5.71 7001配置文件中连接master的配置已删除
#开启原来的master 192.168.5.65 7001
[root@redis65 data]# /etc/init.d/redis_7001 start
Starting Redis server...
Redis is running...
#通过sentinel日志查看原master的启动变化(-sdown:说明恢复服务)
cat /data/redis/sentinel_27001/sentinel_27001.log
17799:X 12 Jan 2019 10:22:17.351 * +reboot slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
17799:X 12 Jan 2019 10:22:17.402 # -sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
#在新的master实例上看看原来的master是否变成slave
192.168.5.71:7001>infoReplication #Replication role:master connected_slaves:3 slave0:ip=192.168.5.65,port=7002,state=online,offset=11868218,lag=0 slave1:ip=192.168.5.66,port=7001,state=online,offset=11868079,lag=1 slave2:ip=192.168.5.65,port=7001,state=online,offset=11868218,lag=0 master_replid:b9e65b745379e49560ae5647a2b35f59417e6451 master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0 master_repl_offset:11868218 second_repl_offset:3305650 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:10819643 repl_backlog_histlen:1048576// 可以看到原来的master变成slave了,原master 192.168.5.65:7001配置文件也发生了变化,自动添加了连接新master的命令
#如果原master 192.168.5.65:7001重启后,因为配置文件或者网络的原因,没有变成新master下面的一个slave,而客户端仍然向它写数据,那么一旦恢复新的主从关系,这部分写入的数据将会丢失。为了避免这种情况出现,就要做以下设置
min-slaves-to-write 1
min-slaves-max-lag 10
// 通过上面的配置,当一个redis是master时,如果它不能向至少一个slave写数据(上面的min-slaves-to-write指定了slave的数量),它将会拒绝接受客户端的写请求。由于复制是异步的,master无法向slave写数据意味着slave要么断开连接了,要么不在指定时间内向master发送同步数据的请求了(上面的min-slaves-max-lag指定了这个时间)。