github.com/hdt3213/godis@v1.2.9/database/replication_master.go (about) 1 package database 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "os" 9 "strconv" 10 "strings" 11 "sync" 12 "time" 13 14 "github.com/hdt3213/godis/interface/redis" 15 "github.com/hdt3213/godis/lib/logger" 16 "github.com/hdt3213/godis/lib/sync/atomic" 17 "github.com/hdt3213/godis/lib/utils" 18 "github.com/hdt3213/godis/redis/protocol" 19 ) 20 21 const ( 22 slaveStateHandShake = uint8(iota) 23 slaveStateWaitSaveEnd 24 slaveStateSendingRDB 25 slaveStateOnline 26 ) 27 28 const ( 29 bgSaveIdle = uint8(iota) 30 bgSaveRunning 31 bgSaveFinish 32 ) 33 34 const ( 35 slaveCapacityNone = 0 36 slaveCapacityEOF = 1 << iota 37 slaveCapacityPsync2 38 ) 39 40 // slaveClient stores slave status in the view of master 41 type slaveClient struct { 42 conn redis.Connection 43 state uint8 44 offset int64 45 lastAckTime time.Time 46 announceIp string 47 announcePort int 48 capacity uint8 49 } 50 51 // aofListener is currently only responsible for updating the backlog 52 type replBacklog struct { 53 buf []byte 54 beginOffset int64 55 currentOffset int64 56 } 57 58 func (backlog *replBacklog) appendBytes(bin []byte) { 59 backlog.buf = append(backlog.buf, bin...) 60 backlog.currentOffset += int64(len(bin)) 61 } 62 63 func (backlog *replBacklog) getSnapshot() ([]byte, int64) { 64 return backlog.buf[:], backlog.currentOffset 65 } 66 67 func (backlog *replBacklog) getSnapshotAfter(beginOffset int64) ([]byte, int64) { 68 beg := beginOffset - backlog.beginOffset 69 return backlog.buf[beg:], backlog.currentOffset 70 } 71 72 func (backlog *replBacklog) isValidOffset(offset int64) bool { 73 return offset >= backlog.beginOffset && offset < backlog.currentOffset 74 } 75 76 type masterStatus struct { 77 mu sync.RWMutex 78 replId string 79 backlog *replBacklog 80 slaveMap map[redis.Connection]*slaveClient 81 waitSlaves map[*slaveClient]struct{} 82 onlineSlaves map[*slaveClient]struct{} 83 bgSaveState uint8 84 rdbFilename string 85 aofListener *replAofListener 86 rewriting atomic.Boolean 87 } 88 89 // bgSaveForReplication does bg-save and send rdb to waiting slaves 90 func (server *Server) bgSaveForReplication() { 91 go func() { 92 defer func() { 93 if e := recover(); e != nil { 94 logger.Errorf("panic: %v", e) 95 } 96 }() 97 if err := server.saveForReplication(); err != nil { 98 logger.Errorf("save for replication error: %v", err) 99 } 100 }() 101 102 } 103 104 // saveForReplication does bg-save and send rdb to waiting slaves 105 func (server *Server) saveForReplication() error { 106 rdbFile, err := ioutil.TempFile("", "*.rdb") 107 if err != nil { 108 return fmt.Errorf("create temp rdb failed: %v", err) 109 } 110 rdbFilename := rdbFile.Name() 111 server.masterStatus.mu.Lock() 112 server.masterStatus.bgSaveState = bgSaveRunning 113 server.masterStatus.rdbFilename = rdbFilename // todo: can reuse config.Properties.RDBFilename? 114 aofListener := &replAofListener{ 115 mdb: server, 116 backlog: server.masterStatus.backlog, 117 } 118 server.masterStatus.aofListener = aofListener 119 server.masterStatus.mu.Unlock() 120 121 err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, nil) 122 if err != nil { 123 return err 124 } 125 aofListener.readyToSend = true 126 127 // change bgSaveState and get waitSlaves for sending 128 waitSlaves := make(map[*slaveClient]struct{}) 129 server.masterStatus.mu.Lock() 130 server.masterStatus.bgSaveState = bgSaveFinish 131 for slave := range server.masterStatus.waitSlaves { 132 waitSlaves[slave] = struct{}{} 133 } 134 server.masterStatus.waitSlaves = nil 135 server.masterStatus.mu.Unlock() 136 137 // send rdb to waiting slaves 138 for slave := range waitSlaves { 139 err = server.masterFullReSyncWithSlave(slave) 140 if err != nil { 141 server.removeSlave(slave) 142 logger.Errorf("masterFullReSyncWithSlave error: %v", err) 143 continue 144 } 145 } 146 return nil 147 } 148 149 func (server *Server) rewriteRDB() error { 150 rdbFile, err := ioutil.TempFile("", "*.rdb") 151 if err != nil { 152 return fmt.Errorf("create temp rdb failed: %v", err) 153 } 154 rdbFilename := rdbFile.Name() 155 newBacklog := &replBacklog{} 156 aofListener := &replAofListener{ 157 backlog: newBacklog, 158 mdb: server, 159 } 160 hook := func() { 161 // pausing aof first, then lock masterStatus. 162 // use the same order as replAofListener to avoid dead lock 163 server.masterStatus.mu.Lock() 164 defer server.masterStatus.mu.Unlock() 165 newBacklog.beginOffset = server.masterStatus.backlog.currentOffset 166 } 167 err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, hook) 168 if err != nil { // wait rdb result 169 return err 170 } 171 server.masterStatus.mu.Lock() 172 server.masterStatus.rdbFilename = rdbFilename 173 server.masterStatus.backlog = newBacklog 174 server.persister.RemoveListener(server.masterStatus.aofListener) 175 server.masterStatus.aofListener = aofListener 176 server.masterStatus.mu.Unlock() 177 // It is ok to know that new backlog is ready later, so we change readyToSend without sync 178 // But setting readyToSend=true must after new backlog is really ready (that means master.mu.Unlock) 179 aofListener.readyToSend = true 180 return nil 181 } 182 183 // masterFullReSyncWithSlave send replication header, rdb file and all backlogs to slave 184 func (server *Server) masterFullReSyncWithSlave(slave *slaveClient) error { 185 // write replication header 186 header := "+FULLRESYNC " + server.masterStatus.replId + " " + 187 strconv.FormatInt(server.masterStatus.backlog.beginOffset, 10) + protocol.CRLF 188 _, err := slave.conn.Write([]byte(header)) 189 if err != nil { 190 return fmt.Errorf("write replication header to slave failed: %v", err) 191 } 192 // send rdb 193 rdbFile, err := os.Open(server.masterStatus.rdbFilename) 194 if err != nil { 195 return fmt.Errorf("open rdb file %s for replication error: %v", server.masterStatus.rdbFilename, err) 196 } 197 slave.state = slaveStateSendingRDB 198 rdbInfo, _ := os.Stat(server.masterStatus.rdbFilename) 199 rdbSize := rdbInfo.Size() 200 rdbHeader := "$" + strconv.FormatInt(rdbSize, 10) + protocol.CRLF 201 _, err = slave.conn.Write([]byte(rdbHeader)) 202 if err != nil { 203 return fmt.Errorf("write rdb header to slave failed: %v", err) 204 } 205 _, err = io.Copy(slave.conn, rdbFile) 206 if err != nil { 207 return fmt.Errorf("write rdb file to slave failed: %v", err) 208 } 209 210 // send backlog 211 server.masterStatus.mu.RLock() 212 backlog, currentOffset := server.masterStatus.backlog.getSnapshot() 213 server.masterStatus.mu.RUnlock() 214 _, err = slave.conn.Write(backlog) 215 if err != nil { 216 return fmt.Errorf("full resync write backlog to slave failed: %v", err) 217 } 218 219 // set slave as online 220 server.setSlaveOnline(slave, currentOffset) 221 return nil 222 } 223 224 var cannotPartialSync = errors.New("cannot do partial sync") 225 226 func (server *Server) masterTryPartialSyncWithSlave(slave *slaveClient, replId string, slaveOffset int64) error { 227 server.masterStatus.mu.RLock() 228 if replId != server.masterStatus.replId { 229 server.masterStatus.mu.RUnlock() 230 return cannotPartialSync 231 } 232 if !server.masterStatus.backlog.isValidOffset(slaveOffset) { 233 server.masterStatus.mu.RUnlock() 234 return cannotPartialSync 235 } 236 backlog, currentOffset := server.masterStatus.backlog.getSnapshotAfter(slaveOffset) 237 server.masterStatus.mu.RUnlock() 238 239 // send replication header 240 header := "+CONTINUE " + server.masterStatus.replId + protocol.CRLF 241 _, err := slave.conn.Write([]byte(header)) 242 if err != nil { 243 return fmt.Errorf("write replication header to slave failed: %v", err) 244 } 245 // send backlog 246 _, err = slave.conn.Write(backlog) 247 if err != nil { 248 return fmt.Errorf("partial resync write backlog to slave failed: %v", err) 249 } 250 251 // set slave online 252 server.setSlaveOnline(slave, currentOffset) 253 return nil 254 } 255 256 // masterSendUpdatesToSlave only sends data to online slaves after bgSave is finished 257 // if bgSave is running, updates will be sent after the saving finished 258 func (server *Server) masterSendUpdatesToSlave() error { 259 onlineSlaves := make(map[*slaveClient]struct{}) 260 server.masterStatus.mu.RLock() 261 beginOffset := server.masterStatus.backlog.beginOffset 262 backlog, currentOffset := server.masterStatus.backlog.getSnapshot() 263 for slave := range server.masterStatus.onlineSlaves { 264 onlineSlaves[slave] = struct{}{} 265 } 266 server.masterStatus.mu.RUnlock() 267 for slave := range onlineSlaves { 268 slaveBeginOffset := slave.offset - beginOffset 269 _, err := slave.conn.Write(backlog[slaveBeginOffset:]) 270 if err != nil { 271 logger.Errorf("send updates backlog to slave failed: %v", err) 272 server.removeSlave(slave) 273 continue 274 } 275 slave.offset = currentOffset 276 } 277 return nil 278 } 279 280 func (server *Server) execPSync(c redis.Connection, args [][]byte) redis.Reply { 281 replId := string(args[0]) 282 replOffset, err := strconv.ParseInt(string(args[1]), 10, 64) 283 if err != nil { 284 return protocol.MakeErrReply("ERR value is not an integer or out of range") 285 } 286 server.masterStatus.mu.Lock() 287 defer server.masterStatus.mu.Unlock() 288 slave := server.masterStatus.slaveMap[c] 289 if slave == nil { 290 slave = &slaveClient{ 291 conn: c, 292 } 293 c.SetSlave() 294 server.masterStatus.slaveMap[c] = slave 295 } 296 if server.masterStatus.bgSaveState == bgSaveIdle { 297 slave.state = slaveStateWaitSaveEnd 298 server.masterStatus.waitSlaves[slave] = struct{}{} 299 server.bgSaveForReplication() 300 } else if server.masterStatus.bgSaveState == bgSaveRunning { 301 slave.state = slaveStateWaitSaveEnd 302 server.masterStatus.waitSlaves[slave] = struct{}{} 303 } else if server.masterStatus.bgSaveState == bgSaveFinish { 304 go func() { 305 defer func() { 306 if e := recover(); e != nil { 307 logger.Errorf("panic: %v", e) 308 } 309 }() 310 err := server.masterTryPartialSyncWithSlave(slave, replId, replOffset) 311 if err == nil { 312 return 313 } 314 if err != nil && err != cannotPartialSync { 315 server.removeSlave(slave) 316 logger.Errorf("masterTryPartialSyncWithSlave error: %v", err) 317 return 318 } 319 // assert err == cannotPartialSync 320 if err := server.masterFullReSyncWithSlave(slave); err != nil { 321 server.removeSlave(slave) 322 logger.Errorf("masterFullReSyncWithSlave error: %v", err) 323 return 324 } 325 }() 326 } 327 return &protocol.NoReply{} 328 } 329 330 func (server *Server) execReplConf(c redis.Connection, args [][]byte) redis.Reply { 331 if len(args)%2 != 0 { 332 return protocol.MakeSyntaxErrReply() 333 } 334 server.masterStatus.mu.RLock() 335 slave := server.masterStatus.slaveMap[c] 336 server.masterStatus.mu.RUnlock() 337 for i := 0; i < len(args); i += 2 { 338 key := strings.ToLower(string(args[i])) 339 value := string(args[i+1]) 340 switch key { 341 case "ack": 342 offset, err := strconv.ParseInt(value, 10, 64) 343 if err != nil { 344 return protocol.MakeErrReply("ERR value is not an integer or out of range") 345 } 346 slave.offset = offset 347 slave.lastAckTime = time.Now() 348 return &protocol.NoReply{} 349 } 350 } 351 return protocol.MakeOkReply() 352 } 353 354 func (server *Server) removeSlave(slave *slaveClient) { 355 server.masterStatus.mu.Lock() 356 defer server.masterStatus.mu.Unlock() 357 _ = slave.conn.Close() 358 delete(server.masterStatus.slaveMap, slave.conn) 359 delete(server.masterStatus.waitSlaves, slave) 360 delete(server.masterStatus.onlineSlaves, slave) 361 logger.Info("disconnect with slave " + slave.conn.Name()) 362 } 363 364 func (server *Server) setSlaveOnline(slave *slaveClient, currentOffset int64) { 365 server.masterStatus.mu.Lock() 366 defer server.masterStatus.mu.Unlock() 367 slave.state = slaveStateOnline 368 slave.offset = currentOffset 369 server.masterStatus.onlineSlaves[slave] = struct{}{} 370 } 371 372 var pingBytes = protocol.MakeMultiBulkReply(utils.ToCmdLine("ping")).ToBytes() 373 374 const maxBacklogSize = 10 * 1024 * 1024 // 10MB 375 376 func (server *Server) masterCron() { 377 server.masterStatus.mu.Lock() 378 if len(server.masterStatus.slaveMap) == 0 { // no slaves, do nothing 379 server.masterStatus.mu.Unlock() 380 return 381 } 382 if server.masterStatus.bgSaveState == bgSaveFinish { 383 server.masterStatus.backlog.appendBytes(pingBytes) 384 } 385 backlogSize := len(server.masterStatus.backlog.buf) 386 server.masterStatus.mu.Unlock() 387 if err := server.masterSendUpdatesToSlave(); err != nil { 388 logger.Errorf("masterSendUpdatesToSlave error: %v", err) 389 } 390 if backlogSize > maxBacklogSize && !server.masterStatus.rewriting.Get() { 391 go func() { 392 server.masterStatus.rewriting.Set(true) 393 defer server.masterStatus.rewriting.Set(false) 394 if err := server.rewriteRDB(); err != nil { 395 server.masterStatus.rewriting.Set(false) 396 logger.Errorf("rewrite error: %v", err) 397 } 398 }() 399 } 400 } 401 402 // replAofListener is an implementation for aof.Listener 403 type replAofListener struct { 404 mdb *Server 405 backlog *replBacklog // may NOT be mdb.masterStatus.backlog 406 readyToSend bool 407 } 408 409 func (listener *replAofListener) Callback(cmdLines []CmdLine) { 410 listener.mdb.masterStatus.mu.Lock() 411 for _, cmdLine := range cmdLines { 412 reply := protocol.MakeMultiBulkReply(cmdLine) 413 listener.backlog.appendBytes(reply.ToBytes()) 414 } 415 listener.mdb.masterStatus.mu.Unlock() 416 // listener could receive updates generated during rdb saving in progress 417 // Do not send updates to slave before rdb saving is finished 418 if listener.readyToSend { 419 if err := listener.mdb.masterSendUpdatesToSlave(); err != nil { 420 logger.Errorf("masterSendUpdatesToSlave after receive aof error: %v", err) 421 } 422 } 423 } 424 425 func (server *Server) initMaster() { 426 server.masterStatus = &masterStatus{ 427 mu: sync.RWMutex{}, 428 replId: utils.RandHexString(40), 429 backlog: &replBacklog{}, 430 slaveMap: make(map[redis.Connection]*slaveClient), 431 waitSlaves: make(map[*slaveClient]struct{}), 432 onlineSlaves: make(map[*slaveClient]struct{}), 433 bgSaveState: bgSaveIdle, 434 rdbFilename: "", 435 } 436 } 437 438 func (server *Server) stopMaster() { 439 server.masterStatus.mu.Lock() 440 defer server.masterStatus.mu.Unlock() 441 442 // disconnect with slave 443 for _, slave := range server.masterStatus.slaveMap { 444 _ = slave.conn.Close() 445 delete(server.masterStatus.slaveMap, slave.conn) 446 delete(server.masterStatus.waitSlaves, slave) 447 delete(server.masterStatus.onlineSlaves, slave) 448 } 449 450 // clean master status 451 if server.persister != nil { 452 server.persister.RemoveListener(server.masterStatus.aofListener) 453 } 454 _ = os.Remove(server.masterStatus.rdbFilename) 455 server.masterStatus.rdbFilename = "" 456 server.masterStatus.replId = "" 457 server.masterStatus.backlog = &replBacklog{} 458 server.masterStatus.slaveMap = make(map[redis.Connection]*slaveClient) 459 server.masterStatus.waitSlaves = make(map[*slaveClient]struct{}) 460 server.masterStatus.onlineSlaves = make(map[*slaveClient]struct{}) 461 server.masterStatus.bgSaveState = bgSaveIdle 462 }