github.com/hdt3213/godis@v1.2.9/database/replication_slave.go (about) 1 package database 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "io/ioutil" 9 "net" 10 "os" 11 "strconv" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 "github.com/hdt3213/godis/aof" 18 "github.com/hdt3213/godis/config" 19 "github.com/hdt3213/godis/interface/redis" 20 "github.com/hdt3213/godis/lib/logger" 21 rdb "github.com/hdt3213/rdb/parser" 22 "github.com/hdt3213/godis/lib/utils" 23 "github.com/hdt3213/godis/redis/connection" 24 "github.com/hdt3213/godis/redis/parser" 25 "github.com/hdt3213/godis/redis/protocol" 26 ) 27 28 const ( 29 masterRole = iota 30 slaveRole 31 ) 32 33 type slaveStatus struct { 34 mutex sync.Mutex 35 ctx context.Context 36 cancel context.CancelFunc 37 38 // configVersion stands for the version of slaveStatus config. Any change of master host/port will cause configVersion increment 39 // If configVersion change has been found during slaveStatus current slaveStatus procedure will stop. 40 // It is designed to abort a running slaveStatus procedure 41 configVersion int32 42 43 masterHost string 44 masterPort int 45 46 masterConn net.Conn 47 masterChan <-chan *parser.Payload 48 replId string 49 replOffset int64 50 lastRecvTime time.Time 51 running sync.WaitGroup 52 } 53 54 var configChangedErr = errors.New("slaveStatus config changed") 55 56 func initReplSlaveStatus() *slaveStatus { 57 return &slaveStatus{} 58 } 59 60 func (server *Server) execSlaveOf(c redis.Connection, args [][]byte) redis.Reply { 61 if strings.ToLower(string(args[0])) == "no" && 62 strings.ToLower(string(args[1])) == "one" { 63 server.slaveOfNone() 64 return protocol.MakeOkReply() 65 } 66 host := string(args[0]) 67 port, err := strconv.Atoi(string(args[1])) 68 if err != nil { 69 return protocol.MakeErrReply("ERR value is not an integer or out of range") 70 } 71 server.slaveStatus.mutex.Lock() 72 atomic.StoreInt32(&server.role, slaveRole) 73 server.slaveStatus.masterHost = host 74 server.slaveStatus.masterPort = port 75 atomic.AddInt32(&server.slaveStatus.configVersion, 1) 76 server.slaveStatus.mutex.Unlock() 77 go server.setupMaster() 78 return protocol.MakeOkReply() 79 } 80 81 func (server *Server) slaveOfNone() { 82 server.slaveStatus.mutex.Lock() 83 defer server.slaveStatus.mutex.Unlock() 84 server.slaveStatus.masterHost = "" 85 server.slaveStatus.masterPort = 0 86 server.slaveStatus.replId = "" 87 server.slaveStatus.replOffset = -1 88 server.slaveStatus.stopSlaveWithMutex() 89 server.role = masterRole 90 } 91 92 // stopSlaveWithMutex stops in-progress connectWithMaster/fullSync/receiveAOF 93 // invoker should have slaveStatus mutex 94 func (repl *slaveStatus) stopSlaveWithMutex() { 95 // update configVersion to stop connectWithMaster and fullSync 96 atomic.AddInt32(&repl.configVersion, 1) 97 // send cancel to receiveAOF 98 if repl.cancel != nil { 99 repl.cancel() 100 repl.running.Wait() 101 } 102 repl.ctx = context.Background() 103 repl.cancel = nil 104 if repl.masterConn != nil { 105 _ = repl.masterConn.Close() // parser.ParseStream will close masterChan 106 } 107 repl.masterConn = nil 108 repl.masterChan = nil 109 } 110 111 func (repl *slaveStatus) close() error { 112 repl.mutex.Lock() 113 defer repl.mutex.Unlock() 114 repl.stopSlaveWithMutex() 115 return nil 116 } 117 118 // setupMaster connects to master and starts full sync 119 func (server *Server) setupMaster() { 120 defer func() { 121 if err := recover(); err != nil { 122 logger.Error(err) 123 } 124 }() 125 var configVersion int32 126 ctx, cancel := context.WithCancel(context.Background()) 127 server.slaveStatus.mutex.Lock() 128 server.slaveStatus.ctx = ctx 129 server.slaveStatus.cancel = cancel 130 configVersion = server.slaveStatus.configVersion 131 server.slaveStatus.mutex.Unlock() 132 isFullReSync, err := server.connectWithMaster(configVersion) 133 if err != nil { 134 // connect failed, abort master 135 logger.Error(err) 136 server.slaveOfNone() 137 return 138 } 139 if isFullReSync { 140 err = server.loadMasterRDB(configVersion) 141 if err != nil { 142 // load failed, abort master 143 logger.Error(err) 144 server.slaveOfNone() 145 return 146 } 147 } 148 err = server.receiveAOF(ctx, configVersion) 149 if err != nil { 150 // full sync failed, abort 151 logger.Error(err) 152 return 153 } 154 } 155 156 // connectWithMaster finishes handshake with master 157 // returns: isFullReSync, error 158 func (server *Server) connectWithMaster(configVersion int32) (bool, error) { 159 addr := server.slaveStatus.masterHost + ":" + strconv.Itoa(server.slaveStatus.masterPort) 160 conn, err := net.Dial("tcp", addr) 161 if err != nil { 162 server.slaveOfNone() // abort 163 return false, errors.New("connect master failed " + err.Error()) 164 } 165 masterChan := parser.ParseStream(conn) 166 167 // ping 168 pingCmdLine := utils.ToCmdLine("ping") 169 pingReq := protocol.MakeMultiBulkReply(pingCmdLine) 170 _, err = conn.Write(pingReq.ToBytes()) 171 if err != nil { 172 return false, errors.New("send failed " + err.Error()) 173 } 174 pingResp := <-masterChan 175 if pingResp.Err != nil { 176 return false, errors.New("read response failed: " + pingResp.Err.Error()) 177 } 178 switch reply := pingResp.Data.(type) { 179 case *protocol.StandardErrReply: 180 if !strings.HasPrefix(reply.Error(), "NOAUTH") && 181 !strings.HasPrefix(reply.Error(), "NOPERM") && 182 !strings.HasPrefix(reply.Error(), "ERR operation not permitted") { 183 logger.Error("Error reply to PING from master: " + string(reply.ToBytes())) 184 server.slaveOfNone() // abort 185 return false, nil 186 } 187 } 188 189 // just to reduce duplication of code 190 sendCmdToMaster := func(conn net.Conn, cmdLine CmdLine, masterChan <-chan *parser.Payload) error { 191 req := protocol.MakeMultiBulkReply(cmdLine) 192 _, err := conn.Write(req.ToBytes()) 193 if err != nil { 194 server.slaveOfNone() // abort 195 return errors.New("send failed " + err.Error()) 196 } 197 resp := <-masterChan 198 if resp.Err != nil { 199 server.slaveOfNone() // abort 200 return errors.New("read response failed: " + resp.Err.Error()) 201 } 202 if !protocol.IsOKReply(resp.Data) { 203 server.slaveOfNone() // abort 204 return errors.New("unexpected auth response: " + string(resp.Data.ToBytes())) 205 } 206 return nil 207 } 208 209 // auth 210 if config.Properties.MasterAuth != "" { 211 authCmdLine := utils.ToCmdLine("auth", config.Properties.MasterAuth) 212 err = sendCmdToMaster(conn, authCmdLine, masterChan) 213 if err != nil { 214 return false, err 215 } 216 } 217 218 // announce port 219 var port int 220 if config.Properties.SlaveAnnouncePort != 0 { 221 port = config.Properties.SlaveAnnouncePort 222 } else { 223 port = config.Properties.Port 224 } 225 portCmdLine := utils.ToCmdLine("REPLCONF", "listening-port", strconv.Itoa(port)) 226 err = sendCmdToMaster(conn, portCmdLine, masterChan) 227 if err != nil { 228 return false, err 229 } 230 231 // announce ip 232 if config.Properties.SlaveAnnounceIP != "" { 233 ipCmdLine := utils.ToCmdLine("REPLCONF", "ip-address", config.Properties.SlaveAnnounceIP) 234 err = sendCmdToMaster(conn, ipCmdLine, masterChan) 235 if err != nil { 236 return false, err 237 } 238 } 239 240 // announce capacity 241 capaCmdLine := utils.ToCmdLine("REPLCONF", "capa", "psync2") 242 err = sendCmdToMaster(conn, capaCmdLine, masterChan) 243 if err != nil { 244 return false, err 245 } 246 247 // update connection 248 server.slaveStatus.mutex.Lock() 249 defer server.slaveStatus.mutex.Unlock() 250 if server.slaveStatus.configVersion != configVersion { 251 // slaveStatus conf changed during connecting and waiting mutex 252 return false, configChangedErr 253 } 254 server.slaveStatus.masterConn = conn 255 server.slaveStatus.masterChan = masterChan 256 server.slaveStatus.lastRecvTime = time.Now() 257 return server.psyncHandshake() 258 } 259 260 // psyncHandshake send `psync` to master and sync repl-id/offset with master 261 // invoker should provide with slaveStatus.mutex 262 func (server *Server) psyncHandshake() (bool, error) { 263 replId := "?" 264 var replOffset int64 = -1 265 if server.slaveStatus.replId != "" { 266 replId = server.slaveStatus.replId 267 replOffset = server.slaveStatus.replOffset 268 } 269 psyncCmdLine := utils.ToCmdLine("psync", replId, strconv.FormatInt(replOffset, 10)) 270 psyncReq := protocol.MakeMultiBulkReply(psyncCmdLine) 271 _, err := server.slaveStatus.masterConn.Write(psyncReq.ToBytes()) 272 if err != nil { 273 return false, errors.New("send failed " + err.Error()) 274 } 275 psyncPayload := <-server.slaveStatus.masterChan 276 if psyncPayload.Err != nil { 277 return false, errors.New("read response failed: " + psyncPayload.Err.Error()) 278 } 279 psyncHeader, ok := psyncPayload.Data.(*protocol.StatusReply) 280 if !ok { 281 return false, errors.New("illegal payload header not a status reply: " + string(psyncPayload.Data.ToBytes())) 282 } 283 headers := strings.Split(psyncHeader.Status, " ") 284 if len(headers) != 3 && len(headers) != 2 { 285 return false, errors.New("illegal payload header: " + psyncHeader.Status) 286 } 287 288 logger.Info("receive psync header from master") 289 var isFullReSync bool 290 if headers[0] == "FULLRESYNC" { 291 logger.Info("full re-sync with master") 292 server.slaveStatus.replId = headers[1] 293 server.slaveStatus.replOffset, err = strconv.ParseInt(headers[2], 10, 64) 294 isFullReSync = true 295 } else if headers[0] == "CONTINUE" { 296 logger.Info("continue partial sync") 297 server.slaveStatus.replId = headers[1] 298 isFullReSync = false 299 } else { 300 return false, errors.New("illegal psync resp: " + psyncHeader.Status) 301 } 302 303 if err != nil { 304 return false, errors.New("get illegal repl offset: " + headers[2]) 305 } 306 logger.Info(fmt.Sprintf("repl id: %s, current offset: %d", server.slaveStatus.replId, server.slaveStatus.replOffset)) 307 return isFullReSync, nil 308 } 309 310 func makeRdbLoader(upgradeAof bool) (*Server, string, error) { 311 rdbLoader := MakeAuxiliaryServer() 312 if !upgradeAof { 313 return rdbLoader, "", nil 314 } 315 // make aof handler to generate new aof file during loading rdb 316 newAofFile, err := ioutil.TempFile("", "*.aof") 317 if err != nil { 318 return nil, "", fmt.Errorf("create temp rdb failed: %v", err) 319 } 320 newAofFilename := newAofFile.Name() 321 aofHandler, err := NewPersister(rdbLoader, newAofFilename, false, aof.FsyncNo) 322 if err != nil { 323 return nil, "", err 324 } 325 rdbLoader.bindPersister(aofHandler) 326 return rdbLoader, newAofFilename, nil 327 } 328 329 // loadMasterRDB downloads rdb after handshake has been done 330 func (server *Server) loadMasterRDB(configVersion int32) error { 331 rdbPayload := <-server.slaveStatus.masterChan 332 if rdbPayload.Err != nil { 333 return errors.New("read response failed: " + rdbPayload.Err.Error()) 334 } 335 rdbReply, ok := rdbPayload.Data.(*protocol.BulkReply) 336 if !ok { 337 return errors.New("illegal payload header: " + string(rdbPayload.Data.ToBytes())) 338 } 339 340 logger.Info(fmt.Sprintf("receive %d bytes of rdb from master", len(rdbReply.Arg))) 341 rdbDec := rdb.NewDecoder(bytes.NewReader(rdbReply.Arg)) 342 343 rdbLoader, newAofFilename, err := makeRdbLoader(config.Properties.AppendOnly) 344 if err != nil { 345 return err 346 } 347 err = rdbLoader.LoadRDB(rdbDec) 348 if err != nil { 349 return errors.New("dump rdb failed: " + err.Error()) 350 } 351 352 server.slaveStatus.mutex.Lock() 353 defer server.slaveStatus.mutex.Unlock() 354 if server.slaveStatus.configVersion != configVersion { 355 // slaveStatus conf changed during connecting and waiting mutex 356 return configChangedErr 357 } 358 for i, h := range rdbLoader.dbSet { 359 newDB := h.Load().(*DB) 360 server.loadDB(i, newDB) 361 } 362 363 if config.Properties.AppendOnly { 364 // use new aof file 365 server.persister.Close() 366 err = os.Rename(newAofFilename, config.Properties.AppendFilename) 367 if err != nil { 368 return err 369 } 370 persister, err := NewPersister(server, config.Properties.AppendFilename, false, config.Properties.AppendFsync) 371 if err != nil { 372 return err 373 } 374 server.bindPersister(persister) 375 } 376 377 return nil 378 } 379 380 func (server *Server) receiveAOF(ctx context.Context, configVersion int32) error { 381 conn := connection.NewConn(server.slaveStatus.masterConn) 382 conn.SetMaster() 383 server.slaveStatus.running.Add(1) 384 defer server.slaveStatus.running.Done() 385 for { 386 select { 387 case payload, open := <-server.slaveStatus.masterChan: 388 if !open { 389 return errors.New("master channel unexpected close") 390 } 391 if payload.Err != nil { 392 return payload.Err 393 } 394 cmdLine, ok := payload.Data.(*protocol.MultiBulkReply) 395 if !ok { 396 return errors.New("unexpected payload: " + string(payload.Data.ToBytes())) 397 } 398 server.slaveStatus.mutex.Lock() 399 if server.slaveStatus.configVersion != configVersion { 400 // slaveStatus conf changed during connecting and waiting mutex 401 return configChangedErr 402 } 403 server.Exec(conn, cmdLine.Args) 404 n := len(cmdLine.ToBytes()) // todo: directly get size from socket 405 server.slaveStatus.replOffset += int64(n) 406 server.slaveStatus.lastRecvTime = time.Now() 407 logger.Info(fmt.Sprintf("receive %d bytes from master, current offset %d, %s", 408 n, server.slaveStatus.replOffset, strconv.Quote(string(cmdLine.ToBytes())))) 409 server.slaveStatus.mutex.Unlock() 410 case <-ctx.Done(): 411 _ = conn.Close() 412 return nil 413 } 414 } 415 } 416 417 func (server *Server) slaveCron() { 418 repl := server.slaveStatus 419 if repl.masterConn == nil { 420 return 421 } 422 423 // check master timeout 424 replTimeout := 60 * time.Second 425 if config.Properties.ReplTimeout != 0 { 426 replTimeout = time.Duration(config.Properties.ReplTimeout) * time.Second 427 } 428 minLastRecvTime := time.Now().Add(-replTimeout) 429 if repl.lastRecvTime.Before(minLastRecvTime) { 430 // reconnect with master 431 err := server.reconnectWithMaster() 432 if err != nil { 433 logger.Error("send failed " + err.Error()) 434 } 435 return 436 } 437 // send ack to master 438 err := repl.sendAck2Master() 439 if err != nil { 440 logger.Error("send failed " + err.Error()) 441 } 442 } 443 444 // Send a REPLCONF ACK command to the master to inform it about the current processed offset 445 func (repl *slaveStatus) sendAck2Master() error { 446 psyncCmdLine := utils.ToCmdLine("REPLCONF", "ACK", 447 strconv.FormatInt(repl.replOffset, 10)) 448 psyncReq := protocol.MakeMultiBulkReply(psyncCmdLine) 449 _, err := repl.masterConn.Write(psyncReq.ToBytes()) 450 // logger.Info("send ack to master") 451 return err 452 } 453 454 func (server *Server) reconnectWithMaster() error { 455 logger.Info("reconnecting with master") 456 server.slaveStatus.mutex.Lock() 457 defer server.slaveStatus.mutex.Unlock() 458 server.slaveStatus.stopSlaveWithMutex() 459 go server.setupMaster() 460 return nil 461 }