github.com/hdt3213/godis@v1.2.9/database/replication_slave.go (about)

     1  package database
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/hdt3213/godis/aof"
    18  	"github.com/hdt3213/godis/config"
    19  	"github.com/hdt3213/godis/interface/redis"
    20  	"github.com/hdt3213/godis/lib/logger"
    21  	rdb "github.com/hdt3213/rdb/parser"
    22  	"github.com/hdt3213/godis/lib/utils"
    23  	"github.com/hdt3213/godis/redis/connection"
    24  	"github.com/hdt3213/godis/redis/parser"
    25  	"github.com/hdt3213/godis/redis/protocol"
    26  )
    27  
    28  const (
    29  	masterRole = iota
    30  	slaveRole
    31  )
    32  
// slaveStatus holds the replication state of this node while it follows a master.
// The functions in this file lock mutex around most accesses to the fields below.
type slaveStatus struct {
	mutex  sync.Mutex
	ctx    context.Context
	cancel context.CancelFunc

	// configVersion is the version of the replication configuration.
	// It is incremented whenever the master host/port changes or replication is stopped.
	// A running replication procedure remembers the version it started with and
	// aborts as soon as it observes a different value, so bumping the version is
	// the way to abort a procedure that is already in progress.
	configVersion int32

	// address of the master, as given to SLAVEOF; cleared by slaveOfNone
	masterHost string
	masterPort int

	// masterConn is the connection to the master; nil while not connected
	masterConn net.Conn
	// masterChan delivers the payloads parsed from masterConn
	masterChan <-chan *parser.Payload
	// replId and replOffset are exchanged with the master in the psync handshake
	replId     string
	replOffset int64
	// lastRecvTime is refreshed when data arrives from the master; slaveCron uses it to detect a timeout
	lastRecvTime time.Time
	// running tracks the receiveAOF goroutine so that stopSlaveWithMutex can wait for it
	running sync.WaitGroup
}
    53  
// configChangedErr is returned by a replication step when slaveStatus.configVersion
// no longer equals the version that step was started with.
var configChangedErr = errors.New("slaveStatus config changed")
    55  
    56  func initReplSlaveStatus() *slaveStatus {
    57  	return &slaveStatus{}
    58  }
    59  
    60  func (server *Server) execSlaveOf(c redis.Connection, args [][]byte) redis.Reply {
    61  	if strings.ToLower(string(args[0])) == "no" &&
    62  		strings.ToLower(string(args[1])) == "one" {
    63  		server.slaveOfNone()
    64  		return protocol.MakeOkReply()
    65  	}
    66  	host := string(args[0])
    67  	port, err := strconv.Atoi(string(args[1]))
    68  	if err != nil {
    69  		return protocol.MakeErrReply("ERR value is not an integer or out of range")
    70  	}
    71  	server.slaveStatus.mutex.Lock()
    72  	atomic.StoreInt32(&server.role, slaveRole)
    73  	server.slaveStatus.masterHost = host
    74  	server.slaveStatus.masterPort = port
    75  	atomic.AddInt32(&server.slaveStatus.configVersion, 1)
    76  	server.slaveStatus.mutex.Unlock()
    77  	go server.setupMaster()
    78  	return protocol.MakeOkReply()
    79  }
    80  
    81  func (server *Server) slaveOfNone() {
    82  	server.slaveStatus.mutex.Lock()
    83  	defer server.slaveStatus.mutex.Unlock()
    84  	server.slaveStatus.masterHost = ""
    85  	server.slaveStatus.masterPort = 0
    86  	server.slaveStatus.replId = ""
    87  	server.slaveStatus.replOffset = -1
    88  	server.slaveStatus.stopSlaveWithMutex()
    89  	server.role = masterRole
    90  }
    91  
    92  // stopSlaveWithMutex stops in-progress connectWithMaster/fullSync/receiveAOF
    93  // invoker should have slaveStatus mutex
    94  func (repl *slaveStatus) stopSlaveWithMutex() {
    95  	// update configVersion to stop connectWithMaster and fullSync
    96  	atomic.AddInt32(&repl.configVersion, 1)
    97  	// send cancel to receiveAOF
    98  	if repl.cancel != nil {
    99  		repl.cancel()
   100  		repl.running.Wait()
   101  	}
   102  	repl.ctx = context.Background()
   103  	repl.cancel = nil
   104  	if repl.masterConn != nil {
   105  		_ = repl.masterConn.Close() // parser.ParseStream will close masterChan
   106  	}
   107  	repl.masterConn = nil
   108  	repl.masterChan = nil
   109  }
   110  
   111  func (repl *slaveStatus) close() error {
   112  	repl.mutex.Lock()
   113  	defer repl.mutex.Unlock()
   114  	repl.stopSlaveWithMutex()
   115  	return nil
   116  }
   117  
   118  // setupMaster connects to master and starts full sync
   119  func (server *Server) setupMaster() {
   120  	defer func() {
   121  		if err := recover(); err != nil {
   122  			logger.Error(err)
   123  		}
   124  	}()
   125  	var configVersion int32
   126  	ctx, cancel := context.WithCancel(context.Background())
   127  	server.slaveStatus.mutex.Lock()
   128  	server.slaveStatus.ctx = ctx
   129  	server.slaveStatus.cancel = cancel
   130  	configVersion = server.slaveStatus.configVersion
   131  	server.slaveStatus.mutex.Unlock()
   132  	isFullReSync, err := server.connectWithMaster(configVersion)
   133  	if err != nil {
   134  		// connect failed, abort master
   135  		logger.Error(err)
   136  		server.slaveOfNone()
   137  		return
   138  	}
   139  	if isFullReSync {
   140  		err = server.loadMasterRDB(configVersion)
   141  		if err != nil {
   142  			// load failed, abort master
   143  			logger.Error(err)
   144  			server.slaveOfNone()
   145  			return
   146  		}
   147  	}
   148  	err = server.receiveAOF(ctx, configVersion)
   149  	if err != nil {
   150  		// full sync failed, abort
   151  		logger.Error(err)
   152  		return
   153  	}
   154  }
   155  
   156  // connectWithMaster finishes handshake with master
   157  // returns: isFullReSync, error
   158  func (server *Server) connectWithMaster(configVersion int32) (bool, error) {
   159  	addr := server.slaveStatus.masterHost + ":" + strconv.Itoa(server.slaveStatus.masterPort)
   160  	conn, err := net.Dial("tcp", addr)
   161  	if err != nil {
   162  		server.slaveOfNone() // abort
   163  		return false, errors.New("connect master failed " + err.Error())
   164  	}
   165  	masterChan := parser.ParseStream(conn)
   166  
   167  	// ping
   168  	pingCmdLine := utils.ToCmdLine("ping")
   169  	pingReq := protocol.MakeMultiBulkReply(pingCmdLine)
   170  	_, err = conn.Write(pingReq.ToBytes())
   171  	if err != nil {
   172  		return false, errors.New("send failed " + err.Error())
   173  	}
   174  	pingResp := <-masterChan
   175  	if pingResp.Err != nil {
   176  		return false, errors.New("read response failed: " + pingResp.Err.Error())
   177  	}
   178  	switch reply := pingResp.Data.(type) {
   179  	case *protocol.StandardErrReply:
   180  		if !strings.HasPrefix(reply.Error(), "NOAUTH") &&
   181  			!strings.HasPrefix(reply.Error(), "NOPERM") &&
   182  			!strings.HasPrefix(reply.Error(), "ERR operation not permitted") {
   183  			logger.Error("Error reply to PING from master: " + string(reply.ToBytes()))
   184  			server.slaveOfNone() // abort
   185  			return false, nil
   186  		}
   187  	}
   188  
   189  	// just to reduce duplication of code
   190  	sendCmdToMaster := func(conn net.Conn, cmdLine CmdLine, masterChan <-chan *parser.Payload) error {
   191  		req := protocol.MakeMultiBulkReply(cmdLine)
   192  		_, err := conn.Write(req.ToBytes())
   193  		if err != nil {
   194  			server.slaveOfNone() // abort
   195  			return errors.New("send failed " + err.Error())
   196  		}
   197  		resp := <-masterChan
   198  		if resp.Err != nil {
   199  			server.slaveOfNone() // abort
   200  			return errors.New("read response failed: " + resp.Err.Error())
   201  		}
   202  		if !protocol.IsOKReply(resp.Data) {
   203  			server.slaveOfNone() // abort
   204  			return errors.New("unexpected auth response: " + string(resp.Data.ToBytes()))
   205  		}
   206  		return nil
   207  	}
   208  
   209  	// auth
   210  	if config.Properties.MasterAuth != "" {
   211  		authCmdLine := utils.ToCmdLine("auth", config.Properties.MasterAuth)
   212  		err = sendCmdToMaster(conn, authCmdLine, masterChan)
   213  		if err != nil {
   214  			return false, err
   215  		}
   216  	}
   217  
   218  	// announce port
   219  	var port int
   220  	if config.Properties.SlaveAnnouncePort != 0 {
   221  		port = config.Properties.SlaveAnnouncePort
   222  	} else {
   223  		port = config.Properties.Port
   224  	}
   225  	portCmdLine := utils.ToCmdLine("REPLCONF", "listening-port", strconv.Itoa(port))
   226  	err = sendCmdToMaster(conn, portCmdLine, masterChan)
   227  	if err != nil {
   228  		return false, err
   229  	}
   230  
   231  	// announce ip
   232  	if config.Properties.SlaveAnnounceIP != "" {
   233  		ipCmdLine := utils.ToCmdLine("REPLCONF", "ip-address", config.Properties.SlaveAnnounceIP)
   234  		err = sendCmdToMaster(conn, ipCmdLine, masterChan)
   235  		if err != nil {
   236  			return false, err
   237  		}
   238  	}
   239  
   240  	// announce capacity
   241  	capaCmdLine := utils.ToCmdLine("REPLCONF", "capa", "psync2")
   242  	err = sendCmdToMaster(conn, capaCmdLine, masterChan)
   243  	if err != nil {
   244  		return false, err
   245  	}
   246  
   247  	// update connection
   248  	server.slaveStatus.mutex.Lock()
   249  	defer server.slaveStatus.mutex.Unlock()
   250  	if server.slaveStatus.configVersion != configVersion {
   251  		// slaveStatus conf changed during connecting and waiting mutex
   252  		return false, configChangedErr
   253  	}
   254  	server.slaveStatus.masterConn = conn
   255  	server.slaveStatus.masterChan = masterChan
   256  	server.slaveStatus.lastRecvTime = time.Now()
   257  	return server.psyncHandshake()
   258  }
   259  
   260  // psyncHandshake send `psync` to master and sync repl-id/offset with master
   261  // invoker should provide with slaveStatus.mutex
   262  func (server *Server) psyncHandshake() (bool, error) {
   263  	replId := "?"
   264  	var replOffset int64 = -1
   265  	if server.slaveStatus.replId != "" {
   266  		replId = server.slaveStatus.replId
   267  		replOffset = server.slaveStatus.replOffset
   268  	}
   269  	psyncCmdLine := utils.ToCmdLine("psync", replId, strconv.FormatInt(replOffset, 10))
   270  	psyncReq := protocol.MakeMultiBulkReply(psyncCmdLine)
   271  	_, err := server.slaveStatus.masterConn.Write(psyncReq.ToBytes())
   272  	if err != nil {
   273  		return false, errors.New("send failed " + err.Error())
   274  	}
   275  	psyncPayload := <-server.slaveStatus.masterChan
   276  	if psyncPayload.Err != nil {
   277  		return false, errors.New("read response failed: " + psyncPayload.Err.Error())
   278  	}
   279  	psyncHeader, ok := psyncPayload.Data.(*protocol.StatusReply)
   280  	if !ok {
   281  		return false, errors.New("illegal payload header not a status reply: " + string(psyncPayload.Data.ToBytes()))
   282  	}
   283  	headers := strings.Split(psyncHeader.Status, " ")
   284  	if len(headers) != 3 && len(headers) != 2 {
   285  		return false, errors.New("illegal payload header: " + psyncHeader.Status)
   286  	}
   287  
   288  	logger.Info("receive psync header from master")
   289  	var isFullReSync bool
   290  	if headers[0] == "FULLRESYNC" {
   291  		logger.Info("full re-sync with master")
   292  		server.slaveStatus.replId = headers[1]
   293  		server.slaveStatus.replOffset, err = strconv.ParseInt(headers[2], 10, 64)
   294  		isFullReSync = true
   295  	} else if headers[0] == "CONTINUE" {
   296  		logger.Info("continue partial sync")
   297  		server.slaveStatus.replId = headers[1]
   298  		isFullReSync = false
   299  	} else {
   300  		return false, errors.New("illegal psync resp: " + psyncHeader.Status)
   301  	}
   302  
   303  	if err != nil {
   304  		return false, errors.New("get illegal repl offset: " + headers[2])
   305  	}
   306  	logger.Info(fmt.Sprintf("repl id: %s, current offset: %d", server.slaveStatus.replId, server.slaveStatus.replOffset))
   307  	return isFullReSync, nil
   308  }
   309  
   310  func makeRdbLoader(upgradeAof bool) (*Server, string, error) {
   311  	rdbLoader := MakeAuxiliaryServer()
   312  	if !upgradeAof {
   313  		return rdbLoader, "", nil
   314  	}
   315  	// make aof handler to generate new aof file during loading rdb
   316  	newAofFile, err := ioutil.TempFile("", "*.aof")
   317  	if err != nil {
   318  		return nil, "", fmt.Errorf("create temp rdb failed: %v", err)
   319  	}
   320  	newAofFilename := newAofFile.Name()
   321  	aofHandler, err := NewPersister(rdbLoader, newAofFilename, false, aof.FsyncNo)
   322  	if err != nil {
   323  		return nil, "", err
   324  	}
   325  	rdbLoader.bindPersister(aofHandler)
   326  	return rdbLoader, newAofFilename, nil
   327  }
   328  
   329  // loadMasterRDB downloads rdb after handshake has been done
   330  func (server *Server) loadMasterRDB(configVersion int32) error {
   331  	rdbPayload := <-server.slaveStatus.masterChan
   332  	if rdbPayload.Err != nil {
   333  		return errors.New("read response failed: " + rdbPayload.Err.Error())
   334  	}
   335  	rdbReply, ok := rdbPayload.Data.(*protocol.BulkReply)
   336  	if !ok {
   337  		return errors.New("illegal payload header: " + string(rdbPayload.Data.ToBytes()))
   338  	}
   339  
   340  	logger.Info(fmt.Sprintf("receive %d bytes of rdb from master", len(rdbReply.Arg)))
   341  	rdbDec := rdb.NewDecoder(bytes.NewReader(rdbReply.Arg))
   342  
   343  	rdbLoader, newAofFilename, err := makeRdbLoader(config.Properties.AppendOnly)
   344  	if err != nil {
   345  		return err
   346  	}
   347  	err = rdbLoader.LoadRDB(rdbDec)
   348  	if err != nil {
   349  		return errors.New("dump rdb failed: " + err.Error())
   350  	}
   351  
   352  	server.slaveStatus.mutex.Lock()
   353  	defer server.slaveStatus.mutex.Unlock()
   354  	if server.slaveStatus.configVersion != configVersion {
   355  		// slaveStatus conf changed during connecting and waiting mutex
   356  		return configChangedErr
   357  	}
   358  	for i, h := range rdbLoader.dbSet {
   359  		newDB := h.Load().(*DB)
   360  		server.loadDB(i, newDB)
   361  	}
   362  
   363  	if config.Properties.AppendOnly {
   364  		// use new aof file
   365  		server.persister.Close()
   366  		err = os.Rename(newAofFilename, config.Properties.AppendFilename)
   367  		if err != nil {
   368  			return err
   369  		}
   370  		persister, err := NewPersister(server, config.Properties.AppendFilename, false, config.Properties.AppendFsync)
   371  		if err != nil {
   372  			return err
   373  		}
   374  		server.bindPersister(persister)
   375  	}
   376  
   377  	return nil
   378  }
   379  
   380  func (server *Server) receiveAOF(ctx context.Context, configVersion int32) error {
   381  	conn := connection.NewConn(server.slaveStatus.masterConn)
   382  	conn.SetMaster()
   383  	server.slaveStatus.running.Add(1)
   384  	defer server.slaveStatus.running.Done()
   385  	for {
   386  		select {
   387  		case payload, open := <-server.slaveStatus.masterChan:
   388  			if !open {
   389  				return errors.New("master channel unexpected close")
   390  			}
   391  			if payload.Err != nil {
   392  				return payload.Err
   393  			}
   394  			cmdLine, ok := payload.Data.(*protocol.MultiBulkReply)
   395  			if !ok {
   396  				return errors.New("unexpected payload: " + string(payload.Data.ToBytes()))
   397  			}
   398  			server.slaveStatus.mutex.Lock()
   399  			if server.slaveStatus.configVersion != configVersion {
   400  				// slaveStatus conf changed during connecting and waiting mutex
   401  				return configChangedErr
   402  			}
   403  			server.Exec(conn, cmdLine.Args)
   404  			n := len(cmdLine.ToBytes()) // todo: directly get size from socket
   405  			server.slaveStatus.replOffset += int64(n)
   406  			server.slaveStatus.lastRecvTime = time.Now()
   407  			logger.Info(fmt.Sprintf("receive %d bytes from master, current offset %d, %s",
   408  				n, server.slaveStatus.replOffset, strconv.Quote(string(cmdLine.ToBytes()))))
   409  			server.slaveStatus.mutex.Unlock()
   410  		case <-ctx.Done():
   411  			_ = conn.Close()
   412  			return nil
   413  		}
   414  	}
   415  }
   416  
   417  func (server *Server) slaveCron() {
   418  	repl := server.slaveStatus
   419  	if repl.masterConn == nil {
   420  		return
   421  	}
   422  
   423  	// check master timeout
   424  	replTimeout := 60 * time.Second
   425  	if config.Properties.ReplTimeout != 0 {
   426  		replTimeout = time.Duration(config.Properties.ReplTimeout) * time.Second
   427  	}
   428  	minLastRecvTime := time.Now().Add(-replTimeout)
   429  	if repl.lastRecvTime.Before(minLastRecvTime) {
   430  		// reconnect with master
   431  		err := server.reconnectWithMaster()
   432  		if err != nil {
   433  			logger.Error("send failed " + err.Error())
   434  		}
   435  		return
   436  	}
   437  	// send ack to master
   438  	err := repl.sendAck2Master()
   439  	if err != nil {
   440  		logger.Error("send failed " + err.Error())
   441  	}
   442  }
   443  
   444  // Send a REPLCONF ACK command to the master to inform it about the current processed offset
   445  func (repl *slaveStatus) sendAck2Master() error {
   446  	psyncCmdLine := utils.ToCmdLine("REPLCONF", "ACK",
   447  		strconv.FormatInt(repl.replOffset, 10))
   448  	psyncReq := protocol.MakeMultiBulkReply(psyncCmdLine)
   449  	_, err := repl.masterConn.Write(psyncReq.ToBytes())
   450  	// logger.Info("send ack to master")
   451  	return err
   452  }
   453  
   454  func (server *Server) reconnectWithMaster() error {
   455  	logger.Info("reconnecting with master")
   456  	server.slaveStatus.mutex.Lock()
   457  	defer server.slaveStatus.mutex.Unlock()
   458  	server.slaveStatus.stopSlaveWithMutex()
   459  	go server.setupMaster()
   460  	return nil
   461  }