github.com/hdt3213/godis@v1.2.9/database/replication_master.go (about)

     1  package database
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"os"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/hdt3213/godis/interface/redis"
    15  	"github.com/hdt3213/godis/lib/logger"
    16  	"github.com/hdt3213/godis/lib/sync/atomic"
    17  	"github.com/hdt3213/godis/lib/utils"
    18  	"github.com/hdt3213/godis/redis/protocol"
    19  )
    20  
// slave replication lifecycle states, as tracked by the master
const (
	slaveStateHandShake = uint8(iota) // zero value: state of a freshly created slaveClient before syncing starts
	slaveStateWaitSaveEnd             // waiting for the in-progress bg-save to finish
	slaveStateSendingRDB              // master is transferring the rdb file to this slave
	slaveStateOnline                  // sync finished; slave receives incremental backlog updates
)
    27  
// bg-save lifecycle states for masterStatus.bgSaveState
const (
	bgSaveIdle = uint8(iota) // no rdb has been generated for replication yet
	bgSaveRunning            // a bg-save for replication is in progress
	bgSaveFinish             // rdb is ready; the backlog collects subsequent updates
)
    33  
// slave capability bit flags (a slave may announce several)
const (
	slaveCapacityNone = 0
	slaveCapacityEOF  = 1 << iota // == 2: iota is 1 on this line; NOTE(review): bit 0 is skipped — confirm intentional
	slaveCapacityPsync2           // == 4
)
    39  
// slaveClient stores slave status in the view of master
type slaveClient struct {
	conn         redis.Connection // connection to the slave
	state        uint8            // one of the slaveState* constants
	offset       int64            // replication offset last sent to / acknowledged by this slave
	lastAckTime  time.Time        // time of the last `replconf ack` from this slave
	announceIp   string           // ip announced by the slave; not written in this file — TODO confirm writer
	announcePort int              // port announced by the slave; not written in this file — TODO confirm writer
	capacity     uint8            // bit set of slaveCapacity* flags
}
    50  
    51  // aofListener is currently only responsible for updating the backlog
    52  type replBacklog struct {
    53  	buf           []byte
    54  	beginOffset   int64
    55  	currentOffset int64
    56  }
    57  
    58  func (backlog *replBacklog) appendBytes(bin []byte) {
    59  	backlog.buf = append(backlog.buf, bin...)
    60  	backlog.currentOffset += int64(len(bin))
    61  }
    62  
    63  func (backlog *replBacklog) getSnapshot() ([]byte, int64) {
    64  	return backlog.buf[:], backlog.currentOffset
    65  }
    66  
    67  func (backlog *replBacklog) getSnapshotAfter(beginOffset int64) ([]byte, int64) {
    68  	beg := beginOffset - backlog.beginOffset
    69  	return backlog.buf[beg:], backlog.currentOffset
    70  }
    71  
    72  func (backlog *replBacklog) isValidOffset(offset int64) bool {
    73  	return offset >= backlog.beginOffset && offset < backlog.currentOffset
    74  }
    75  
// masterStatus holds all replication state of this server acting as a master.
type masterStatus struct {
	mu           sync.RWMutex                      // guards the fields below and the backlog contents
	replId       string                            // replication id (random 40-char hex string, see initMaster)
	backlog      *replBacklog                      // command stream since the last rdb; replaced by rewriteRDB
	slaveMap     map[redis.Connection]*slaveClient // all known slaves, keyed by their connection
	waitSlaves   map[*slaveClient]struct{}         // slaves waiting for the running bg-save to finish
	onlineSlaves map[*slaveClient]struct{}         // slaves that completed full/partial sync
	bgSaveState  uint8                             // one of bgSaveIdle / bgSaveRunning / bgSaveFinish
	rdbFilename  string                            // temp rdb file generated for replication
	aofListener  *replAofListener                  // listener feeding executed commands into backlog
	rewriting    atomic.Boolean                    // true while rewriteRDB is running (managed by masterCron)
}
    88  
    89  // bgSaveForReplication does bg-save and send rdb to waiting slaves
    90  func (server *Server) bgSaveForReplication() {
    91  	go func() {
    92  		defer func() {
    93  			if e := recover(); e != nil {
    94  				logger.Errorf("panic: %v", e)
    95  			}
    96  		}()
    97  		if err := server.saveForReplication(); err != nil {
    98  			logger.Errorf("save for replication error: %v", err)
    99  		}
   100  	}()
   101  
   102  }
   103  
   104  // saveForReplication does bg-save and send rdb to waiting slaves
   105  func (server *Server) saveForReplication() error {
   106  	rdbFile, err := ioutil.TempFile("", "*.rdb")
   107  	if err != nil {
   108  		return fmt.Errorf("create temp rdb failed: %v", err)
   109  	}
   110  	rdbFilename := rdbFile.Name()
   111  	server.masterStatus.mu.Lock()
   112  	server.masterStatus.bgSaveState = bgSaveRunning
   113  	server.masterStatus.rdbFilename = rdbFilename // todo: can reuse config.Properties.RDBFilename?
   114  	aofListener := &replAofListener{
   115  		mdb:     server,
   116  		backlog: server.masterStatus.backlog,
   117  	}
   118  	server.masterStatus.aofListener = aofListener
   119  	server.masterStatus.mu.Unlock()
   120  
   121  	err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, nil)
   122  	if err != nil {
   123  		return err
   124  	}
   125  	aofListener.readyToSend = true
   126  
   127  	// change bgSaveState and get waitSlaves for sending
   128  	waitSlaves := make(map[*slaveClient]struct{})
   129  	server.masterStatus.mu.Lock()
   130  	server.masterStatus.bgSaveState = bgSaveFinish
   131  	for slave := range server.masterStatus.waitSlaves {
   132  		waitSlaves[slave] = struct{}{}
   133  	}
   134  	server.masterStatus.waitSlaves = nil
   135  	server.masterStatus.mu.Unlock()
   136  
   137  	// send rdb to waiting slaves
   138  	for slave := range waitSlaves {
   139  		err = server.masterFullReSyncWithSlave(slave)
   140  		if err != nil {
   141  			server.removeSlave(slave)
   142  			logger.Errorf("masterFullReSyncWithSlave error: %v", err)
   143  			continue
   144  		}
   145  	}
   146  	return nil
   147  }
   148  
   149  func (server *Server) rewriteRDB() error {
   150  	rdbFile, err := ioutil.TempFile("", "*.rdb")
   151  	if err != nil {
   152  		return fmt.Errorf("create temp rdb failed: %v", err)
   153  	}
   154  	rdbFilename := rdbFile.Name()
   155  	newBacklog := &replBacklog{}
   156  	aofListener := &replAofListener{
   157  		backlog: newBacklog,
   158  		mdb:     server,
   159  	}
   160  	hook := func() {
   161  		// pausing aof first, then lock masterStatus.
   162  		// use the same order as replAofListener to avoid dead lock
   163  		server.masterStatus.mu.Lock()
   164  		defer server.masterStatus.mu.Unlock()
   165  		newBacklog.beginOffset = server.masterStatus.backlog.currentOffset
   166  	}
   167  	err = server.persister.GenerateRDBForReplication(rdbFilename, aofListener, hook)
   168  	if err != nil { // wait rdb result
   169  		return err
   170  	}
   171  	server.masterStatus.mu.Lock()
   172  	server.masterStatus.rdbFilename = rdbFilename
   173  	server.masterStatus.backlog = newBacklog
   174  	server.persister.RemoveListener(server.masterStatus.aofListener)
   175  	server.masterStatus.aofListener = aofListener
   176  	server.masterStatus.mu.Unlock()
   177  	// It is ok to know that new backlog is ready later, so we change readyToSend without sync
   178  	// But setting readyToSend=true must after new backlog is really ready (that means master.mu.Unlock)
   179  	aofListener.readyToSend = true
   180  	return nil
   181  }
   182  
   183  // masterFullReSyncWithSlave send replication header, rdb file and all backlogs to slave
   184  func (server *Server) masterFullReSyncWithSlave(slave *slaveClient) error {
   185  	// write replication header
   186  	header := "+FULLRESYNC " + server.masterStatus.replId + " " +
   187  		strconv.FormatInt(server.masterStatus.backlog.beginOffset, 10) + protocol.CRLF
   188  	_, err := slave.conn.Write([]byte(header))
   189  	if err != nil {
   190  		return fmt.Errorf("write replication header to slave failed: %v", err)
   191  	}
   192  	// send rdb
   193  	rdbFile, err := os.Open(server.masterStatus.rdbFilename)
   194  	if err != nil {
   195  		return fmt.Errorf("open rdb file %s for replication error: %v", server.masterStatus.rdbFilename, err)
   196  	}
   197  	slave.state = slaveStateSendingRDB
   198  	rdbInfo, _ := os.Stat(server.masterStatus.rdbFilename)
   199  	rdbSize := rdbInfo.Size()
   200  	rdbHeader := "$" + strconv.FormatInt(rdbSize, 10) + protocol.CRLF
   201  	_, err = slave.conn.Write([]byte(rdbHeader))
   202  	if err != nil {
   203  		return fmt.Errorf("write rdb header to slave failed: %v", err)
   204  	}
   205  	_, err = io.Copy(slave.conn, rdbFile)
   206  	if err != nil {
   207  		return fmt.Errorf("write rdb file to slave failed: %v", err)
   208  	}
   209  
   210  	// send backlog
   211  	server.masterStatus.mu.RLock()
   212  	backlog, currentOffset := server.masterStatus.backlog.getSnapshot()
   213  	server.masterStatus.mu.RUnlock()
   214  	_, err = slave.conn.Write(backlog)
   215  	if err != nil {
   216  		return fmt.Errorf("full resync write backlog to slave failed: %v", err)
   217  	}
   218  
   219  	// set slave as online
   220  	server.setSlaveOnline(slave, currentOffset)
   221  	return nil
   222  }
   223  
// cannotPartialSync is the sentinel returned by masterTryPartialSyncWithSlave
// when the slave must fall back to a full re-sync (replId mismatch or offset
// no longer in the backlog). Go convention would name this ErrCannotPartialSync;
// the name is kept because other code compares against it directly.
var cannotPartialSync = errors.New("cannot do partial sync")
   225  
   226  func (server *Server) masterTryPartialSyncWithSlave(slave *slaveClient, replId string, slaveOffset int64) error {
   227  	server.masterStatus.mu.RLock()
   228  	if replId != server.masterStatus.replId {
   229  		server.masterStatus.mu.RUnlock()
   230  		return cannotPartialSync
   231  	}
   232  	if !server.masterStatus.backlog.isValidOffset(slaveOffset) {
   233  		server.masterStatus.mu.RUnlock()
   234  		return cannotPartialSync
   235  	}
   236  	backlog, currentOffset := server.masterStatus.backlog.getSnapshotAfter(slaveOffset)
   237  	server.masterStatus.mu.RUnlock()
   238  
   239  	// send replication header
   240  	header := "+CONTINUE " + server.masterStatus.replId + protocol.CRLF
   241  	_, err := slave.conn.Write([]byte(header))
   242  	if err != nil {
   243  		return fmt.Errorf("write replication header to slave failed: %v", err)
   244  	}
   245  	// send backlog
   246  	_, err = slave.conn.Write(backlog)
   247  	if err != nil {
   248  		return fmt.Errorf("partial resync write backlog to slave failed: %v", err)
   249  	}
   250  
   251  	// set slave online
   252  	server.setSlaveOnline(slave, currentOffset)
   253  	return nil
   254  }
   255  
   256  // masterSendUpdatesToSlave only sends data to online slaves after bgSave is finished
   257  // if bgSave is running, updates will be sent after the saving finished
   258  func (server *Server) masterSendUpdatesToSlave() error {
   259  	onlineSlaves := make(map[*slaveClient]struct{})
   260  	server.masterStatus.mu.RLock()
   261  	beginOffset := server.masterStatus.backlog.beginOffset
   262  	backlog, currentOffset := server.masterStatus.backlog.getSnapshot()
   263  	for slave := range server.masterStatus.onlineSlaves {
   264  		onlineSlaves[slave] = struct{}{}
   265  	}
   266  	server.masterStatus.mu.RUnlock()
   267  	for slave := range onlineSlaves {
   268  		slaveBeginOffset := slave.offset - beginOffset
   269  		_, err := slave.conn.Write(backlog[slaveBeginOffset:])
   270  		if err != nil {
   271  			logger.Errorf("send updates backlog to slave failed: %v", err)
   272  			server.removeSlave(slave)
   273  			continue
   274  		}
   275  		slave.offset = currentOffset
   276  	}
   277  	return nil
   278  }
   279  
   280  func (server *Server) execPSync(c redis.Connection, args [][]byte) redis.Reply {
   281  	replId := string(args[0])
   282  	replOffset, err := strconv.ParseInt(string(args[1]), 10, 64)
   283  	if err != nil {
   284  		return protocol.MakeErrReply("ERR value is not an integer or out of range")
   285  	}
   286  	server.masterStatus.mu.Lock()
   287  	defer server.masterStatus.mu.Unlock()
   288  	slave := server.masterStatus.slaveMap[c]
   289  	if slave == nil {
   290  		slave = &slaveClient{
   291  			conn: c,
   292  		}
   293  		c.SetSlave()
   294  		server.masterStatus.slaveMap[c] = slave
   295  	}
   296  	if server.masterStatus.bgSaveState == bgSaveIdle {
   297  		slave.state = slaveStateWaitSaveEnd
   298  		server.masterStatus.waitSlaves[slave] = struct{}{}
   299  		server.bgSaveForReplication()
   300  	} else if server.masterStatus.bgSaveState == bgSaveRunning {
   301  		slave.state = slaveStateWaitSaveEnd
   302  		server.masterStatus.waitSlaves[slave] = struct{}{}
   303  	} else if server.masterStatus.bgSaveState == bgSaveFinish {
   304  		go func() {
   305  			defer func() {
   306  				if e := recover(); e != nil {
   307  					logger.Errorf("panic: %v", e)
   308  				}
   309  			}()
   310  			err := server.masterTryPartialSyncWithSlave(slave, replId, replOffset)
   311  			if err == nil {
   312  				return
   313  			}
   314  			if err != nil && err != cannotPartialSync {
   315  				server.removeSlave(slave)
   316  				logger.Errorf("masterTryPartialSyncWithSlave error: %v", err)
   317  				return
   318  			}
   319  			// assert err == cannotPartialSync
   320  			if err := server.masterFullReSyncWithSlave(slave); err != nil {
   321  				server.removeSlave(slave)
   322  				logger.Errorf("masterFullReSyncWithSlave error: %v", err)
   323  				return
   324  			}
   325  		}()
   326  	}
   327  	return &protocol.NoReply{}
   328  }
   329  
   330  func (server *Server) execReplConf(c redis.Connection, args [][]byte) redis.Reply {
   331  	if len(args)%2 != 0 {
   332  		return protocol.MakeSyntaxErrReply()
   333  	}
   334  	server.masterStatus.mu.RLock()
   335  	slave := server.masterStatus.slaveMap[c]
   336  	server.masterStatus.mu.RUnlock()
   337  	for i := 0; i < len(args); i += 2 {
   338  		key := strings.ToLower(string(args[i]))
   339  		value := string(args[i+1])
   340  		switch key {
   341  		case "ack":
   342  			offset, err := strconv.ParseInt(value, 10, 64)
   343  			if err != nil {
   344  				return protocol.MakeErrReply("ERR value is not an integer or out of range")
   345  			}
   346  			slave.offset = offset
   347  			slave.lastAckTime = time.Now()
   348  			return &protocol.NoReply{}
   349  		}
   350  	}
   351  	return protocol.MakeOkReply()
   352  }
   353  
   354  func (server *Server) removeSlave(slave *slaveClient) {
   355  	server.masterStatus.mu.Lock()
   356  	defer server.masterStatus.mu.Unlock()
   357  	_ = slave.conn.Close()
   358  	delete(server.masterStatus.slaveMap, slave.conn)
   359  	delete(server.masterStatus.waitSlaves, slave)
   360  	delete(server.masterStatus.onlineSlaves, slave)
   361  	logger.Info("disconnect with slave " + slave.conn.Name())
   362  }
   363  
   364  func (server *Server) setSlaveOnline(slave *slaveClient, currentOffset int64) {
   365  	server.masterStatus.mu.Lock()
   366  	defer server.masterStatus.mu.Unlock()
   367  	slave.state = slaveStateOnline
   368  	slave.offset = currentOffset
   369  	server.masterStatus.onlineSlaves[slave] = struct{}{}
   370  }
   371  
// pingBytes is the serialized `ping` command appended to the backlog by
// masterCron while slaves are connected.
var pingBytes = protocol.MakeMultiBulkReply(utils.ToCmdLine("ping")).ToBytes()

// maxBacklogSize is the backlog size beyond which masterCron triggers an rdb rewrite.
const maxBacklogSize = 10 * 1024 * 1024 // 10MB
   375  
   376  func (server *Server) masterCron() {
   377  	server.masterStatus.mu.Lock()
   378  	if len(server.masterStatus.slaveMap) == 0 { // no slaves, do nothing
   379  		server.masterStatus.mu.Unlock()
   380  		return
   381  	}
   382  	if server.masterStatus.bgSaveState == bgSaveFinish {
   383  		server.masterStatus.backlog.appendBytes(pingBytes)
   384  	}
   385  	backlogSize := len(server.masterStatus.backlog.buf)
   386  	server.masterStatus.mu.Unlock()
   387  	if err := server.masterSendUpdatesToSlave(); err != nil {
   388  		logger.Errorf("masterSendUpdatesToSlave error: %v", err)
   389  	}
   390  	if backlogSize > maxBacklogSize && !server.masterStatus.rewriting.Get() {
   391  		go func() {
   392  			server.masterStatus.rewriting.Set(true)
   393  			defer server.masterStatus.rewriting.Set(false)
   394  			if err := server.rewriteRDB(); err != nil {
   395  				server.masterStatus.rewriting.Set(false)
   396  				logger.Errorf("rewrite error: %v", err)
   397  			}
   398  		}()
   399  	}
   400  }
   401  
// replAofListener is an implementation for aof.Listener
type replAofListener struct {
	mdb         *Server
	backlog     *replBacklog // may NOT be mdb.masterStatus.backlog (rewriteRDB builds a fresh one)
	readyToSend bool         // set without sync once rdb generation finished; see comments in rewriteRDB
}
   408  
   409  func (listener *replAofListener) Callback(cmdLines []CmdLine) {
   410  	listener.mdb.masterStatus.mu.Lock()
   411  	for _, cmdLine := range cmdLines {
   412  		reply := protocol.MakeMultiBulkReply(cmdLine)
   413  		listener.backlog.appendBytes(reply.ToBytes())
   414  	}
   415  	listener.mdb.masterStatus.mu.Unlock()
   416  	// listener could receive updates generated during rdb saving in progress
   417  	// Do not send updates to slave before rdb saving is finished
   418  	if listener.readyToSend {
   419  		if err := listener.mdb.masterSendUpdatesToSlave(); err != nil {
   420  			logger.Errorf("masterSendUpdatesToSlave after receive aof error: %v", err)
   421  		}
   422  	}
   423  }
   424  
   425  func (server *Server) initMaster() {
   426  	server.masterStatus = &masterStatus{
   427  		mu:           sync.RWMutex{},
   428  		replId:       utils.RandHexString(40),
   429  		backlog:      &replBacklog{},
   430  		slaveMap:     make(map[redis.Connection]*slaveClient),
   431  		waitSlaves:   make(map[*slaveClient]struct{}),
   432  		onlineSlaves: make(map[*slaveClient]struct{}),
   433  		bgSaveState:  bgSaveIdle,
   434  		rdbFilename:  "",
   435  	}
   436  }
   437  
   438  func (server *Server) stopMaster() {
   439  	server.masterStatus.mu.Lock()
   440  	defer server.masterStatus.mu.Unlock()
   441  
   442  	// disconnect with slave
   443  	for _, slave := range server.masterStatus.slaveMap {
   444  		_ = slave.conn.Close()
   445  		delete(server.masterStatus.slaveMap, slave.conn)
   446  		delete(server.masterStatus.waitSlaves, slave)
   447  		delete(server.masterStatus.onlineSlaves, slave)
   448  	}
   449  
   450  	// clean master status
   451  	if server.persister != nil {
   452  		server.persister.RemoveListener(server.masterStatus.aofListener)
   453  	}
   454  	_ = os.Remove(server.masterStatus.rdbFilename)
   455  	server.masterStatus.rdbFilename = ""
   456  	server.masterStatus.replId = ""
   457  	server.masterStatus.backlog = &replBacklog{}
   458  	server.masterStatus.slaveMap = make(map[redis.Connection]*slaveClient)
   459  	server.masterStatus.waitSlaves = make(map[*slaveClient]struct{})
   460  	server.masterStatus.onlineSlaves = make(map[*slaveClient]struct{})
   461  	server.masterStatus.bgSaveState = bgSaveIdle
   462  }