
     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    15  package logservice
    17  import (
    18  	"fmt"
    19  	"path/filepath"
    20  	"sort"
    21  	"sync"
    23  	""
    24  	""
    25  )
const (
	// defaultExportedDirMode is the permission mode passed to MkdirAll when
	// the snapshot export directory has to be created.
	defaultExportedDirMode = 0755
	// snapshotPathPattern is the format of a snapshot directory name: the
	// snapshot index rendered as 16 zero-padded upper-case hex digits. It is
	// used both to build names (Sprintf) and to parse them back (Sscanf).
	snapshotPathPattern    = "snapshot-%016X"
)
    32  // ISnapshotManager is an interface that managers snapshots.
    33  type ISnapshotManager interface {
    34  	// Init initialize snapshots by loading exported snapshots.
    35  	Init(shardID uint64, replicaID uint64) error
    36  	// Count returns the number of snapshots in the manager.
    37  	Count(shardID uint64, replicaID uint64) int
    38  	// Add adds a new snapshot for specified shard.
    39  	Add(shardID uint64, replicaID uint64, index uint64) error
    40  	// Remove removes snapshots whose index is LE than index.
    41  	Remove(shardID uint64, replicaID uint64, index uint64) error
    42  	// EvalImportSnapshot returns the source directory and index of
    43  	// the biggest snapshot of the shard.
    44  	EvalImportSnapshot(shardID uint64, replicaID uint64, index uint64) (string, uint64)
    45  }
// ISnapshotItem is an interface that represents a single snapshot item.
type ISnapshotItem interface {
	// Exists reports whether the snapshot item exists.
	Exists() bool
	// Remove removes the snapshot item.
	Remove() error
	// Valid checks the legality of the snapshot item. Only the names of
	// the files in it are checked.
	Valid() (bool, error)
}
// snapshotIndex is the type of a snapshot index.
type snapshotIndex uint64
    61  // nodeID contains shardID and replicaID.
    62  type nodeID struct {
    63  	shardID   uint64
    64  	replicaID uint64
    65  }
// snapshotItem represents a snapshot item, with index and directory in it.
type snapshotItem struct {
	// fs is the file system used to stat, list and remove dir.
	fs    vfs.FS
	// index is the snapshot index of this item.
	index snapshotIndex
	// dir is the directory that holds the files of this snapshot.
	dir   string
}
    74  var snapshotItemPool = sync.Pool{
    75  	New: func() interface{} {
    76  		return new(snapshotItem)
    77  	},
    78  }
    80  func getSnapshotItem(si snapshotItem) *snapshotItem {
    81  	item := snapshotItemPool.Get().(*snapshotItem)
    82  	*item = si
    83  	return item
    84  }
    86  func putSnapshotItem(item *snapshotItem) {
    87  	*item = snapshotItem{}
    88  	snapshotItemPool.Put(item)
    89  }
    91  // Exists implements the ISnapshotItem interface.
    92  func (si *snapshotItem) Exists() bool {
    93  	_, err := si.fs.Stat(si.dir)
    94  	return err == nil
    95  }
    97  // Remove implements the ISnapshotItem interface.
    98  func (si *snapshotItem) Remove() error {
    99  	return si.fs.RemoveAll(si.dir)
   100  }
   102  // Valid implements the ISnapshotItem interface.
   103  func (si *snapshotItem) Valid() (bool, error) {
   104  	names, err := si.fs.List(si.dir)
   105  	if err != nil {
   106  		return false, err
   107  	}
   108  	if len(names) != 2 {
   109  		return false, moerr.NewInternalErrorNoCtx("file number is not correct: %d", len(names))
   110  	}
   111  	sort.Strings(names)
   112  	var index uint64
   113  	_, err = fmt.Sscanf(names[0], snapshotPathPattern+".gbsnap", &index)
   114  	if err != nil {
   115  		return false, err
   116  	}
   117  	if snapshotIndex(index) != si.index {
   118  		return false, moerr.NewInternalErrorNoCtx("index of dir %d and file %d are different",
   119  			si.index, index)
   120  	}
   121  	if names[1] != "snapshot.metadata" {
   122  		return false, moerr.NewInternalErrorNoCtx("no snapshot.metadata file")
   123  	}
   124  	return true, nil
   125  }
// snapshotRecord contains the snapshot items of one shard replica.
type snapshotRecord struct {
	// fs is the file system handed to every snapshotItem created by add.
	fs     vfs.FS
	// nodeID is the shard replica the record belongs to; it is only used
	// in error messages here.
	nodeID nodeID
	// items are sorted by .index
	items []*snapshotItem
}
   135  func newNodeSnapshot(fs vfs.FS, nodeID nodeID) *snapshotRecord {
   136  	return &snapshotRecord{
   137  		fs:     fs,
   138  		nodeID: nodeID,
   139  		items:  make([]*snapshotItem, 0),
   140  	}
   141  }
   143  func (ss *snapshotRecord) first() *snapshotItem {
   144  	if len(ss.items) == 0 {
   145  		return nil
   146  	}
   147  	return ss.items[0]
   148  }
   150  func (ss *snapshotRecord) last() *snapshotItem {
   151  	l := len(ss.items)
   152  	if l == 0 {
   153  		return nil
   154  	}
   155  	return ss.items[l-1]
   156  }
   158  func (ss *snapshotRecord) add(index snapshotIndex, dir string) error {
   159  	last := ss.last()
   160  	if last != nil && index < last.index {
   161  		return moerr.NewInternalErrorNoCtx("snapshot with smaller index %d than current biggest one %d",
   162  			index, last.index)
   163  	}
   164  	si := getSnapshotItem(snapshotItem{fs: ss.fs, index: index, dir: dir})
   165  	if !si.Exists() {
   166  		return moerr.NewInternalErrorNoCtx("snapshot file does not exist for shard-replica %d-%d, index %d, dir %s",
   167  			ss.nodeID.shardID, ss.nodeID.replicaID, index, dir)
   168  	}
   169  	v, err := si.Valid()
   170  	if err != nil {
   171  		return err
   172  	}
   173  	if v {
   174  		ss.items = append(ss.items, si)
   175  	}
   176  	return nil
   177  }
   179  // removeFirst removes the first snapshot from snapshot item list.
   180  func (ss *snapshotRecord) removeFirst() error {
   181  	if first := ss.first(); first != nil {
   182  		if err := first.Remove(); err != nil {
   183  			return err
   184  		}
   185  	}
   186  	putSnapshotItem(ss.items[0])
   187  	ss.items = ss.items[1:]
   188  	return nil
   189  }
   191  // remove the snapshots whose index is LE than the index.
   192  func (ss *snapshotRecord) remove(index snapshotIndex) error {
   193  	items := make([]*snapshotItem, len(ss.items))
   194  	copy(items, ss.items)
   195  	for _, si := range items {
   196  		if si.index <= index {
   197  			if err := ss.removeFirst(); err != nil {
   198  				return err
   199  			}
   200  		} else {
   201  			break
   202  		}
   203  	}
   204  	return nil
   205  }
// snapshotManager manages the exported snapshots created by dragonboat.
// In dragonboat, snapshots are taken with the method SyncRequestSnapshot,
// which accepts a snapshot option. By default, a new snapshot is taken at
// the applied index and log entries are removed at the LSN given in the
// snapshot option. That LSN must be less than the applied index. This works
// for a normal state machine.
// Unfortunately, there are two separate state machines in MO. One is
// in logservice and the other is in TN. The snapshot cannot be taken
// like that in this case, because when a replica starts, it applies the
// latest snapshot, whose index is greater than the truncate LSN. As a
// result, the TN cannot read the log entries between the truncate LSN and
// the snapshot index to replay.
// The solution is to set Exported to true in the snapshot option. This
// prevents taking an active snapshot and just exports the snapshot files to
// an external directory. When the truncate LSN is greater than any snapshot
// index in the external directory, the snapshot is imported into the system,
// and then the log entries can be removed safely.
type snapshotManager struct {
	// cfg supplies the file system (FS) and the export root directory
	// (SnapshotExportDir) used by the manager.
	cfg       *Config
	// snapshots holds one record per shard replica.
	snapshots map[nodeID]*snapshotRecord // nodeID (shardID, replicaID) => *snapshotRecord
}
   229  // newSnapshotManager makes a new snapshot.
   230  func newSnapshotManager(cfg *Config) *snapshotManager {
   231  	return &snapshotManager{
   232  		cfg:       cfg,
   233  		snapshots: make(map[nodeID]*snapshotRecord),
   234  	}
   235  }
   237  func (sm *snapshotManager) exportPath(shardID uint64, replicaID uint64) string {
   238  	parts := make([]string, 3)
   239  	dir := sm.cfg.SnapshotExportDir
   240  	shardPart := fmt.Sprintf("shard-%d", shardID)
   241  	replicaPart := fmt.Sprintf("replica-%d", replicaID)
   242  	parts = append(parts, dir, shardPart, replicaPart)
   243  	return filepath.Join(parts...)
   244  }
   246  func (sm *snapshotManager) snapshotPath(nodeID nodeID, index snapshotIndex) string {
   247  	parts := make([]string, 2)
   248  	snapshotPart := fmt.Sprintf(snapshotPathPattern, index)
   249  	parts = append(parts, sm.exportPath(nodeID.shardID, nodeID.replicaID), snapshotPart)
   250  	return filepath.Join(parts...)
   251  }
   253  func (sm *snapshotManager) prepareDir(path string) error {
   254  	s, err := sm.cfg.FS.Stat(path)
   255  	if err != nil {
   256  		if e := sm.cfg.FS.MkdirAll(path, defaultExportedDirMode); e != nil {
   257  			return e
   258  		}
   259  		return nil
   260  	}
   261  	if !s.IsDir() {
   262  		return moerr.NewInternalErrorNoCtx("%s is not a dir", path)
   263  	}
   264  	return nil
   265  }
   267  func (sm *snapshotManager) parse(dir string) (int, error) {
   268  	var index int
   269  	_, err := fmt.Sscanf(dir, snapshotPathPattern, &index)
   270  	if err != nil {
   271  		return 0, err
   272  	}
   273  	return index, nil
   274  }
   276  // Init implements the ISnapshotManager interface.
   277  func (sm *snapshotManager) Init(shardID uint64, replicaID uint64) error {
   278  	path := sm.exportPath(shardID, replicaID)
   279  	if err := sm.prepareDir(path); err != nil {
   280  		return err
   281  	}
   282  	names, err := sm.cfg.FS.List(path)
   283  	if err != nil {
   284  		return err
   285  	}
   286  	indexes := make([]int, len(names))
   287  	for _, name := range names {
   288  		index, err := sm.parse(name)
   289  		if err != nil {
   290  			continue
   291  		}
   292  		indexes = append(indexes, index)
   293  	}
   294  	// The snapshots in the manager must be sorted.
   295  	sort.Ints(indexes)
   296  	for _, idx := range indexes {
   297  		if idx > 0 {
   298  			_ = sm.Add(shardID, replicaID, uint64(idx))
   299  		}
   300  	}
   301  	return nil
   302  }
   304  // Count implements the ISnapshotManager interface.
   305  func (sm *snapshotManager) Count(shardID uint64, replicaID uint64) int {
   306  	nid := nodeID{shardID: shardID, replicaID: replicaID}
   307  	if s, ok := sm.snapshots[nid]; ok {
   308  		return len(s.items)
   309  	}
   310  	return 0
   311  }
   313  // Add implements the ISnapshotManager interface.
   314  func (sm *snapshotManager) Add(shardID uint64, replicaID uint64, index uint64) error {
   315  	si := snapshotIndex(index)
   316  	nid := nodeID{shardID: shardID, replicaID: replicaID}
   317  	dir := sm.snapshotPath(nid, si)
   318  	_, ok := sm.snapshots[nid]
   319  	if !ok {
   320  		sm.snapshots[nid] = newNodeSnapshot(sm.cfg.FS, nid)
   321  	}
   322  	return sm.snapshots[nid].add(si, dir)
   323  }
   325  // Remove implements the ISnapshotManager interface.
   326  func (sm *snapshotManager) Remove(shardID uint64, replicaID uint64, index uint64) error {
   327  	si := snapshotIndex(index)
   328  	nid := nodeID{shardID: shardID, replicaID: replicaID}
   329  	if ss, ok := sm.snapshots[nid]; ok {
   330  		return ss.remove(si)
   331  	}
   332  	return nil
   333  }
   335  // EvalImportSnapshot implements the ISnapshotManager interface.
   336  func (sm *snapshotManager) EvalImportSnapshot(shardID uint64, replicaID uint64, index uint64) (string, uint64) {
   337  	nid := nodeID{shardID: shardID, replicaID: replicaID}
   338  	ss, ok := sm.snapshots[nid]
   339  	if !ok {
   340  		return "", 0
   341  	}
   343  	var dir string
   344  	var si snapshotIndex
   345  	for _, item := range ss.items {
   346  		if item == nil {
   347  			return "", 0
   348  		}
   349  		// Find the bigger one, break and return the smaller one.
   350  		if uint64(item.index) > index {
   351  			break
   352  		}
   353  		dir = item.dir
   354  		si = item.index
   355  	}
   356  	return dir, uint64(si)
   357  }