vitess.io/vitess@v0.16.2/go/vt/topo/conn.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package topo
    18  
    19  import (
    20  	"context"
    21  	"sort"
    22  )
    23  
    24  // Conn defines the interface that must be implemented by topology
    25  // plug-ins to be used with Vitess.
    26  //
    27  // Zookeeper is a good example of an implementation, as defined in
    28  // go/vt/topo/zk2topo.
    29  //
    30  // This API is very generic, and key/value store oriented.  We use
    31  // regular paths for object names, and we can list all immediate
    32  // children of a path. All paths sent through this API are relative
    33  // paths, from the root directory of the cell.
    34  //
    35  // The Conn objects are created by the Factory implementations.
type Conn interface {
	//
	// Directory support
	//

	// ListDir returns the entries in a directory.  The returned
	// list should be sorted by entry.Name.
	// If there are no files under the provided path, returns ErrNoNode.
	// dirPath is a path relative to the root directory of the cell.
	// If full is set, we want all the fields in DirEntry to be filled in.
	// If full is not set, only Name will be used. This is intended for
	// implementations where getting more than the names is more expensive,
	// as in most cases only the names are needed.
	ListDir(ctx context.Context, dirPath string, full bool) ([]DirEntry, error)

	//
	// File support
	// if version == nil, then it's an unconditional update / delete.
	//

	// Create creates the initial version of a file.
	// Returns ErrNodeExists if the file exists.
	// filePath is a path relative to the root directory of the cell.
	// It returns the Version of the newly created file.
	Create(ctx context.Context, filePath string, contents []byte) (Version, error)

	// Update updates the file with the provided filename with the
	// new content.
	// If version is nil, it is an unconditional update
	// (which is then the same as a Create if the file doesn't exist).
	// filePath is a path relative to the root directory of the cell.
	// It returns the new Version of the file after update.
	// Returns ErrBadVersion if the provided version is not current.
	Update(ctx context.Context, filePath string, contents []byte, version Version) (Version, error)

	// Get returns the content and version of a file.
	// filePath is a path relative to the root directory of the cell.
	// Can return ErrNoNode if the file doesn't exist.
	Get(ctx context.Context, filePath string) ([]byte, Version, error)

	// List returns KV pairs, along with metadata like the version, for
	// entries where the key contains the specified prefix.
	// filePathPrefix is a path relative to the root directory of the cell.
	// Can return ErrNoNode if there are no matches.
	List(ctx context.Context, filePathPrefix string) ([]KVInfo, error)

	// Delete deletes the provided file.
	// If version is nil, it is an unconditional delete.
	// If the last entry of a directory is deleted, using ListDir
	// on its parent directory should not return the directory.
	// For instance, when deleting /keyspaces/aaa/Keyspace, and if
	// there is no other file in /keyspaces/aaa, then aaa should not
	// appear any more when listing /keyspaces.
	// filePath is a path relative to the root directory of the cell.
	//
	// Delete will never be called on a directory.
	// Returns ErrNoNode if the file doesn't exist.
	// NOTE(review): ErrNoNode is assumed here by analogy with Get and
	// ListDir (ErrNodeExists is what Create returns when the file
	// already exists) -- confirm against the implementations.
	// Returns ErrBadVersion if the provided version is not current.
	Delete(ctx context.Context, filePath string, version Version) error

	//
	// Locks
	//

	// Lock takes a lock on the given directory.
	// It does not prevent any modification to any file in the topology.
	// It just prevents two processes (wherever they are) from running
	// concurrently. It is used for instance to make sure only
	// one reparent operation is running on a Shard at a given time.
	// dirPath is the directory associated with a resource, for instance
	// a Keyspace or a Shard. It is not a file location.
	// (this means the implementation can for instance create a
	// file in this directory to materialize the lock).
	// contents describes the lock holder and purpose, but has no other
	// meaning, so it can be used as a lock file contents, for instance.
	// Returns ErrNoNode if the directory doesn't exist (meaning
	//   there is no existing file under that directory).
	// Returns ErrTimeout if ctx expires.
	// Returns ErrInterrupted if ctx is canceled.
	Lock(ctx context.Context, dirPath, contents string) (LockDescriptor, error)

	// TryLock takes lock on the given directory with a fail-fast approach.
	// It is similar to `Lock` but the difference is it attempts to acquire the lock
	// if it is likely to succeed. If there is already a lock on given path, then unlike `Lock`
	// instead of waiting and blocking the client it returns with `Lock already exists` error.
	// With current implementation it may not be able to fail-fast for some scenarios.
	// For example there is a possibility that a thread checks for lock for a given path
	// but by the time it acquires the lock, some other thread has already acquired it,
	// in this case the client will block until the other caller releases the lock or the
	// client call times out (just like standard `Lock` implementation). In short the lock checking
	// and acquiring is not under the same mutex in current implementation of `TryLock`.
	TryLock(ctx context.Context, dirPath, contents string) (LockDescriptor, error)

	//
	// Watches
	//

	// Watch starts watching a file in the provided cell.  It
	// returns the current value, a 'changes' channel to read the
	// changes from, and an error.
	// If the initial read fails, or the file doesn't
	// exist, an error is returned.
	//
	// To stop the watch, cancel the provided context.
	// This will eventually result in a final WatchData result with Err =
	// ErrInterrupted. It should be safe to cancel the context
	// multiple times, or after the Watch already errored out.
	//
	// The 'changes' channel may return a record with Err != nil.
	// In that case, the channel will also be closed right after
	// that record.  In any case, 'changes' has to be drained of
	// all events, even after the context has been canceled.
	//
	// Note the 'changes' channel can return twice the same
	// Version/Contents (for instance, if the watch is interrupted
	// and restarted within the Conn implementation).
	// Similarly, the 'changes' channel may skip versions / changes
	// (that is, if value goes [A, B, C, D, E, F], the watch may only
	// receive [A, B, F]). This should only happen for rapidly
	// changing values though. Usually, the initial value will come
	// back right away. And a stable value (that hasn't changed for
	// a while) should be seen shortly.
	//
	// The Watch call is not guaranteed to return exactly up to
	// date data right away. For instance, if a file is created
	// and saved, and then a watch is set on that file, it may
	// return ErrNoNode (as the underlying configuration service
	// may use asynchronous caches that are not up to date
	// yet). The only guarantee is that the watch data will
	// eventually converge. Vitess doesn't explicitly depend on the data
	// being correct quickly, as long as it eventually gets there.
	//
	// filePath is a path relative to the root directory of the cell.
	Watch(ctx context.Context, filePath string) (current *WatchData, changes <-chan *WatchData, err error)

	// WatchRecursive starts watching a file prefix in the provided cell. It
	// returns all the current values for existing files with the given
	// prefix, a 'changes' channel to read the changes from and an error.
	//
	// The provided context should be canceled when stopping WatchRecursive().
	// This API is different from Watch() and Watch() will be changed
	// to match this API as well in the future.
	//
	// Canceling will eventually result in a final WatchDataRecursive result with Err =
	// ErrInterrupted.
	//
	// The 'changes' channel may return a record with Err != nil.
	// In that case, the channel will also be closed right after
	// that record.  In any case, 'changes' has to be drained of
	// all events, even after the context has been canceled.
	//
	// Note the 'changes' channel can return twice the same
	// Version/Contents (for instance, if the watch is interrupted
	// and restarted within the Conn implementation).
	// Similarly, the 'changes' channel may skip versions / changes
	// (that is, if value goes [A, B, C, D, E, F], the watch may only
	// receive [A, B, F]). This should only happen for rapidly
	// changing values though. Usually, the initial value will come
	// back right away. And a stable value (that hasn't changed for
	// a while) should be seen shortly.
	//
	// The WatchRecursive call is not guaranteed to return exactly up to
	// date data right away. For instance, if a file is created
	// and saved, and then a watch is set on that file, it may
	// return ErrNoNode (as the underlying configuration service
	// may use asynchronous caches that are not up to date
	// yet). The only guarantee is that the watch data will
	// eventually converge. Vitess doesn't explicitly depend on the data
	// being correct quickly, as long as it eventually gets there.
	//
	// path is a path relative to the root directory of the cell.
	WatchRecursive(ctx context.Context, path string) ([]*WatchDataRecursive, <-chan *WatchDataRecursive, error)

	//
	// Leader election methods. This is meant to have a small
	// number of processes elect a primary within a group. The
	// backend storage for this can either be the global topo
	// server, or a resilient quorum of individual cells, to
	// reduce the load / dependency on the global topo server.
	//

	// NewLeaderParticipation creates a LeaderParticipation
	// object, used to become the Leader in an election for the
	// provided group name.  Id is the name of the local process,
	// passing in the hostname:port of the current process as id
	// is the common usage. Id must be unique for each process
	// calling this, for a given name. Calling this function does
	// not make the current process a candidate for the election.
	NewLeaderParticipation(name, id string) (LeaderParticipation, error)

	// Close closes the connection to the server.
	Close()
}
   228  
// DirEntryType is the type of an entry in a directory.
// It is an int-based enumeration whose values are the constants
// declared below.
type DirEntryType int

const (
	// TypeDirectory describes a directory.
	// It is the first iota constant, so its value is 0, which makes it
	// the zero value of DirEntryType as well.
	TypeDirectory DirEntryType = iota

	// TypeFile describes a file. Its value is 1.
	TypeFile
)
   239  
// DirEntry is an entry in a directory, as returned by ListDir.
// Which fields are populated depends on the 'full' argument that was
// passed to ListDir; see the per-field comments.
type DirEntry struct {
	// Name is the name of the entry.
	// Always filled in.
	Name string

	// Type is the DirEntryType of the entry.
	// Only filled in if full is true; it should not be relied upon
	// otherwise.
	Type DirEntryType

	// Ephemeral is set if the directory / file only contains
	// data that was not set by the file API, like lock files
	// or primary-election related files.
	// Only filled in if full is true; it should not be relied upon
	// otherwise.
	Ephemeral bool
}
   256  
   257  // DirEntriesToStringArray is a helper method to extract the names
   258  // from an []DirEntry
   259  func DirEntriesToStringArray(entries []DirEntry) []string {
   260  	result := make([]string, len(entries))
   261  	for i, e := range entries {
   262  		result[i] = e.Name
   263  	}
   264  	return result
   265  }
   266  
   267  // dirEntries is used for sorting.
   268  type dirEntries []DirEntry
   269  
   270  func (e dirEntries) Len() int           { return len(e) }
   271  func (e dirEntries) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
   272  func (e dirEntries) Less(i, j int) bool { return e[i].Name < e[j].Name }
   273  
   274  // DirEntriesSortByName sorts a slice of DirEntry objects by Name.
   275  func DirEntriesSortByName(entries []DirEntry) {
   276  	sort.Sort(dirEntries(entries))
   277  }
   278  
// Version is an interface that describes a file version.
// Values of this type are returned by Create, Update and Get, and are
// passed back to Update and Delete to make those calls conditional.
// The only operation exposed here is String.
type Version interface {
	// String returns a text representation of the version.
	String() string
}
   284  
// LockDescriptor is an interface that describes a lock.
// It is returned by Conn.Lock and Conn.TryLock.
type LockDescriptor interface {
	// Check returns an error if the lock was lost.
	// Some topology implementations use a keep-alive mechanism, and
	// sometimes it fails. The users of the lock are responsible for
	// checking on it when convenient.
	Check(ctx context.Context) error

	// Unlock releases the lock.
	// It returns an error if the release failed.
	Unlock(ctx context.Context) error
}
   297  
// CancelFunc is a function that takes no arguments and returns nothing.
// NOTE(review): this type was documented as being returned by the Watch
// method, but Conn.Watch as declared in this file returns only the
// current value, a changes channel and an error, and is stopped by
// canceling its context -- confirm whether CancelFunc is still used.
type CancelFunc func()
   300  
// WatchData is the structure returned by the Watch() API.
// It can contain:
// a) an error in Err if the call failed (or if the watch was terminated).
// b) the current or new version of the data.
type WatchData struct {
	// Contents has the bytes that were stored by Create
	// or Update.
	Contents []byte

	// Version contains an opaque representation of the Version
	// of that file.
	Version Version

	// Err is set the same way for both the 'current' value
	// returned by Watch, or the values read on the 'changes'
	// channel. It can be:
	// - nil, then Contents and Version are set.
	// - ErrNoNode if the file doesn't exist.
	// - ErrInterrupted if the context passed to Watch was canceled.
	// - any other platform-specific error.
	Err error
}
   323  
// WatchDataRecursive is the structure returned by the WatchRecursive() API.
// It contains the same data as WatchData, but additionally also the specific
// path of the entry that the recursive watch applies to, since an entire
// file prefix can be watched.
type WatchDataRecursive struct {
	// Path is the path that has changed
	Path string

	// WatchData is embedded, so its Contents, Version and Err fields
	// are accessible directly on a WatchDataRecursive value.
	WatchData
}
   334  
// KVInfo is a structure that contains a generic key/value pair from
// the topo server, along with important metadata about it.
// This should be used to provide multiple entries in List like calls
// that return N KVs based on a key prefix, so that you don't lose
// information or context you would otherwise have when using Get for
// a single key.
type KVInfo struct {
	// Key is the key of the entry.
	// NOTE(review): presumably the path of the matched file -- confirm
	// against the List implementations.
	Key     []byte
	// Value is the value stored under Key.
	Value   []byte
	Version Version // version - used to prevent stomping concurrent writes
}
   346  
// LeaderParticipation is the object returned by NewLeaderParticipation.
// Sample usage:
//
//	mp, err := server.NewLeaderParticipation("vtctld", "hostname:8080")
//	if err != nil {
//	  return err
//	}
//	job := NewJob()
//
//	go func() {
//	  for {
//	    ctx, err := mp.WaitForLeadership()
//	    switch err {
//	    case nil:
//	      job.RunUntilContextDone(ctx)
//	    case topo.ErrInterrupted:
//	      return
//	    default:
//	      log.Errorf("Got error while waiting for primary, will retry in 5s: %v", err)
//	      time.Sleep(5 * time.Second)
//	    }
//	  }
//	}()
//
//	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
//	  if job.Running() {
//	    job.WriteStatus(w, r)
//	    return
//	  }
//	  leaderID, err := mp.GetCurrentLeaderID(r.Context())
//	  if err != nil {
//	    http.Error(w, err.Error(), http.StatusInternalServerError)
//	    return
//	  }
//	  http.Redirect(w, r, leaderID, http.StatusFound)
//	})
//
//	servenv.OnTermSync(func() {
//	  mp.Stop()
//	})
type LeaderParticipation interface {
	// WaitForLeadership makes the current process a candidate
	// for election, and waits until this process is the leader.
	// After we become the leader, we may lose leadership. In that case,
	// the returned context will be canceled. If Stop was called,
	// WaitForLeadership will return nil, ErrInterrupted.
	WaitForLeadership() (context.Context, error)

	// Stop is called when we don't want to participate in the
	// leader election any more. Typically, that is when the
	// hosting process is terminating.  We will relinquish
	// leadership at that point, if we had it. Stop should
	// not return until everything has been done.
	// The LeaderParticipation object should be discarded
	// after Stop has been called. Any call to WaitForLeadership
	// after Stop() will return nil, ErrInterrupted.
	// If WaitForLeadership() was running, it will return
	// nil, ErrInterrupted as soon as possible.
	Stop()

	// GetCurrentLeaderID returns the id of the current leader.
	// This may not work after Stop has been called.
	GetCurrentLeaderID(ctx context.Context) (string, error)

	// WaitForNewLeader allows for nodes to wait until a leadership
	// election cycle completes and to get subsequent updates of
	// leadership changes. This way, logic that needs to know about
	// leadership changes even when we're not the leader ourselves
	// doesn't need to poll for leadership status.
	//
	// For topo implementations that support this, it is more
	// efficient than a busy wait loop.
	//
	// NOTE(review): the strings received on the returned channel are
	// presumably leader ids, as returned by GetCurrentLeaderID -- confirm.
	WaitForNewLeader(ctx context.Context) (<-chan string, error)
}