vitess.io/vitess@v0.16.2/go/vt/topo/conn.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package topo 18 19 import ( 20 "context" 21 "sort" 22 ) 23 24 // Conn defines the interface that must be implemented by topology 25 // plug-ins to be used with Vitess. 26 // 27 // Zookeeper is a good example of an implementation, as defined in 28 // go/vt/topo/zk2topo. 29 // 30 // This API is very generic, and key/value store oriented. We use 31 // regular paths for object names, and we can list all immediate 32 // children of a path. All paths sent through this API are relative 33 // paths, from the root directory of the cell. 34 // 35 // The Conn objects are created by the Factory implementations. 36 type Conn interface { 37 // 38 // Directory support 39 // 40 41 // ListDir returns the entries in a directory. The returned 42 // list should be sorted by entry.Name. 43 // If there are no files under the provided path, returns ErrNoNode. 44 // dirPath is a path relative to the root directory of the cell. 45 // If full is set, we want all the fields in DirEntry to be filled in. 46 // If full is not set, only Name will be used. This is intended for 47 // implementations where getting more than the names is more expensive, 48 // as in most cases only the names are needed. 49 ListDir(ctx context.Context, dirPath string, full bool) ([]DirEntry, error) 50 51 // 52 // File support 53 // if version == nil, then it’s an unconditional update / delete. 54 // 55 56 // Create creates the initial version of a file. 57 // Returns ErrNodeExists if the file exists. 58 // filePath is a path relative to the root directory of the cell. 59 Create(ctx context.Context, filePath string, contents []byte) (Version, error) 60 61 // Update updates the file with the provided filename with the 62 // new content. 63 // If version is nil, it is an unconditional update 64 // (which is then the same as a Create is the file doesn't exist). 65 // filePath is a path relative to the root directory of the cell. 66 // It returns the new Version of the file after update. 67 // Returns ErrBadVersion if the provided version is not current. 68 Update(ctx context.Context, filePath string, contents []byte, version Version) (Version, error) 69 70 // Get returns the content and version of a file. 71 // filePath is a path relative to the root directory of the cell. 72 // Can return ErrNoNode if the file doesn't exist. 73 Get(ctx context.Context, filePath string) ([]byte, Version, error) 74 75 // List returns KV pairs, along with metadata like the version, for 76 // entries where the key contains the specified prefix. 77 // filePathPrefix is a path relative to the root directory of the cell. 78 // Can return ErrNoNode if there are no matches. 79 List(ctx context.Context, filePathPrefix string) ([]KVInfo, error) 80 81 // Delete deletes the provided file. 82 // If version is nil, it is an unconditional delete. 83 // If the last entry of a directory is deleted, using ListDir 84 // on its parent directory should not return the directory. 85 // For instance, when deleting /keyspaces/aaa/Keyspace, and if 86 // there is no other file in /keyspaces/aaa, then aaa should not 87 // appear any more when listing /keyspaces. 88 // filePath is a path relative to the root directory of the cell. 89 // 90 // Delete will never be called on a directory. 91 // Returns ErrNodeExists if the file doesn't exist. 92 // Returns ErrBadVersion if the provided version is not current. 93 Delete(ctx context.Context, filePath string, version Version) error 94 95 // 96 // Locks 97 // 98 99 // Lock takes a lock on the given directory. 100 // It does not prevent any modification to any file in the topology. 101 // It just prevents two concurrent processes (wherever they are) 102 // to run concurrently. It is used for instance to make sure only 103 // one reparent operation is running on a Shard at a given time. 104 // dirPath is the directory associated with a resource, for instance 105 // a Keyspace or a Shard. It is not a file location. 106 // (this means the implementation can for instance create a 107 // file in this directory to materialize the lock). 108 // contents describes the lock holder and purpose, but has no other 109 // meaning, so it can be used as a lock file contents, for instance. 110 // Returns ErrNoNode if the directory doesn't exist (meaning 111 // there is no existing file under that directory). 112 // Returns ErrTimeout if ctx expires. 113 // Returns ErrInterrupted if ctx is canceled. 114 Lock(ctx context.Context, dirPath, contents string) (LockDescriptor, error) 115 116 // TryLock takes lock on the given directory with a fail-fast approach. 117 // It is similar to `Lock` but the difference is it attempts to acquire the lock 118 // if it is likely to succeed. If there is already a lock on given path, then unlike `Lock` 119 // instead of waiting and blocking the client it returns with `Lock already exists` error. 120 // With current implementation it may not be able to fail-fast for some scenarios. 121 // For example there is a possibility that a thread checks for lock for a given path 122 // but by the time it acquires the lock, some other thread has already acquired it, 123 // in this case the client will block until the other caller releases the lock or the 124 // client call times out (just like standard `Lock' implementation). In short the lock checking 125 // and acquiring is not under the same mutex in current implementation of `TryLock`. 126 TryLock(ctx context.Context, dirPath, contents string) (LockDescriptor, error) 127 128 // 129 // Watches 130 // 131 132 // Watch starts watching a file in the provided cell. It 133 // returns the current value, a 'changes' channel to read the 134 // changes from, and an error. 135 // If the initial read fails, or the file doesn't 136 // exist, an error is returned. 137 // 138 // To stop the watch, cancel the provided context. 139 // This will eventually result in a final WatchData result with Err = 140 // ErrInterrupted. It should be safe to cancel the context 141 // multiple times, or after the Watch already errored out. 142 // 143 // The 'changes' channel may return a record with Err != nil. 144 // In that case, the channel will also be closed right after 145 // that record. In any case, 'changes' has to be drained of 146 // all events, even when 'stop' is closed. 147 // 148 // Note the 'changes' channel can return twice the same 149 // Version/Contents (for instance, if the watch is interrupted 150 // and restarted within the Conn implementation). 151 // Similarly, the 'changes' channel may skip versions / changes 152 // (that is, if value goes [A, B, C, D, E, F], the watch may only 153 // receive [A, B, F]). This should only happen for rapidly 154 // changing values though. Usually, the initial value will come 155 // back right away. And a stable value (that hasn't changed for 156 // a while) should be seen shortly. 157 // 158 // The Watch call is not guaranteed to return exactly up to 159 // date data right away. For instance, if a file is created 160 // and saved, and then a watch is set on that file, it may 161 // return ErrNoNode (as the underlying configuration service 162 // may use asynchronous caches that are not up to date 163 // yet). The only guarantee is that the watch data will 164 // eventually converge. Vitess doesn't explicitly depend on the data 165 // being correct quickly, as long as it eventually gets there. 166 // 167 // filePath is a path relative to the root directory of the cell. 168 Watch(ctx context.Context, filePath string) (current *WatchData, changes <-chan *WatchData, err error) 169 170 // WatchRecursive starts watching a file prefix in the provided cell. It 171 // returns all the current values for existing files with the given 172 // prefix, a 'changes' channel to read the changes from and an error. 173 // 174 // The provided context should be canceled when stopping WatchRecursive(). 175 // This API is different from Watch() and Watch() will be changed 176 // to match this API as well in the future. 177 // 178 // Canceling will eventually result in a final WatchDataRecursive result with Err = 179 // ErrInterrupted. 180 // 181 // The 'changes' channel may return a record with Err != nil. 182 // In that case, the channel will also be closed right after 183 // that record. In any case, 'changes' has to be drained of 184 // all events, even when 'stop' is closed. 185 // 186 // Note the 'changes' channel can return twice the same 187 // Version/Contents (for instance, if the watch is interrupted 188 // and restarted within the Conn implementation). 189 // Similarly, the 'changes' channel may skip versions / changes 190 // (that is, if value goes [A, B, C, D, E, F], the watch may only 191 // receive [A, B, F]). This should only happen for rapidly 192 // changing values though. Usually, the initial value will come 193 // back right away. And a stable value (that hasn't changed for 194 // a while) should be seen shortly. 195 // 196 // The WatchRecursive call is not guaranteed to return exactly up to 197 // date data right away. For instance, if a file is created 198 // and saved, and then a watch is set on that file, it may 199 // return ErrNoNode (as the underlying configuration service 200 // may use asynchronous caches that are not up to date 201 // yet). The only guarantee is that the watch data will 202 // eventually converge. Vitess doesn't explicitly depend on the data 203 // being correct quickly, as long as it eventually gets there. 204 // 205 // path is a path relative to the root directory of the cell. 206 WatchRecursive(ctx context.Context, path string) ([]*WatchDataRecursive, <-chan *WatchDataRecursive, error) 207 208 // 209 // Leader election methods. This is meant to have a small 210 // number of processes elect a primary within a group. The 211 // backend storage for this can either be the global topo 212 // server, or a resilient quorum of individual cells, to 213 // reduce the load / dependency on the global topo server. 214 // 215 216 // NewLeaderParticipation creates a LeaderParticipation 217 // object, used to become the Leader in an election for the 218 // provided group name. Id is the name of the local process, 219 // passing in the hostname:port of the current process as id 220 // is the common usage. Id must be unique for each process 221 // calling this, for a given name. Calling this function does 222 // not make the current process a candidate for the election. 223 NewLeaderParticipation(name, id string) (LeaderParticipation, error) 224 225 // Close closes the connection to the server. 226 Close() 227 } 228 229 // DirEntryType is the type of an entry in a directory. 230 type DirEntryType int 231 232 const ( 233 // TypeDirectory describes a directory. 234 TypeDirectory DirEntryType = iota 235 236 // TypeFile describes a file. 237 TypeFile 238 ) 239 240 // DirEntry is an entry in a directory, as returned by ListDir. 241 type DirEntry struct { 242 // Name is the name of the entry. 243 // Always filled in. 244 Name string 245 246 // Type is the DirEntryType of the entry. 247 // Only filled in if full is true. 248 Type DirEntryType 249 250 // Ephemeral is set if the directory / file only contains 251 // data that was not set by the file API, like lock files 252 // or primary-election related files. 253 // Only filled in if full is true. 254 Ephemeral bool 255 } 256 257 // DirEntriesToStringArray is a helper method to extract the names 258 // from an []DirEntry 259 func DirEntriesToStringArray(entries []DirEntry) []string { 260 result := make([]string, len(entries)) 261 for i, e := range entries { 262 result[i] = e.Name 263 } 264 return result 265 } 266 267 // dirEntries is used for sorting. 268 type dirEntries []DirEntry 269 270 func (e dirEntries) Len() int { return len(e) } 271 func (e dirEntries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } 272 func (e dirEntries) Less(i, j int) bool { return e[i].Name < e[j].Name } 273 274 // DirEntriesSortByName sorts a slice of DirEntry objects by Name. 275 func DirEntriesSortByName(entries []DirEntry) { 276 sort.Sort(dirEntries(entries)) 277 } 278 279 // Version is an interface that describes a file version. 280 type Version interface { 281 // String returns a text representation of the version. 282 String() string 283 } 284 285 // LockDescriptor is an interface that describes a lock. 286 // It will be returned by Lock(). 287 type LockDescriptor interface { 288 // Check returns an error if the lock was lost. 289 // Some topology implementations use a keep-alive mechanism, and 290 // sometimes it fails. The users of the lock are responsible for 291 // checking on it when convenient. 292 Check(ctx context.Context) error 293 294 // Unlock releases the lock. 295 Unlock(ctx context.Context) error 296 } 297 298 // CancelFunc is returned by the Watch method. 299 type CancelFunc func() 300 301 // WatchData is the structure returned by the Watch() API. 302 // It can contain: 303 // a) an error in Err if the call failed (or if the watch was terminated). 304 // b) the current or new version of the data. 305 type WatchData struct { 306 // Contents has the bytes that were stored by Create 307 // or Update. 308 Contents []byte 309 310 // Version contains an opaque representation of the Version 311 // of that file. 312 Version Version 313 314 // Err is set the same way for both the 'current' value 315 // returned by Watch, or the values read on the 'changes' 316 // channel. It can be: 317 // - nil, then Contents and Version are set. 318 // - ErrNoNode if the file doesn't exist. 319 // - ErrInterrupted if 'cancel' was called. 320 // - any other platform-specific error. 321 Err error 322 } 323 324 // WatchDataRecursive is the structure returned by the WatchRecursive() API. 325 // It contains the same data as WatchData, but additionally also the specific 326 // path of the entry that the recursive watch applies to, since an entire 327 // file prefix can be watched. 328 type WatchDataRecursive struct { 329 // Path is the path that has changed 330 Path string 331 332 WatchData 333 } 334 335 // KVInfo is a structure that contains a generic key/value pair from 336 // the topo server, along with important metadata about it. 337 // This should be used to provide multiple entries in List like calls 338 // that return N KVs based on a key prefix, so that you don't lose 339 // information or context you would otherwise have when using Get for 340 // a single key. 341 type KVInfo struct { 342 Key []byte 343 Value []byte 344 Version Version // version - used to prevent stomping concurrent writes 345 } 346 347 // LeaderParticipation is the object returned by NewLeaderParticipation. 348 // Sample usage: 349 // 350 // mp := server.NewLeaderParticipation("vtctld", "hostname:8080") 351 // job := NewJob() 352 // 353 // go func() { 354 // for { 355 // ctx, err := mp.WaitForLeadership() 356 // switch err { 357 // case nil: 358 // job.RunUntilContextDone(ctx) 359 // case topo.ErrInterrupted: 360 // return 361 // default: 362 // log.Errorf("Got error while waiting for primary, will retry in 5s: %v", err) 363 // time.Sleep(5 * time.Second) 364 // } 365 // } 366 // }() 367 // 368 // http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 369 // if job.Running() { 370 // job.WriteStatus(w, r) 371 // } else { 372 // http.Redirect(w, r, mp.GetCurrentLeaderID(context.Background()), http.StatusFound) 373 // } 374 // }) 375 // 376 // servenv.OnTermSync(func() { 377 // mp.Stop() 378 // }) 379 type LeaderParticipation interface { 380 // WaitForLeadership makes the current process a candidate 381 // for election, and waits until this process is the primary. 382 // After we become the primary, we may lose primaryship. In that case, 383 // the returned context will be canceled. If Stop was called, 384 // WaitForLeadership will return nil, ErrInterrupted. 385 WaitForLeadership() (context.Context, error) 386 387 // Stop is called when we don't want to participate in the 388 // primary election any more. Typically, that is when the 389 // hosting process is terminating. We will relinquish 390 // primaryship at that point, if we had it. Stop should 391 // not return until everything has been done. 392 // The LeaderParticipation object should be discarded 393 // after Stop has been called. Any call to WaitForLeadership 394 // after Stop() will return nil, ErrInterrupted. 395 // If WaitForLeadership() was running, it will return 396 // nil, ErrInterrupted as soon as possible. 397 Stop() 398 399 // GetCurrentLeaderID returns the current primary id. 400 // This may not work after Stop has been called. 401 GetCurrentLeaderID(ctx context.Context) (string, error) 402 403 // WaitForNewLeader allows for nodes to wait until a leadership 404 // election cycle completes and to get subsequent updates of 405 // leadership changes. This way logic that needs to know if leadership 406 // changes also if we're not the leader ourselves doesn't need to 407 // poll for leadership status. 408 // 409 // For topo implementation that have this, it can be used more 410 // efficiently than needing a busy wait loop. 411 WaitForNewLeader(ctx context.Context) (<-chan string, error) 412 }