github.com/macb/etcd@v0.3.1-0.20140227003422-a60481c6b1a0/store/store.go (about) 1 /* 2 Copyright 2013 CoreOS Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package store 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "path" 23 "strconv" 24 "strings" 25 "sync" 26 "time" 27 28 etcdErr "github.com/coreos/etcd/error" 29 ustrings "github.com/coreos/etcd/pkg/strings" 30 ) 31 32 // The default version to set when the store is first initialized. 33 const defaultVersion = 2 34 35 var minExpireTime time.Time 36 37 func init() { 38 minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") 39 } 40 41 type Store interface { 42 Version() int 43 CommandFactory() CommandFactory 44 Index() uint64 45 46 Get(nodePath string, recursive, sorted bool) (*Event, error) 47 Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) 48 Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) 49 Create(nodePath string, dir bool, value string, unique bool, 50 expireTime time.Time) (*Event, error) 51 CompareAndSwap(nodePath string, prevValue string, prevIndex uint64, 52 value string, expireTime time.Time) (*Event, error) 53 Delete(nodePath string, recursive, dir bool) (*Event, error) 54 CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) 55 56 Watch(prefix string, recursive, stream bool, sinceIndex uint64) (*Watcher, error) 57 58 Save() ([]byte, error) 59 Recovery(state []byte) error 60 61 TotalTransactions() uint64 62 JsonStats() []byte 63 DeleteExpiredKeys(cutoff time.Time) 64 } 65 66 type store struct { 67 Root *node 68 WatcherHub *watcherHub 69 CurrentIndex uint64 70 Stats *Stats 71 CurrentVersion int 72 ttlKeyHeap *ttlKeyHeap // need to recovery manually 73 worldLock sync.RWMutex // stop the world lock 74 } 75 76 func New() Store { 77 return newStore() 78 } 79 80 func newStore() *store { 81 s := new(store) 82 s.CurrentVersion = defaultVersion 83 s.Root = newDir(s, "/", s.CurrentIndex, nil, "", Permanent) 84 s.Stats = newStats() 85 s.WatcherHub = newWatchHub(1000) 86 s.ttlKeyHeap = newTtlKeyHeap() 87 return s 88 } 89 90 // Version retrieves current version of the store. 91 func (s *store) Version() int { 92 return s.CurrentVersion 93 } 94 95 // Retrieves current of the store 96 func (s *store) Index() uint64 { 97 return s.CurrentIndex 98 } 99 100 // CommandFactory retrieves the command factory for the current version of the store. 101 func (s *store) CommandFactory() CommandFactory { 102 return GetCommandFactory(s.Version()) 103 } 104 105 // Get returns a get event. 106 // If recursive is true, it will return all the content under the node path. 107 // If sorted is true, it will sort the content by keys. 108 func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) { 109 s.worldLock.RLock() 110 defer s.worldLock.RUnlock() 111 112 nodePath = path.Clean(path.Join("/", nodePath)) 113 114 n, err := s.internalGet(nodePath) 115 116 if err != nil { 117 s.Stats.Inc(GetFail) 118 return nil, err 119 } 120 121 e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex) 122 e.Node.loadInternalNode(n, recursive, sorted) 123 124 s.Stats.Inc(GetSuccess) 125 126 return e, nil 127 } 128 129 // Create creates the node at nodePath. Create will help to create intermediate directories with no ttl. 130 // If the node has already existed, create will fail. 131 // If any node on the path is a file, create will fail. 132 func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireTime time.Time) (*Event, error) { 133 s.worldLock.Lock() 134 defer s.worldLock.Unlock() 135 e, err := s.internalCreate(nodePath, dir, value, unique, false, expireTime, Create) 136 137 if err == nil { 138 s.Stats.Inc(CreateSuccess) 139 } else { 140 s.Stats.Inc(CreateFail) 141 } 142 143 return e, err 144 } 145 146 // Set creates or replace the node at nodePath. 147 func (s *store) Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) { 148 var err error 149 150 s.worldLock.Lock() 151 defer s.worldLock.Unlock() 152 153 defer func() { 154 if err == nil { 155 s.Stats.Inc(SetSuccess) 156 } else { 157 s.Stats.Inc(SetFail) 158 } 159 }() 160 161 // Get prevNode value 162 n, getErr := s.internalGet(nodePath) 163 if getErr != nil && getErr.ErrorCode != etcdErr.EcodeKeyNotFound { 164 err = getErr 165 return nil, err 166 } 167 168 // Set new value 169 e, err := s.internalCreate(nodePath, dir, value, false, true, expireTime, Set) 170 if err != nil { 171 return nil, err 172 } 173 174 // Put prevNode into event 175 if getErr == nil { 176 prev := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex) 177 prev.Node.loadInternalNode(n, false, false) 178 e.PrevNode = prev.Node 179 } 180 181 return e, nil 182 } 183 184 func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64, 185 value string, expireTime time.Time) (*Event, error) { 186 187 nodePath = path.Clean(path.Join("/", nodePath)) 188 // we do not allow the user to change "/" 189 if nodePath == "/" { 190 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 191 } 192 193 s.worldLock.Lock() 194 defer s.worldLock.Unlock() 195 196 n, err := s.internalGet(nodePath) 197 198 if err != nil { 199 s.Stats.Inc(CompareAndSwapFail) 200 return nil, err 201 } 202 203 if n.IsDir() { // can only compare and swap file 204 s.Stats.Inc(CompareAndSwapFail) 205 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex) 206 } 207 208 // If both of the prevValue and prevIndex are given, we will test both of them. 209 // Command will be executed, only if both of the tests are successful. 210 if !n.Compare(prevValue, prevIndex) { 211 cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex) 212 s.Stats.Inc(CompareAndSwapFail) 213 return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex) 214 } 215 216 // update etcd index 217 s.CurrentIndex++ 218 219 e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex) 220 e.PrevNode = n.Repr(false, false) 221 eNode := e.Node 222 223 // if test succeed, write the value 224 n.Write(value, s.CurrentIndex) 225 n.UpdateTTL(expireTime) 226 227 // copy the value for safety 228 valueCopy := ustrings.Clone(value) 229 eNode.Value = &valueCopy 230 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 231 232 s.WatcherHub.notify(e) 233 s.Stats.Inc(CompareAndSwapSuccess) 234 return e, nil 235 } 236 237 // Delete deletes the node at the given path. 238 // If the node is a directory, recursive must be true to delete it. 239 func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) { 240 nodePath = path.Clean(path.Join("/", nodePath)) 241 // we do not allow the user to change "/" 242 if nodePath == "/" { 243 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 244 } 245 246 s.worldLock.Lock() 247 defer s.worldLock.Unlock() 248 249 // recursive implies dir 250 if recursive == true { 251 dir = true 252 } 253 254 n, err := s.internalGet(nodePath) 255 256 if err != nil { // if the node does not exist, return error 257 s.Stats.Inc(DeleteFail) 258 return nil, err 259 } 260 261 nextIndex := s.CurrentIndex + 1 262 e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex) 263 e.PrevNode = n.Repr(false, false) 264 eNode := e.Node 265 266 if n.IsDir() { 267 eNode.Dir = true 268 } 269 270 callback := func(path string) { // notify function 271 // notify the watchers with deleted set true 272 s.WatcherHub.notifyWatchers(e, path, true) 273 } 274 275 err = n.Remove(dir, recursive, callback) 276 277 if err != nil { 278 s.Stats.Inc(DeleteFail) 279 return nil, err 280 } 281 282 // update etcd index 283 s.CurrentIndex++ 284 285 s.WatcherHub.notify(e) 286 287 s.Stats.Inc(DeleteSuccess) 288 289 return e, nil 290 } 291 292 func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) { 293 nodePath = path.Clean(path.Join("/", nodePath)) 294 295 s.worldLock.Lock() 296 defer s.worldLock.Unlock() 297 298 n, err := s.internalGet(nodePath) 299 300 if err != nil { // if the node does not exist, return error 301 s.Stats.Inc(CompareAndDeleteFail) 302 return nil, err 303 } 304 305 if n.IsDir() { // can only compare and delete file 306 s.Stats.Inc(CompareAndSwapFail) 307 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex) 308 } 309 310 // If both of the prevValue and prevIndex are given, we will test both of them. 311 // Command will be executed, only if both of the tests are successful. 312 if !n.Compare(prevValue, prevIndex) { 313 cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex) 314 s.Stats.Inc(CompareAndDeleteFail) 315 return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex) 316 } 317 318 // update etcd index 319 s.CurrentIndex++ 320 321 e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex) 322 e.PrevNode = n.Repr(false, false) 323 324 callback := func(path string) { // notify function 325 // notify the watchers with deleted set true 326 s.WatcherHub.notifyWatchers(e, path, true) 327 } 328 329 // delete a key-value pair, no error should happen 330 n.Remove(false, false, callback) 331 332 s.WatcherHub.notify(e) 333 s.Stats.Inc(CompareAndDeleteSuccess) 334 return e, nil 335 } 336 337 func (s *store) Watch(key string, recursive, stream bool, sinceIndex uint64) (*Watcher, error) { 338 key = path.Clean(path.Join("/", key)) 339 nextIndex := s.CurrentIndex + 1 340 341 s.worldLock.RLock() 342 defer s.worldLock.RUnlock() 343 344 var w *Watcher 345 var err *etcdErr.Error 346 347 if sinceIndex == 0 { 348 w, err = s.WatcherHub.watch(key, recursive, stream, nextIndex) 349 350 } else { 351 w, err = s.WatcherHub.watch(key, recursive, stream, sinceIndex) 352 } 353 354 if err != nil { 355 // watchhub do not know the current Index 356 // we need to attach the currentIndex here 357 err.Index = s.CurrentIndex 358 return nil, err 359 } 360 361 return w, nil 362 } 363 364 // walk walks all the nodePath and apply the walkFunc on each directory 365 func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *etcdErr.Error)) (*node, *etcdErr.Error) { 366 components := strings.Split(nodePath, "/") 367 368 curr := s.Root 369 var err *etcdErr.Error 370 371 for i := 1; i < len(components); i++ { 372 if len(components[i]) == 0 { // ignore empty string 373 return curr, nil 374 } 375 376 curr, err = walkFunc(curr, components[i]) 377 if err != nil { 378 return nil, err 379 } 380 381 } 382 383 return curr, nil 384 } 385 386 // Update updates the value/ttl of the node. 387 // If the node is a file, the value and the ttl can be updated. 388 // If the node is a directory, only the ttl can be updated. 389 func (s *store) Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) { 390 nodePath = path.Clean(path.Join("/", nodePath)) 391 // we do not allow the user to change "/" 392 if nodePath == "/" { 393 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 394 } 395 396 s.worldLock.Lock() 397 defer s.worldLock.Unlock() 398 399 currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1 400 401 n, err := s.internalGet(nodePath) 402 403 if err != nil { // if the node does not exist, return error 404 s.Stats.Inc(UpdateFail) 405 return nil, err 406 } 407 408 e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex) 409 e.PrevNode = n.Repr(false, false) 410 eNode := e.Node 411 412 if n.IsDir() && len(newValue) != 0 { 413 // if the node is a directory, we cannot update value to non-empty 414 s.Stats.Inc(UpdateFail) 415 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex) 416 } 417 418 n.Write(newValue, nextIndex) 419 420 // copy the value for safety 421 newValueCopy := ustrings.Clone(newValue) 422 eNode.Value = &newValueCopy 423 424 // update ttl 425 n.UpdateTTL(expireTime) 426 427 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 428 429 s.WatcherHub.notify(e) 430 431 s.Stats.Inc(UpdateSuccess) 432 433 s.CurrentIndex = nextIndex 434 435 return e, nil 436 } 437 438 func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool, 439 expireTime time.Time, action string) (*Event, error) { 440 441 currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1 442 443 if unique { // append unique item under the node path 444 nodePath += "/" + strconv.FormatUint(nextIndex, 10) 445 } 446 447 nodePath = path.Clean(path.Join("/", nodePath)) 448 449 // we do not allow the user to change "/" 450 if nodePath == "/" { 451 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", currIndex) 452 } 453 454 // Assume expire times that are way in the past are not valid. 455 // This can occur when the time is serialized to JSON and read back in. 456 if expireTime.Before(minExpireTime) { 457 expireTime = Permanent 458 } 459 460 dirName, nodeName := path.Split(nodePath) 461 462 // walk through the nodePath, create dirs and get the last directory node 463 d, err := s.walk(dirName, s.checkDir) 464 465 if err != nil { 466 s.Stats.Inc(SetFail) 467 err.Index = currIndex 468 return nil, err 469 } 470 471 e := newEvent(action, nodePath, nextIndex, nextIndex) 472 eNode := e.Node 473 474 n, _ := d.GetChild(nodeName) 475 476 // force will try to replace a existing file 477 if n != nil { 478 if replace { 479 if n.IsDir() { 480 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex) 481 } 482 e.PrevNode = n.Repr(false, false) 483 484 n.Remove(false, false, nil) 485 } else { 486 return nil, etcdErr.NewError(etcdErr.EcodeNodeExist, nodePath, currIndex) 487 } 488 } 489 490 if !dir { // create file 491 // copy the value for safety 492 valueCopy := ustrings.Clone(value) 493 eNode.Value = &valueCopy 494 495 n = newKV(s, nodePath, value, nextIndex, d, "", expireTime) 496 497 } else { // create directory 498 eNode.Dir = true 499 500 n = newDir(s, nodePath, nextIndex, d, "", expireTime) 501 } 502 503 // we are sure d is a directory and does not have the children with name n.Name 504 d.Add(n) 505 506 // node with TTL 507 if !n.IsPermanent() { 508 s.ttlKeyHeap.push(n) 509 510 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 511 } 512 513 s.CurrentIndex = nextIndex 514 515 s.WatcherHub.notify(e) 516 517 return e, nil 518 } 519 520 // InternalGet gets the node of the given nodePath. 521 func (s *store) internalGet(nodePath string) (*node, *etcdErr.Error) { 522 nodePath = path.Clean(path.Join("/", nodePath)) 523 524 walkFunc := func(parent *node, name string) (*node, *etcdErr.Error) { 525 526 if !parent.IsDir() { 527 err := etcdErr.NewError(etcdErr.EcodeNotDir, parent.Path, s.CurrentIndex) 528 return nil, err 529 } 530 531 child, ok := parent.Children[name] 532 if ok { 533 return child, nil 534 } 535 536 return nil, etcdErr.NewError(etcdErr.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex) 537 } 538 539 f, err := s.walk(nodePath, walkFunc) 540 541 if err != nil { 542 return nil, err 543 } 544 return f, nil 545 } 546 547 // deleteExpiredKyes will delete all 548 func (s *store) DeleteExpiredKeys(cutoff time.Time) { 549 s.worldLock.Lock() 550 defer s.worldLock.Unlock() 551 552 for { 553 node := s.ttlKeyHeap.top() 554 if node == nil || node.ExpireTime.After(cutoff) { 555 break 556 } 557 558 s.CurrentIndex++ 559 e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex) 560 e.PrevNode = node.Repr(false, false) 561 562 callback := func(path string) { // notify function 563 // notify the watchers with deleted set true 564 s.WatcherHub.notifyWatchers(e, path, true) 565 } 566 567 s.ttlKeyHeap.pop() 568 node.Remove(true, true, callback) 569 570 s.Stats.Inc(ExpireCount) 571 572 s.WatcherHub.notify(e) 573 } 574 575 } 576 577 // checkDir will check whether the component is a directory under parent node. 578 // If it is a directory, this function will return the pointer to that node. 579 // If it does not exist, this function will create a new directory and return the pointer to that node. 580 // If it is a file, this function will return error. 581 func (s *store) checkDir(parent *node, dirName string) (*node, *etcdErr.Error) { 582 node, ok := parent.Children[dirName] 583 584 if ok { 585 if node.IsDir() { 586 return node, nil 587 } 588 589 return nil, etcdErr.NewError(etcdErr.EcodeNotDir, node.Path, s.CurrentIndex) 590 } 591 592 n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, parent.ACL, Permanent) 593 594 parent.Children[dirName] = n 595 596 return n, nil 597 } 598 599 // Save saves the static state of the store system. 600 // It will not be able to save the state of watchers. 601 // It will not save the parent field of the node. Or there will 602 // be cyclic dependencies issue for the json package. 603 func (s *store) Save() ([]byte, error) { 604 s.worldLock.Lock() 605 606 clonedStore := newStore() 607 clonedStore.CurrentIndex = s.CurrentIndex 608 clonedStore.Root = s.Root.Clone() 609 clonedStore.WatcherHub = s.WatcherHub.clone() 610 clonedStore.Stats = s.Stats.clone() 611 clonedStore.CurrentVersion = s.CurrentVersion 612 613 s.worldLock.Unlock() 614 615 b, err := json.Marshal(clonedStore) 616 617 if err != nil { 618 return nil, err 619 } 620 621 return b, nil 622 } 623 624 // Recovery recovers the store system from a static state 625 // It needs to recover the parent field of the nodes. 626 // It needs to delete the expired nodes since the saved time and also 627 // needs to create monitoring go routines. 628 func (s *store) Recovery(state []byte) error { 629 s.worldLock.Lock() 630 defer s.worldLock.Unlock() 631 err := json.Unmarshal(state, s) 632 633 if err != nil { 634 return err 635 } 636 637 s.ttlKeyHeap = newTtlKeyHeap() 638 639 s.Root.recoverAndclean() 640 return nil 641 } 642 643 func (s *store) JsonStats() []byte { 644 s.Stats.Watchers = uint64(s.WatcherHub.count) 645 return s.Stats.toJson() 646 } 647 648 func (s *store) TotalTransactions() uint64 { 649 return s.Stats.TotalTranscations() 650 }