github.com/kula/etcd@v0.2.1-0.20131226070625-e96234382ac0/store/store.go (about) 1 /* 2 Copyright 2013 CoreOS Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package store 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "path" 23 "sort" 24 "strconv" 25 "strings" 26 "sync" 27 "time" 28 29 etcdErr "github.com/coreos/etcd/error" 30 ) 31 32 // The default version to set when the store is first initialized. 33 const defaultVersion = 2 34 35 var minExpireTime time.Time 36 37 func init() { 38 minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z") 39 } 40 41 type Store interface { 42 Version() int 43 CommandFactory() CommandFactory 44 Index() uint64 45 46 Get(nodePath string, recursive, sorted bool) (*Event, error) 47 Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) 48 Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) 49 Create(nodePath string, dir bool, value string, unique bool, 50 expireTime time.Time) (*Event, error) 51 CompareAndSwap(nodePath string, prevValue string, prevIndex uint64, 52 value string, expireTime time.Time) (*Event, error) 53 Delete(nodePath string, recursive, dir bool) (*Event, error) 54 CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) 55 Watch(prefix string, recursive bool, sinceIndex uint64) (<-chan *Event, error) 56 57 Save() ([]byte, error) 58 Recovery(state []byte) error 59 60 TotalTransactions() uint64 61 JsonStats() []byte 62 DeleteExpiredKeys(cutoff time.Time) 63 } 64 65 type store struct { 66 Root *node 67 WatcherHub *watcherHub 68 CurrentIndex uint64 69 Stats *Stats 70 CurrentVersion int 71 ttlKeyHeap *ttlKeyHeap // need to recovery manually 72 worldLock sync.RWMutex // stop the world lock 73 } 74 75 func New() Store { 76 return newStore() 77 } 78 79 func newStore() *store { 80 s := new(store) 81 s.CurrentVersion = defaultVersion 82 s.Root = newDir(s, "/", s.CurrentIndex, nil, "", Permanent) 83 s.Stats = newStats() 84 s.WatcherHub = newWatchHub(1000) 85 s.ttlKeyHeap = newTtlKeyHeap() 86 return s 87 } 88 89 // Version retrieves current version of the store. 90 func (s *store) Version() int { 91 return s.CurrentVersion 92 } 93 94 // Retrieves current of the store 95 func (s *store) Index() uint64 { 96 return s.CurrentIndex 97 } 98 99 // CommandFactory retrieves the command factory for the current version of the store. 100 func (s *store) CommandFactory() CommandFactory { 101 return GetCommandFactory(s.Version()) 102 } 103 104 // Get function returns a get event. 105 // If recursive is true, it will return all the content under the node path. 106 // If sorted is true, it will sort the content by keys. 107 func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) { 108 s.worldLock.RLock() 109 defer s.worldLock.RUnlock() 110 111 nodePath = path.Clean(path.Join("/", nodePath)) 112 113 n, err := s.internalGet(nodePath) 114 115 if err != nil { 116 s.Stats.Inc(GetFail) 117 return nil, err 118 } 119 120 e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex) 121 eNode := e.Node 122 123 if n.IsDir() { // node is a directory 124 eNode.Dir = true 125 126 children, _ := n.List() 127 eNode.Nodes = make(NodeExterns, len(children)) 128 129 // we do not use the index in the children slice directly 130 // we need to skip the hidden one 131 i := 0 132 133 for _, child := range children { 134 if child.IsHidden() { // get will not return hidden nodes 135 continue 136 } 137 138 eNode.Nodes[i] = child.Repr(recursive, sorted) 139 i++ 140 } 141 142 // eliminate hidden nodes 143 eNode.Nodes = eNode.Nodes[:i] 144 145 if sorted { 146 sort.Sort(eNode.Nodes) 147 } 148 149 } else { // node is a file 150 eNode.Value, _ = n.Read() 151 } 152 153 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 154 155 s.Stats.Inc(GetSuccess) 156 157 return e, nil 158 } 159 160 // Create function creates the node at nodePath. Create will help to create intermediate directories with no ttl. 161 // If the node has already existed, create will fail. 162 // If any node on the path is a file, create will fail. 163 func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireTime time.Time) (*Event, error) { 164 s.worldLock.Lock() 165 defer s.worldLock.Unlock() 166 e, err := s.internalCreate(nodePath, dir, value, unique, false, expireTime, Create) 167 168 if err == nil { 169 s.Stats.Inc(CreateSuccess) 170 } else { 171 s.Stats.Inc(CreateFail) 172 } 173 174 return e, err 175 } 176 177 // Set function creates or replace the node at nodePath. 178 func (s *store) Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) { 179 s.worldLock.Lock() 180 defer s.worldLock.Unlock() 181 e, err := s.internalCreate(nodePath, dir, value, false, true, expireTime, Set) 182 183 if err == nil { 184 s.Stats.Inc(SetSuccess) 185 } else { 186 s.Stats.Inc(SetFail) 187 } 188 189 return e, err 190 } 191 192 func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64, 193 value string, expireTime time.Time) (*Event, error) { 194 195 nodePath = path.Clean(path.Join("/", nodePath)) 196 // we do not allow the user to change "/" 197 if nodePath == "/" { 198 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 199 } 200 201 s.worldLock.Lock() 202 defer s.worldLock.Unlock() 203 204 n, err := s.internalGet(nodePath) 205 206 if err != nil { 207 s.Stats.Inc(CompareAndSwapFail) 208 return nil, err 209 } 210 211 if n.IsDir() { // can only compare and swap file 212 s.Stats.Inc(CompareAndSwapFail) 213 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex) 214 } 215 216 // If both of the prevValue and prevIndex are given, we will test both of them. 217 // Command will be executed, only if both of the tests are successful. 218 if !n.Compare(prevValue, prevIndex) { 219 cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex) 220 s.Stats.Inc(CompareAndSwapFail) 221 return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex) 222 } 223 224 // update etcd index 225 s.CurrentIndex++ 226 227 e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex) 228 eNode := e.Node 229 230 eNode.PrevValue = n.Value 231 232 // if test succeed, write the value 233 n.Write(value, s.CurrentIndex) 234 n.UpdateTTL(expireTime) 235 236 eNode.Value = value 237 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 238 239 s.WatcherHub.notify(e) 240 s.Stats.Inc(CompareAndSwapSuccess) 241 return e, nil 242 } 243 244 // Delete function deletes the node at the given path. 245 // If the node is a directory, recursive must be true to delete it. 246 func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) { 247 nodePath = path.Clean(path.Join("/", nodePath)) 248 // we do not allow the user to change "/" 249 if nodePath == "/" { 250 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 251 } 252 253 s.worldLock.Lock() 254 defer s.worldLock.Unlock() 255 256 // recursive implies dir 257 if recursive == true { 258 dir = true 259 } 260 261 n, err := s.internalGet(nodePath) 262 263 if err != nil { // if the node does not exist, return error 264 s.Stats.Inc(DeleteFail) 265 return nil, err 266 } 267 268 nextIndex := s.CurrentIndex + 1 269 e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex) 270 eNode := e.Node 271 272 if n.IsDir() { 273 eNode.Dir = true 274 } else { 275 eNode.PrevValue = n.Value 276 } 277 278 callback := func(path string) { // notify function 279 // notify the watchers with deleted set true 280 s.WatcherHub.notifyWatchers(e, path, true) 281 } 282 283 err = n.Remove(dir, recursive, callback) 284 285 if err != nil { 286 s.Stats.Inc(DeleteFail) 287 return nil, err 288 } 289 290 // update etcd index 291 s.CurrentIndex++ 292 293 s.WatcherHub.notify(e) 294 s.Stats.Inc(DeleteSuccess) 295 296 return e, nil 297 } 298 299 func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) { 300 nodePath = path.Clean(path.Join("/", nodePath)) 301 302 s.worldLock.Lock() 303 defer s.worldLock.Unlock() 304 305 n, err := s.internalGet(nodePath) 306 307 if err != nil { // if the node does not exist, return error 308 s.Stats.Inc(CompareAndDeleteFail) 309 return nil, err 310 } 311 312 if n.IsDir() { // can only compare and delete file 313 s.Stats.Inc(CompareAndSwapFail) 314 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex) 315 } 316 317 // If both of the prevValue and prevIndex are given, we will test both of them. 318 // Command will be executed, only if both of the tests are successful. 319 if !n.Compare(prevValue, prevIndex) { 320 cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex) 321 s.Stats.Inc(CompareAndDeleteFail) 322 return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex) 323 } 324 325 // update etcd index 326 s.CurrentIndex++ 327 328 e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex) 329 330 callback := func(path string) { // notify function 331 // notify the watchers with deleted set true 332 s.WatcherHub.notifyWatchers(e, path, true) 333 } 334 335 // delete a key-value pair, no error should happen 336 n.Remove(false, false, callback) 337 338 s.WatcherHub.notify(e) 339 s.Stats.Inc(CompareAndDeleteSuccess) 340 return e, nil 341 } 342 343 func (s *store) Watch(key string, recursive bool, sinceIndex uint64) (<-chan *Event, error) { 344 key = path.Clean(path.Join("/", key)) 345 nextIndex := s.CurrentIndex + 1 346 347 s.worldLock.RLock() 348 defer s.worldLock.RUnlock() 349 350 var c <-chan *Event 351 var err *etcdErr.Error 352 353 if sinceIndex == 0 { 354 c, err = s.WatcherHub.watch(key, recursive, nextIndex) 355 356 } else { 357 c, err = s.WatcherHub.watch(key, recursive, sinceIndex) 358 } 359 360 if err != nil { 361 // watchhub do not know the current Index 362 // we need to attach the currentIndex here 363 err.Index = s.CurrentIndex 364 return nil, err 365 } 366 367 return c, nil 368 } 369 370 // walk function walks all the nodePath and apply the walkFunc on each directory 371 func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *etcdErr.Error)) (*node, *etcdErr.Error) { 372 components := strings.Split(nodePath, "/") 373 374 curr := s.Root 375 var err *etcdErr.Error 376 377 for i := 1; i < len(components); i++ { 378 if len(components[i]) == 0 { // ignore empty string 379 return curr, nil 380 } 381 382 curr, err = walkFunc(curr, components[i]) 383 if err != nil { 384 return nil, err 385 } 386 387 } 388 389 return curr, nil 390 } 391 392 // Update function updates the value/ttl of the node. 393 // If the node is a file, the value and the ttl can be updated. 394 // If the node is a directory, only the ttl can be updated. 395 func (s *store) Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) { 396 nodePath = path.Clean(path.Join("/", nodePath)) 397 // we do not allow the user to change "/" 398 if nodePath == "/" { 399 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex) 400 } 401 402 s.worldLock.Lock() 403 defer s.worldLock.Unlock() 404 405 currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1 406 407 n, err := s.internalGet(nodePath) 408 409 if err != nil { // if the node does not exist, return error 410 s.Stats.Inc(UpdateFail) 411 return nil, err 412 } 413 414 e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex) 415 eNode := e.Node 416 417 if n.IsDir() && len(newValue) != 0 { 418 // if the node is a directory, we cannot update value to non-empty 419 s.Stats.Inc(UpdateFail) 420 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex) 421 } 422 423 eNode.PrevValue = n.Value 424 n.Write(newValue, nextIndex) 425 eNode.Value = newValue 426 427 // update ttl 428 n.UpdateTTL(expireTime) 429 430 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 431 432 s.WatcherHub.notify(e) 433 434 s.Stats.Inc(UpdateSuccess) 435 436 s.CurrentIndex = nextIndex 437 438 return e, nil 439 } 440 441 func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool, 442 expireTime time.Time, action string) (*Event, error) { 443 444 currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1 445 446 if unique { // append unique item under the node path 447 nodePath += "/" + strconv.FormatUint(nextIndex, 10) 448 } 449 450 nodePath = path.Clean(path.Join("/", nodePath)) 451 452 // we do not allow the user to change "/" 453 if nodePath == "/" { 454 return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", currIndex) 455 } 456 457 // Assume expire times that are way in the past are not valid. 458 // This can occur when the time is serialized to JSON and read back in. 459 if expireTime.Before(minExpireTime) { 460 expireTime = Permanent 461 } 462 463 dirName, nodeName := path.Split(nodePath) 464 465 // walk through the nodePath, create dirs and get the last directory node 466 d, err := s.walk(dirName, s.checkDir) 467 468 if err != nil { 469 s.Stats.Inc(SetFail) 470 err.Index = currIndex 471 return nil, err 472 } 473 474 e := newEvent(action, nodePath, nextIndex, nextIndex) 475 eNode := e.Node 476 477 n, _ := d.GetChild(nodeName) 478 479 // force will try to replace a existing file 480 if n != nil { 481 if replace { 482 if n.IsDir() { 483 return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex) 484 } 485 eNode.PrevValue, _ = n.Read() 486 487 n.Remove(false, false, nil) 488 } else { 489 return nil, etcdErr.NewError(etcdErr.EcodeNodeExist, nodePath, currIndex) 490 } 491 } 492 493 if !dir { // create file 494 eNode.Value = value 495 496 n = newKV(s, nodePath, value, nextIndex, d, "", expireTime) 497 498 } else { // create directory 499 eNode.Dir = true 500 501 n = newDir(s, nodePath, nextIndex, d, "", expireTime) 502 } 503 504 // we are sure d is a directory and does not have the children with name n.Name 505 d.Add(n) 506 507 // node with TTL 508 if !n.IsPermanent() { 509 s.ttlKeyHeap.push(n) 510 511 eNode.Expiration, eNode.TTL = n.ExpirationAndTTL() 512 } 513 514 s.CurrentIndex = nextIndex 515 516 s.WatcherHub.notify(e) 517 return e, nil 518 } 519 520 // InternalGet function get the node of the given nodePath. 521 func (s *store) internalGet(nodePath string) (*node, *etcdErr.Error) { 522 nodePath = path.Clean(path.Join("/", nodePath)) 523 524 walkFunc := func(parent *node, name string) (*node, *etcdErr.Error) { 525 526 if !parent.IsDir() { 527 err := etcdErr.NewError(etcdErr.EcodeNotDir, parent.Path, s.CurrentIndex) 528 return nil, err 529 } 530 531 child, ok := parent.Children[name] 532 if ok { 533 return child, nil 534 } 535 536 return nil, etcdErr.NewError(etcdErr.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex) 537 } 538 539 f, err := s.walk(nodePath, walkFunc) 540 541 if err != nil { 542 return nil, err 543 } 544 return f, nil 545 } 546 547 // deleteExpiredKyes will delete all 548 func (s *store) DeleteExpiredKeys(cutoff time.Time) { 549 s.worldLock.Lock() 550 defer s.worldLock.Unlock() 551 552 for { 553 node := s.ttlKeyHeap.top() 554 if node == nil || node.ExpireTime.After(cutoff) { 555 break 556 } 557 558 s.CurrentIndex++ 559 e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex) 560 561 callback := func(path string) { // notify function 562 // notify the watchers with deleted set true 563 s.WatcherHub.notifyWatchers(e, path, true) 564 } 565 566 s.ttlKeyHeap.pop() 567 node.Remove(true, true, callback) 568 569 s.Stats.Inc(ExpireCount) 570 s.WatcherHub.notify(e) 571 } 572 573 } 574 575 // checkDir function will check whether the component is a directory under parent node. 576 // If it is a directory, this function will return the pointer to that node. 577 // If it does not exist, this function will create a new directory and return the pointer to that node. 578 // If it is a file, this function will return error. 579 func (s *store) checkDir(parent *node, dirName string) (*node, *etcdErr.Error) { 580 node, ok := parent.Children[dirName] 581 582 if ok { 583 if node.IsDir() { 584 return node, nil 585 } 586 587 return nil, etcdErr.NewError(etcdErr.EcodeNotDir, node.Path, s.CurrentIndex) 588 } 589 590 n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, parent.ACL, Permanent) 591 592 parent.Children[dirName] = n 593 594 return n, nil 595 } 596 597 // Save function saves the static state of the store system. 598 // Save function will not be able to save the state of watchers. 599 // Save function will not save the parent field of the node. Or there will 600 // be cyclic dependencies issue for the json package. 601 func (s *store) Save() ([]byte, error) { 602 s.worldLock.Lock() 603 604 clonedStore := newStore() 605 clonedStore.CurrentIndex = s.CurrentIndex 606 clonedStore.Root = s.Root.Clone() 607 clonedStore.WatcherHub = s.WatcherHub.clone() 608 clonedStore.Stats = s.Stats.clone() 609 clonedStore.CurrentVersion = s.CurrentVersion 610 611 s.worldLock.Unlock() 612 613 b, err := json.Marshal(clonedStore) 614 615 if err != nil { 616 return nil, err 617 } 618 619 return b, nil 620 } 621 622 // recovery function recovery the store system from a static state. 623 // It needs to recovery the parent field of the nodes. 624 // It needs to delete the expired nodes since the saved time and also 625 // need to create monitor go routines. 626 func (s *store) Recovery(state []byte) error { 627 s.worldLock.Lock() 628 defer s.worldLock.Unlock() 629 err := json.Unmarshal(state, s) 630 631 if err != nil { 632 return err 633 } 634 635 s.ttlKeyHeap = newTtlKeyHeap() 636 637 s.Root.recoverAndclean() 638 return nil 639 } 640 641 func (s *store) JsonStats() []byte { 642 s.Stats.Watchers = uint64(s.WatcherHub.count) 643 return s.Stats.toJson() 644 } 645 646 func (s *store) TotalTransactions() uint64 { 647 return s.Stats.TotalTranscations() 648 }