github.com/alpe/etcd@v0.1.2-0.20130915230056-09f31af88aeb/store/store.go (about) 1 package store 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "path" 7 "strconv" 8 "sync" 9 "time" 10 11 etcdErr "github.com/coreos/etcd/error" 12 ) 13 14 //------------------------------------------------------------------------------ 15 // 16 // Typedefs 17 // 18 //------------------------------------------------------------------------------ 19 20 // The main struct of the Key-Value store 21 type Store struct { 22 23 // key-value store structure 24 Tree *tree 25 26 // This mutex protects everything except add watcher member. 27 // Add watch member does not depend on the current state of the store. 28 // And watch will return when other protected function is called and reach 29 // the watching condition. 30 // It is needed so that clone() can atomically replicate the Store 31 // and do the log snapshot in a go routine. 32 mutex sync.RWMutex 33 34 // WatcherHub is where we register all the clients 35 // who issue a watch request 36 watcher *WatcherHub 37 38 // The string channel to send messages to the outside world 39 // Now we use it to send changes to the hub of the web service 40 messager chan<- string 41 42 // A map to keep the recent response to the clients 43 ResponseMap map[string]*Response 44 45 // The max number of the recent responses we can record 46 ResponseMaxSize int 47 48 // The current number of the recent responses we have recorded 49 ResponseCurrSize uint 50 51 // The index of the first recent responses we have 52 ResponseStartIndex uint64 53 54 // Current index of the raft machine 55 Index uint64 56 57 // Basic statistics information of etcd storage 58 BasicStats EtcdStats 59 } 60 61 // A Node represents a Value in the Key-Value pair in the store 62 // It has its value, expire time and a channel used to update the 63 // expire time (since we do countdown in a go routine, we need to 64 // communicate with it via channel) 65 type Node struct { 66 // The string value of the node 67 Value string `json:"value"` 68 69 // If the node is a permanent one the ExprieTime will be Unix(0,0) 70 // Otherwise after the expireTime, the node will be deleted 71 ExpireTime time.Time `json:"expireTime"` 72 73 // A channel to update the expireTime of the node 74 update chan time.Time `json:"-"` 75 } 76 77 // The response from the store to the user who issue a command 78 type Response struct { 79 Action string `json:"action"` 80 Key string `json:"key"` 81 Dir bool `json:"dir,omitempty"` 82 PrevValue string `json:"prevValue,omitempty"` 83 Value string `json:"value,omitempty"` 84 85 // If the key did not exist before the action, 86 // this field should be set to true 87 NewKey bool `json:"newKey,omitempty"` 88 89 Expiration *time.Time `json:"expiration,omitempty"` 90 91 // Time to live in second 92 TTL int64 `json:"ttl,omitempty"` 93 94 // The command index of the raft machine when the command is executed 95 Index uint64 `json:"index"` 96 } 97 98 // A listNode represent the simplest Key-Value pair with its type 99 // It is only used when do list opeartion 100 // We want to have a file system like store, thus we distingush "file" 101 // and "directory" 102 type ListNode struct { 103 Key string 104 Value string 105 Type string 106 } 107 108 var PERMANENT = time.Unix(0, 0) 109 110 //------------------------------------------------------------------------------ 111 // 112 // Methods 113 // 114 //------------------------------------------------------------------------------ 115 116 // Create a new stroe 117 // Arguement max is the max number of response we want to record 118 func CreateStore(max int) *Store { 119 s := new(Store) 120 121 s.messager = nil 122 123 s.ResponseMap = make(map[string]*Response) 124 s.ResponseStartIndex = 0 125 s.ResponseMaxSize = max 126 s.ResponseCurrSize = 0 127 128 s.Tree = &tree{ 129 &treeNode{ 130 Node{ 131 "/", 132 time.Unix(0, 0), 133 nil, 134 }, 135 true, 136 make(map[string]*treeNode), 137 }, 138 } 139 140 s.watcher = newWatcherHub() 141 142 return s 143 } 144 145 // Set the messager of the store 146 func (s *Store) SetMessager(messager chan<- string) { 147 s.messager = messager 148 } 149 150 func (s *Store) Set(key string, value string, expireTime time.Time, index uint64) ([]byte, error) { 151 s.mutex.Lock() 152 defer s.mutex.Unlock() 153 154 return s.internalSet(key, value, expireTime, index) 155 156 } 157 158 // Set the key to value with expiration time 159 func (s *Store) internalSet(key string, value string, expireTime time.Time, index uint64) ([]byte, error) { 160 //Update index 161 s.Index = index 162 163 //Update stats 164 s.BasicStats.Sets++ 165 166 key = path.Clean("/" + key) 167 168 isExpire := !expireTime.Equal(PERMANENT) 169 170 // base response 171 resp := Response{ 172 Action: "SET", 173 Key: key, 174 Value: value, 175 Index: index, 176 } 177 178 // When the slow follower receive the set command 179 // the key may be expired, we should not add the node 180 // also if the node exist, we need to delete the node 181 if isExpire && expireTime.Sub(time.Now()) < 0 { 182 return s.internalDelete(key, index) 183 } 184 185 var TTL int64 186 187 // Update ttl 188 if isExpire { 189 TTL = int64(expireTime.Sub(time.Now()) / time.Second) 190 resp.Expiration = &expireTime 191 resp.TTL = TTL 192 } 193 194 // Get the node 195 node, ok := s.Tree.get(key) 196 197 if ok { 198 // Update when node exists 199 200 // Node is not permanent 201 if !node.ExpireTime.Equal(PERMANENT) { 202 203 // If node is not permanent 204 // Update its expireTime 205 node.update <- expireTime 206 207 } else { 208 209 // If we want the permanent node to have expire time 210 // We need to create a go routine with a channel 211 if isExpire { 212 node.update = make(chan time.Time) 213 go s.monitorExpiration(key, node.update, expireTime) 214 } 215 216 } 217 218 // Update the information of the node 219 s.Tree.set(key, Node{value, expireTime, node.update}) 220 221 resp.PrevValue = node.Value 222 223 s.watcher.notify(resp) 224 225 msg, err := json.Marshal(resp) 226 227 // Send to the messager 228 if s.messager != nil && err == nil { 229 s.messager <- string(msg) 230 } 231 232 s.addToResponseMap(index, &resp) 233 234 return msg, err 235 236 // Add new node 237 } else { 238 239 update := make(chan time.Time) 240 241 ok := s.Tree.set(key, Node{value, expireTime, update}) 242 243 if !ok { 244 return nil, etcdErr.NewError(102, "set: "+key) 245 } 246 247 if isExpire { 248 go s.monitorExpiration(key, update, expireTime) 249 } 250 251 resp.NewKey = true 252 253 msg, err := json.Marshal(resp) 254 255 // Nofity the watcher 256 s.watcher.notify(resp) 257 258 // Send to the messager 259 if s.messager != nil && err == nil { 260 s.messager <- string(msg) 261 } 262 263 s.addToResponseMap(index, &resp) 264 return msg, err 265 } 266 267 } 268 269 // Get the value of the key and return the raw response 270 func (s *Store) internalGet(key string) *Response { 271 272 key = path.Clean("/" + key) 273 274 node, ok := s.Tree.get(key) 275 276 if ok { 277 var TTL int64 278 var isExpire bool = false 279 280 isExpire = !node.ExpireTime.Equal(PERMANENT) 281 282 resp := &Response{ 283 Action: "GET", 284 Key: key, 285 Value: node.Value, 286 Index: s.Index, 287 } 288 289 // Update ttl 290 if isExpire { 291 TTL = int64(node.ExpireTime.Sub(time.Now()) / time.Second) 292 resp.Expiration = &node.ExpireTime 293 resp.TTL = TTL 294 } 295 296 return resp 297 298 } else { 299 // we do not found the key 300 return nil 301 } 302 } 303 304 // Get all the items under key 305 // If key is a file return the file 306 // If key is a directory reuturn an array of files 307 func (s *Store) Get(key string) ([]byte, error) { 308 s.mutex.RLock() 309 defer s.mutex.RUnlock() 310 311 resps, err := s.RawGet(key) 312 313 if err != nil { 314 return nil, err 315 } 316 317 key = path.Clean("/" + key) 318 319 // If the number of resps == 1 and the response key 320 // is the key we query, a signal key-value should 321 // be returned 322 if len(resps) == 1 && resps[0].Key == key { 323 return json.Marshal(resps[0]) 324 } 325 326 return json.Marshal(resps) 327 } 328 329 func (s *Store) rawGetNode(key string, node *Node) ([]*Response, error) { 330 resps := make([]*Response, 1) 331 332 isExpire := !node.ExpireTime.Equal(PERMANENT) 333 334 resps[0] = &Response{ 335 Action: "GET", 336 Index: s.Index, 337 Key: key, 338 Value: node.Value, 339 } 340 341 // Update ttl 342 if isExpire { 343 TTL := int64(node.ExpireTime.Sub(time.Now()) / time.Second) 344 resps[0].Expiration = &node.ExpireTime 345 resps[0].TTL = TTL 346 } 347 348 return resps, nil 349 } 350 351 func (s *Store) rawGetNodeList(key string, keys []string, nodes []*Node) ([]*Response, error) { 352 resps := make([]*Response, len(nodes)) 353 354 // TODO: check if nodes and keys are the same length 355 for i := 0; i < len(nodes); i++ { 356 var TTL int64 357 var isExpire bool = false 358 359 isExpire = !nodes[i].ExpireTime.Equal(PERMANENT) 360 361 resps[i] = &Response{ 362 Action: "GET", 363 Index: s.Index, 364 Key: path.Join(key, keys[i]), 365 } 366 367 if len(nodes[i].Value) != 0 { 368 resps[i].Value = nodes[i].Value 369 } else { 370 resps[i].Dir = true 371 } 372 373 // Update ttl 374 if isExpire { 375 TTL = int64(nodes[i].ExpireTime.Sub(time.Now()) / time.Second) 376 resps[i].Expiration = &nodes[i].ExpireTime 377 resps[i].TTL = TTL 378 } 379 380 } 381 382 return resps, nil 383 } 384 385 func (s *Store) RawGet(key string) ([]*Response, error) { 386 // Update stats 387 s.BasicStats.Gets++ 388 389 key = path.Clean("/" + key) 390 391 nodes, keys, ok := s.Tree.list(key) 392 if !ok { 393 return nil, etcdErr.NewError(100, "get: "+key) 394 } 395 396 switch node := nodes.(type) { 397 case *Node: 398 return s.rawGetNode(key, node) 399 case []*Node: 400 return s.rawGetNodeList(key, keys, node) 401 default: 402 panic("invalid cast ") 403 } 404 } 405 406 func (s *Store) Delete(key string, index uint64) ([]byte, error) { 407 s.mutex.Lock() 408 defer s.mutex.Unlock() 409 return s.internalDelete(key, index) 410 } 411 412 // Delete the key 413 func (s *Store) internalDelete(key string, index uint64) ([]byte, error) { 414 415 // Update stats 416 s.BasicStats.Deletes++ 417 418 key = path.Clean("/" + key) 419 420 // Update index 421 s.Index = index 422 423 node, ok := s.Tree.get(key) 424 425 if !ok { 426 return nil, etcdErr.NewError(100, "delete: "+key) 427 } 428 429 resp := Response{ 430 Action: "DELETE", 431 Key: key, 432 PrevValue: node.Value, 433 Index: index, 434 } 435 436 if node.ExpireTime.Equal(PERMANENT) { 437 438 s.Tree.delete(key) 439 440 } else { 441 resp.Expiration = &node.ExpireTime 442 // Kill the expire go routine 443 node.update <- PERMANENT 444 s.Tree.delete(key) 445 446 } 447 448 msg, err := json.Marshal(resp) 449 450 s.watcher.notify(resp) 451 452 // notify the messager 453 if s.messager != nil && err == nil { 454 s.messager <- string(msg) 455 } 456 457 s.addToResponseMap(index, &resp) 458 459 return msg, err 460 } 461 462 // Set the value of the key to the value if the given prevValue is equal to the value of the key 463 func (s *Store) TestAndSet(key string, prevValue string, value string, expireTime time.Time, index uint64) ([]byte, error) { 464 s.mutex.Lock() 465 defer s.mutex.Unlock() 466 467 // Update stats 468 s.BasicStats.TestAndSets++ 469 470 resp := s.internalGet(key) 471 472 if resp == nil { 473 if prevValue != "" { 474 errmsg := fmt.Sprintf("TestAndSet: key not found and previousValue is not empty %s:%s ", key, prevValue) 475 return nil, etcdErr.NewError(100, errmsg) 476 } 477 return s.internalSet(key, value, expireTime, index) 478 } 479 480 if resp.Value == prevValue { 481 482 // If test succeed, do set 483 return s.internalSet(key, value, expireTime, index) 484 } else { 485 486 // If fails, return err 487 return nil, etcdErr.NewError(101, fmt.Sprintf("TestAndSet: %s!=%s", 488 resp.Value, prevValue)) 489 } 490 491 } 492 493 // Add a channel to the watchHub. 494 // The watchHub will send response to the channel when any key under the prefix 495 // changes [since the sinceIndex if given] 496 func (s *Store) AddWatcher(prefix string, watcher *Watcher, sinceIndex uint64) error { 497 return s.watcher.addWatcher(prefix, watcher, sinceIndex, s.ResponseStartIndex, s.Index, s.ResponseMap) 498 } 499 500 // This function should be created as a go routine to delete the key-value pair 501 // when it reaches expiration time 502 503 func (s *Store) monitorExpiration(key string, update chan time.Time, expireTime time.Time) { 504 505 duration := expireTime.Sub(time.Now()) 506 507 for { 508 select { 509 510 // Timeout delete the node 511 case <-time.After(duration): 512 node, ok := s.Tree.get(key) 513 514 if !ok { 515 return 516 517 } else { 518 s.mutex.Lock() 519 520 s.Tree.delete(key) 521 522 resp := Response{ 523 Action: "DELETE", 524 Key: key, 525 PrevValue: node.Value, 526 Expiration: &node.ExpireTime, 527 Index: s.Index, 528 } 529 s.mutex.Unlock() 530 531 msg, err := json.Marshal(resp) 532 533 s.watcher.notify(resp) 534 535 // notify the messager 536 if s.messager != nil && err == nil { 537 s.messager <- string(msg) 538 } 539 540 return 541 542 } 543 544 case updateTime := <-update: 545 // Update duration 546 // If the node become a permanent one, the go routine is 547 // not needed 548 if updateTime.Equal(PERMANENT) { 549 return 550 } 551 552 // Update duration 553 duration = updateTime.Sub(time.Now()) 554 } 555 } 556 } 557 558 // When we receive a command that will change the state of the key-value store 559 // We will add the result of it to the ResponseMap for the use of watch command 560 // Also we may remove the oldest response when we add new one 561 func (s *Store) addToResponseMap(index uint64, resp *Response) { 562 563 // zero case 564 if s.ResponseMaxSize == 0 { 565 return 566 } 567 568 strIndex := strconv.FormatUint(index, 10) 569 s.ResponseMap[strIndex] = resp 570 571 // unlimited 572 if s.ResponseMaxSize < 0 { 573 s.ResponseCurrSize++ 574 return 575 } 576 577 // if we reach the max point, we need to delete the most latest 578 // response and update the startIndex 579 if s.ResponseCurrSize == uint(s.ResponseMaxSize) { 580 s.ResponseStartIndex++ 581 delete(s.ResponseMap, strconv.FormatUint(s.ResponseStartIndex, 10)) 582 } else { 583 s.ResponseCurrSize++ 584 } 585 } 586 587 func (s *Store) clone() *Store { 588 newStore := &Store{ 589 ResponseMaxSize: s.ResponseMaxSize, 590 ResponseCurrSize: s.ResponseCurrSize, 591 ResponseStartIndex: s.ResponseStartIndex, 592 Index: s.Index, 593 BasicStats: s.BasicStats, 594 } 595 596 newStore.Tree = s.Tree.clone() 597 newStore.ResponseMap = make(map[string]*Response) 598 599 for index, response := range s.ResponseMap { 600 newStore.ResponseMap[index] = response 601 } 602 603 return newStore 604 } 605 606 // Save the current state of the storage system 607 func (s *Store) Save() ([]byte, error) { 608 // first we clone the store 609 // json is very slow, we cannot hold the lock for such a long time 610 s.mutex.Lock() 611 cloneStore := s.clone() 612 s.mutex.Unlock() 613 614 b, err := json.Marshal(cloneStore) 615 if err != nil { 616 fmt.Println(err) 617 return nil, err 618 } 619 return b, nil 620 } 621 622 // Recovery the state of the stroage system from a previous state 623 func (s *Store) Recovery(state []byte) error { 624 s.mutex.Lock() 625 defer s.mutex.Unlock() 626 // we need to stop all the current watchers 627 // recovery will clear watcherHub 628 s.watcher.stopWatchers() 629 630 err := json.Unmarshal(state, s) 631 632 // The only thing need to change after the recovery is the 633 // node with expiration time, we need to delete all the node 634 // that have been expired and setup go routines to monitor the 635 // other ones 636 s.checkExpiration() 637 638 return err 639 } 640 641 // Clean the expired nodes 642 // Set up go routines to mon 643 func (s *Store) checkExpiration() { 644 s.Tree.traverse(s.checkNode, false) 645 } 646 647 // Check each node 648 func (s *Store) checkNode(key string, node *Node) { 649 650 if node.ExpireTime.Equal(PERMANENT) { 651 return 652 } else { 653 if node.ExpireTime.Sub(time.Now()) >= time.Second { 654 655 node.update = make(chan time.Time) 656 go s.monitorExpiration(key, node.update, node.ExpireTime) 657 658 } else { 659 // we should delete this node 660 s.Tree.delete(key) 661 } 662 } 663 }