github.com/kula/etcd@v0.2.1-0.20131226070625-e96234382ac0/store/store.go (about)

     1  /*
     2  Copyright 2013 CoreOS Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package store
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"path"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  	"time"
    28  
    29  	etcdErr "github.com/coreos/etcd/error"
    30  )
    31  
    32  // The default version to set when the store is first initialized.
    33  const defaultVersion = 2
    34  
    35  var minExpireTime time.Time
    36  
    37  func init() {
    38  	minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z")
    39  }
    40  
    41  type Store interface {
    42  	Version() int
    43  	CommandFactory() CommandFactory
    44  	Index() uint64
    45  
    46  	Get(nodePath string, recursive, sorted bool) (*Event, error)
    47  	Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error)
    48  	Update(nodePath string, newValue string, expireTime time.Time) (*Event, error)
    49  	Create(nodePath string, dir bool, value string, unique bool,
    50  		expireTime time.Time) (*Event, error)
    51  	CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
    52  		value string, expireTime time.Time) (*Event, error)
    53  	Delete(nodePath string, recursive, dir bool) (*Event, error)
    54  	CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error)
    55  	Watch(prefix string, recursive bool, sinceIndex uint64) (<-chan *Event, error)
    56  
    57  	Save() ([]byte, error)
    58  	Recovery(state []byte) error
    59  
    60  	TotalTransactions() uint64
    61  	JsonStats() []byte
    62  	DeleteExpiredKeys(cutoff time.Time)
    63  }
    64  
    65  type store struct {
    66  	Root           *node
    67  	WatcherHub     *watcherHub
    68  	CurrentIndex   uint64
    69  	Stats          *Stats
    70  	CurrentVersion int
    71  	ttlKeyHeap     *ttlKeyHeap  // need to recovery manually
    72  	worldLock      sync.RWMutex // stop the world lock
    73  }
    74  
    75  func New() Store {
    76  	return newStore()
    77  }
    78  
    79  func newStore() *store {
    80  	s := new(store)
    81  	s.CurrentVersion = defaultVersion
    82  	s.Root = newDir(s, "/", s.CurrentIndex, nil, "", Permanent)
    83  	s.Stats = newStats()
    84  	s.WatcherHub = newWatchHub(1000)
    85  	s.ttlKeyHeap = newTtlKeyHeap()
    86  	return s
    87  }
    88  
    89  // Version retrieves current version of the store.
    90  func (s *store) Version() int {
    91  	return s.CurrentVersion
    92  }
    93  
    94  // Retrieves current of the store
    95  func (s *store) Index() uint64 {
    96  	return s.CurrentIndex
    97  }
    98  
    99  // CommandFactory retrieves the command factory for the current version of the store.
   100  func (s *store) CommandFactory() CommandFactory {
   101  	return GetCommandFactory(s.Version())
   102  }
   103  
   104  // Get function returns a get event.
   105  // If recursive is true, it will return all the content under the node path.
   106  // If sorted is true, it will sort the content by keys.
   107  func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) {
   108  	s.worldLock.RLock()
   109  	defer s.worldLock.RUnlock()
   110  
   111  	nodePath = path.Clean(path.Join("/", nodePath))
   112  
   113  	n, err := s.internalGet(nodePath)
   114  
   115  	if err != nil {
   116  		s.Stats.Inc(GetFail)
   117  		return nil, err
   118  	}
   119  
   120  	e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
   121  	eNode := e.Node
   122  
   123  	if n.IsDir() { // node is a directory
   124  		eNode.Dir = true
   125  
   126  		children, _ := n.List()
   127  		eNode.Nodes = make(NodeExterns, len(children))
   128  
   129  		// we do not use the index in the children slice directly
   130  		// we need to skip the hidden one
   131  		i := 0
   132  
   133  		for _, child := range children {
   134  			if child.IsHidden() { // get will not return hidden nodes
   135  				continue
   136  			}
   137  
   138  			eNode.Nodes[i] = child.Repr(recursive, sorted)
   139  			i++
   140  		}
   141  
   142  		// eliminate hidden nodes
   143  		eNode.Nodes = eNode.Nodes[:i]
   144  
   145  		if sorted {
   146  			sort.Sort(eNode.Nodes)
   147  		}
   148  
   149  	} else { // node is a file
   150  		eNode.Value, _ = n.Read()
   151  	}
   152  
   153  	eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   154  
   155  	s.Stats.Inc(GetSuccess)
   156  
   157  	return e, nil
   158  }
   159  
   160  // Create function creates the node at nodePath. Create will help to create intermediate directories with no ttl.
   161  // If the node has already existed, create will fail.
   162  // If any node on the path is a file, create will fail.
   163  func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireTime time.Time) (*Event, error) {
   164  	s.worldLock.Lock()
   165  	defer s.worldLock.Unlock()
   166  	e, err := s.internalCreate(nodePath, dir, value, unique, false, expireTime, Create)
   167  
   168  	if err == nil {
   169  		s.Stats.Inc(CreateSuccess)
   170  	} else {
   171  		s.Stats.Inc(CreateFail)
   172  	}
   173  
   174  	return e, err
   175  }
   176  
   177  // Set function creates or replace the node at nodePath.
   178  func (s *store) Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) {
   179  	s.worldLock.Lock()
   180  	defer s.worldLock.Unlock()
   181  	e, err := s.internalCreate(nodePath, dir, value, false, true, expireTime, Set)
   182  
   183  	if err == nil {
   184  		s.Stats.Inc(SetSuccess)
   185  	} else {
   186  		s.Stats.Inc(SetFail)
   187  	}
   188  
   189  	return e, err
   190  }
   191  
   192  func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
   193  	value string, expireTime time.Time) (*Event, error) {
   194  
   195  	nodePath = path.Clean(path.Join("/", nodePath))
   196  	// we do not allow the user to change "/"
   197  	if nodePath == "/" {
   198  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   199  	}
   200  
   201  	s.worldLock.Lock()
   202  	defer s.worldLock.Unlock()
   203  
   204  	n, err := s.internalGet(nodePath)
   205  
   206  	if err != nil {
   207  		s.Stats.Inc(CompareAndSwapFail)
   208  		return nil, err
   209  	}
   210  
   211  	if n.IsDir() { // can only compare and swap file
   212  		s.Stats.Inc(CompareAndSwapFail)
   213  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
   214  	}
   215  
   216  	// If both of the prevValue and prevIndex are given, we will test both of them.
   217  	// Command will be executed, only if both of the tests are successful.
   218  	if !n.Compare(prevValue, prevIndex) {
   219  		cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
   220  		s.Stats.Inc(CompareAndSwapFail)
   221  		return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
   222  	}
   223  
   224  	// update etcd index
   225  	s.CurrentIndex++
   226  
   227  	e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex)
   228  	eNode := e.Node
   229  
   230  	eNode.PrevValue = n.Value
   231  
   232  	// if test succeed, write the value
   233  	n.Write(value, s.CurrentIndex)
   234  	n.UpdateTTL(expireTime)
   235  
   236  	eNode.Value = value
   237  	eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   238  
   239  	s.WatcherHub.notify(e)
   240  	s.Stats.Inc(CompareAndSwapSuccess)
   241  	return e, nil
   242  }
   243  
   244  // Delete function deletes the node at the given path.
   245  // If the node is a directory, recursive must be true to delete it.
   246  func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) {
   247  	nodePath = path.Clean(path.Join("/", nodePath))
   248  	// we do not allow the user to change "/"
   249  	if nodePath == "/" {
   250  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   251  	}
   252  
   253  	s.worldLock.Lock()
   254  	defer s.worldLock.Unlock()
   255  
   256  	// recursive implies dir
   257  	if recursive == true {
   258  		dir = true
   259  	}
   260  
   261  	n, err := s.internalGet(nodePath)
   262  
   263  	if err != nil { // if the node does not exist, return error
   264  		s.Stats.Inc(DeleteFail)
   265  		return nil, err
   266  	}
   267  
   268  	nextIndex := s.CurrentIndex + 1
   269  	e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex)
   270  	eNode := e.Node
   271  
   272  	if n.IsDir() {
   273  		eNode.Dir = true
   274  	} else {
   275  		eNode.PrevValue = n.Value
   276  	}
   277  
   278  	callback := func(path string) { // notify function
   279  		// notify the watchers with deleted set true
   280  		s.WatcherHub.notifyWatchers(e, path, true)
   281  	}
   282  
   283  	err = n.Remove(dir, recursive, callback)
   284  
   285  	if err != nil {
   286  		s.Stats.Inc(DeleteFail)
   287  		return nil, err
   288  	}
   289  
   290  	// update etcd index
   291  	s.CurrentIndex++
   292  
   293  	s.WatcherHub.notify(e)
   294  	s.Stats.Inc(DeleteSuccess)
   295  
   296  	return e, nil
   297  }
   298  
   299  func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) {
   300  	nodePath = path.Clean(path.Join("/", nodePath))
   301  
   302  	s.worldLock.Lock()
   303  	defer s.worldLock.Unlock()
   304  
   305  	n, err := s.internalGet(nodePath)
   306  
   307  	if err != nil { // if the node does not exist, return error
   308  		s.Stats.Inc(CompareAndDeleteFail)
   309  		return nil, err
   310  	}
   311  
   312  	if n.IsDir() { // can only compare and delete file
   313  		s.Stats.Inc(CompareAndSwapFail)
   314  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
   315  	}
   316  
   317  	// If both of the prevValue and prevIndex are given, we will test both of them.
   318  	// Command will be executed, only if both of the tests are successful.
   319  	if !n.Compare(prevValue, prevIndex) {
   320  		cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
   321  		s.Stats.Inc(CompareAndDeleteFail)
   322  		return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
   323  	}
   324  
   325  	// update etcd index
   326  	s.CurrentIndex++
   327  
   328  	e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex)
   329  
   330  	callback := func(path string) { // notify function
   331  		// notify the watchers with deleted set true
   332  		s.WatcherHub.notifyWatchers(e, path, true)
   333  	}
   334  
   335  	// delete a key-value pair, no error should happen
   336  	n.Remove(false, false, callback)
   337  
   338  	s.WatcherHub.notify(e)
   339  	s.Stats.Inc(CompareAndDeleteSuccess)
   340  	return e, nil
   341  }
   342  
   343  func (s *store) Watch(key string, recursive bool, sinceIndex uint64) (<-chan *Event, error) {
   344  	key = path.Clean(path.Join("/", key))
   345  	nextIndex := s.CurrentIndex + 1
   346  
   347  	s.worldLock.RLock()
   348  	defer s.worldLock.RUnlock()
   349  
   350  	var c <-chan *Event
   351  	var err *etcdErr.Error
   352  
   353  	if sinceIndex == 0 {
   354  		c, err = s.WatcherHub.watch(key, recursive, nextIndex)
   355  
   356  	} else {
   357  		c, err = s.WatcherHub.watch(key, recursive, sinceIndex)
   358  	}
   359  
   360  	if err != nil {
   361  		// watchhub do not know the current Index
   362  		// we need to attach the currentIndex here
   363  		err.Index = s.CurrentIndex
   364  		return nil, err
   365  	}
   366  
   367  	return c, nil
   368  }
   369  
   370  // walk function walks all the nodePath and apply the walkFunc on each directory
   371  func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *etcdErr.Error)) (*node, *etcdErr.Error) {
   372  	components := strings.Split(nodePath, "/")
   373  
   374  	curr := s.Root
   375  	var err *etcdErr.Error
   376  
   377  	for i := 1; i < len(components); i++ {
   378  		if len(components[i]) == 0 { // ignore empty string
   379  			return curr, nil
   380  		}
   381  
   382  		curr, err = walkFunc(curr, components[i])
   383  		if err != nil {
   384  			return nil, err
   385  		}
   386  
   387  	}
   388  
   389  	return curr, nil
   390  }
   391  
   392  // Update function updates the value/ttl of the node.
   393  // If the node is a file, the value and the ttl can be updated.
   394  // If the node is a directory, only the ttl can be updated.
   395  func (s *store) Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) {
   396  	nodePath = path.Clean(path.Join("/", nodePath))
   397  	// we do not allow the user to change "/"
   398  	if nodePath == "/" {
   399  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   400  	}
   401  
   402  	s.worldLock.Lock()
   403  	defer s.worldLock.Unlock()
   404  
   405  	currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
   406  
   407  	n, err := s.internalGet(nodePath)
   408  
   409  	if err != nil { // if the node does not exist, return error
   410  		s.Stats.Inc(UpdateFail)
   411  		return nil, err
   412  	}
   413  
   414  	e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex)
   415  	eNode := e.Node
   416  
   417  	if n.IsDir() && len(newValue) != 0 {
   418  		// if the node is a directory, we cannot update value to non-empty
   419  		s.Stats.Inc(UpdateFail)
   420  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
   421  	}
   422  
   423  	eNode.PrevValue = n.Value
   424  	n.Write(newValue, nextIndex)
   425  	eNode.Value = newValue
   426  
   427  	// update ttl
   428  	n.UpdateTTL(expireTime)
   429  
   430  	eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   431  
   432  	s.WatcherHub.notify(e)
   433  
   434  	s.Stats.Inc(UpdateSuccess)
   435  
   436  	s.CurrentIndex = nextIndex
   437  
   438  	return e, nil
   439  }
   440  
   441  func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool,
   442  	expireTime time.Time, action string) (*Event, error) {
   443  
   444  	currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
   445  
   446  	if unique { // append unique item under the node path
   447  		nodePath += "/" + strconv.FormatUint(nextIndex, 10)
   448  	}
   449  
   450  	nodePath = path.Clean(path.Join("/", nodePath))
   451  
   452  	// we do not allow the user to change "/"
   453  	if nodePath == "/" {
   454  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", currIndex)
   455  	}
   456  
   457  	// Assume expire times that are way in the past are not valid.
   458  	// This can occur when the time is serialized to JSON and read back in.
   459  	if expireTime.Before(minExpireTime) {
   460  		expireTime = Permanent
   461  	}
   462  
   463  	dirName, nodeName := path.Split(nodePath)
   464  
   465  	// walk through the nodePath, create dirs and get the last directory node
   466  	d, err := s.walk(dirName, s.checkDir)
   467  
   468  	if err != nil {
   469  		s.Stats.Inc(SetFail)
   470  		err.Index = currIndex
   471  		return nil, err
   472  	}
   473  
   474  	e := newEvent(action, nodePath, nextIndex, nextIndex)
   475  	eNode := e.Node
   476  
   477  	n, _ := d.GetChild(nodeName)
   478  
   479  	// force will try to replace a existing file
   480  	if n != nil {
   481  		if replace {
   482  			if n.IsDir() {
   483  				return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
   484  			}
   485  			eNode.PrevValue, _ = n.Read()
   486  
   487  			n.Remove(false, false, nil)
   488  		} else {
   489  			return nil, etcdErr.NewError(etcdErr.EcodeNodeExist, nodePath, currIndex)
   490  		}
   491  	}
   492  
   493  	if !dir { // create file
   494  		eNode.Value = value
   495  
   496  		n = newKV(s, nodePath, value, nextIndex, d, "", expireTime)
   497  
   498  	} else { // create directory
   499  		eNode.Dir = true
   500  
   501  		n = newDir(s, nodePath, nextIndex, d, "", expireTime)
   502  	}
   503  
   504  	// we are sure d is a directory and does not have the children with name n.Name
   505  	d.Add(n)
   506  
   507  	// node with TTL
   508  	if !n.IsPermanent() {
   509  		s.ttlKeyHeap.push(n)
   510  
   511  		eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   512  	}
   513  
   514  	s.CurrentIndex = nextIndex
   515  
   516  	s.WatcherHub.notify(e)
   517  	return e, nil
   518  }
   519  
   520  // InternalGet function get the node of the given nodePath.
   521  func (s *store) internalGet(nodePath string) (*node, *etcdErr.Error) {
   522  	nodePath = path.Clean(path.Join("/", nodePath))
   523  
   524  	walkFunc := func(parent *node, name string) (*node, *etcdErr.Error) {
   525  
   526  		if !parent.IsDir() {
   527  			err := etcdErr.NewError(etcdErr.EcodeNotDir, parent.Path, s.CurrentIndex)
   528  			return nil, err
   529  		}
   530  
   531  		child, ok := parent.Children[name]
   532  		if ok {
   533  			return child, nil
   534  		}
   535  
   536  		return nil, etcdErr.NewError(etcdErr.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex)
   537  	}
   538  
   539  	f, err := s.walk(nodePath, walkFunc)
   540  
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  	return f, nil
   545  }
   546  
   547  // deleteExpiredKyes will delete all
   548  func (s *store) DeleteExpiredKeys(cutoff time.Time) {
   549  	s.worldLock.Lock()
   550  	defer s.worldLock.Unlock()
   551  
   552  	for {
   553  		node := s.ttlKeyHeap.top()
   554  		if node == nil || node.ExpireTime.After(cutoff) {
   555  			break
   556  		}
   557  
   558  		s.CurrentIndex++
   559  		e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex)
   560  
   561  		callback := func(path string) { // notify function
   562  			// notify the watchers with deleted set true
   563  			s.WatcherHub.notifyWatchers(e, path, true)
   564  		}
   565  
   566  		s.ttlKeyHeap.pop()
   567  		node.Remove(true, true, callback)
   568  
   569  		s.Stats.Inc(ExpireCount)
   570  		s.WatcherHub.notify(e)
   571  	}
   572  
   573  }
   574  
   575  // checkDir function will check whether the component is a directory under parent node.
   576  // If it is a directory, this function will return the pointer to that node.
   577  // If it does not exist, this function will create a new directory and return the pointer to that node.
   578  // If it is a file, this function will return error.
   579  func (s *store) checkDir(parent *node, dirName string) (*node, *etcdErr.Error) {
   580  	node, ok := parent.Children[dirName]
   581  
   582  	if ok {
   583  		if node.IsDir() {
   584  			return node, nil
   585  		}
   586  
   587  		return nil, etcdErr.NewError(etcdErr.EcodeNotDir, node.Path, s.CurrentIndex)
   588  	}
   589  
   590  	n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, parent.ACL, Permanent)
   591  
   592  	parent.Children[dirName] = n
   593  
   594  	return n, nil
   595  }
   596  
   597  // Save function saves the static state of the store system.
   598  // Save function will not be able to save the state of watchers.
   599  // Save function will not save the parent field of the node. Or there will
   600  // be cyclic dependencies issue for the json package.
   601  func (s *store) Save() ([]byte, error) {
   602  	s.worldLock.Lock()
   603  
   604  	clonedStore := newStore()
   605  	clonedStore.CurrentIndex = s.CurrentIndex
   606  	clonedStore.Root = s.Root.Clone()
   607  	clonedStore.WatcherHub = s.WatcherHub.clone()
   608  	clonedStore.Stats = s.Stats.clone()
   609  	clonedStore.CurrentVersion = s.CurrentVersion
   610  
   611  	s.worldLock.Unlock()
   612  
   613  	b, err := json.Marshal(clonedStore)
   614  
   615  	if err != nil {
   616  		return nil, err
   617  	}
   618  
   619  	return b, nil
   620  }
   621  
   622  // recovery function recovery the store system from a static state.
   623  // It needs to recovery the parent field of the nodes.
   624  // It needs to delete the expired nodes since the saved time and also
   625  // need to create monitor go routines.
   626  func (s *store) Recovery(state []byte) error {
   627  	s.worldLock.Lock()
   628  	defer s.worldLock.Unlock()
   629  	err := json.Unmarshal(state, s)
   630  
   631  	if err != nil {
   632  		return err
   633  	}
   634  
   635  	s.ttlKeyHeap = newTtlKeyHeap()
   636  
   637  	s.Root.recoverAndclean()
   638  	return nil
   639  }
   640  
   641  func (s *store) JsonStats() []byte {
   642  	s.Stats.Watchers = uint64(s.WatcherHub.count)
   643  	return s.Stats.toJson()
   644  }
   645  
   646  func (s *store) TotalTransactions() uint64 {
   647  	return s.Stats.TotalTranscations()
   648  }