github.com/macb/etcd@v0.3.1-0.20140227003422-a60481c6b1a0/store/store.go (about)

     1  /*
     2  Copyright 2013 CoreOS Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package store
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"path"
    23  	"strconv"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	etcdErr "github.com/coreos/etcd/error"
    29  	ustrings "github.com/coreos/etcd/pkg/strings"
    30  )
    31  
    32  // The default version to set when the store is first initialized.
    33  const defaultVersion = 2
    34  
    35  var minExpireTime time.Time
    36  
    37  func init() {
    38  	minExpireTime, _ = time.Parse(time.RFC3339, "2000-01-01T00:00:00Z")
    39  }
    40  
    41  type Store interface {
    42  	Version() int
    43  	CommandFactory() CommandFactory
    44  	Index() uint64
    45  
    46  	Get(nodePath string, recursive, sorted bool) (*Event, error)
    47  	Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error)
    48  	Update(nodePath string, newValue string, expireTime time.Time) (*Event, error)
    49  	Create(nodePath string, dir bool, value string, unique bool,
    50  		expireTime time.Time) (*Event, error)
    51  	CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
    52  		value string, expireTime time.Time) (*Event, error)
    53  	Delete(nodePath string, recursive, dir bool) (*Event, error)
    54  	CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error)
    55  
    56  	Watch(prefix string, recursive, stream bool, sinceIndex uint64) (*Watcher, error)
    57  
    58  	Save() ([]byte, error)
    59  	Recovery(state []byte) error
    60  
    61  	TotalTransactions() uint64
    62  	JsonStats() []byte
    63  	DeleteExpiredKeys(cutoff time.Time)
    64  }
    65  
// store is the implementation of Store in this file. Its exported fields are
// the ones encoding/json sees when Save marshals a clone of the store and
// when Recovery unmarshals a saved state back into it.
type store struct {
	Root           *node
	WatcherHub     *watcherHub
	CurrentIndex   uint64
	Stats          *Stats
	CurrentVersion int
	ttlKeyHeap     *ttlKeyHeap  // unexported, so absent from the JSON state; Recovery re-creates it
	worldLock      sync.RWMutex // store-wide "stop the world" lock held by the read/write operations below
}
    75  
    76  func New() Store {
    77  	return newStore()
    78  }
    79  
    80  func newStore() *store {
    81  	s := new(store)
    82  	s.CurrentVersion = defaultVersion
    83  	s.Root = newDir(s, "/", s.CurrentIndex, nil, "", Permanent)
    84  	s.Stats = newStats()
    85  	s.WatcherHub = newWatchHub(1000)
    86  	s.ttlKeyHeap = newTtlKeyHeap()
    87  	return s
    88  }
    89  
    90  // Version retrieves current version of the store.
    91  func (s *store) Version() int {
    92  	return s.CurrentVersion
    93  }
    94  
    95  // Retrieves current of the store
    96  func (s *store) Index() uint64 {
    97  	return s.CurrentIndex
    98  }
    99  
   100  // CommandFactory retrieves the command factory for the current version of the store.
   101  func (s *store) CommandFactory() CommandFactory {
   102  	return GetCommandFactory(s.Version())
   103  }
   104  
   105  // Get returns a get event.
   106  // If recursive is true, it will return all the content under the node path.
   107  // If sorted is true, it will sort the content by keys.
   108  func (s *store) Get(nodePath string, recursive, sorted bool) (*Event, error) {
   109  	s.worldLock.RLock()
   110  	defer s.worldLock.RUnlock()
   111  
   112  	nodePath = path.Clean(path.Join("/", nodePath))
   113  
   114  	n, err := s.internalGet(nodePath)
   115  
   116  	if err != nil {
   117  		s.Stats.Inc(GetFail)
   118  		return nil, err
   119  	}
   120  
   121  	e := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
   122  	e.Node.loadInternalNode(n, recursive, sorted)
   123  
   124  	s.Stats.Inc(GetSuccess)
   125  
   126  	return e, nil
   127  }
   128  
   129  // Create creates the node at nodePath. Create will help to create intermediate directories with no ttl.
   130  // If the node has already existed, create will fail.
   131  // If any node on the path is a file, create will fail.
   132  func (s *store) Create(nodePath string, dir bool, value string, unique bool, expireTime time.Time) (*Event, error) {
   133  	s.worldLock.Lock()
   134  	defer s.worldLock.Unlock()
   135  	e, err := s.internalCreate(nodePath, dir, value, unique, false, expireTime, Create)
   136  
   137  	if err == nil {
   138  		s.Stats.Inc(CreateSuccess)
   139  	} else {
   140  		s.Stats.Inc(CreateFail)
   141  	}
   142  
   143  	return e, err
   144  }
   145  
   146  // Set creates or replace the node at nodePath.
   147  func (s *store) Set(nodePath string, dir bool, value string, expireTime time.Time) (*Event, error) {
   148  	var err error
   149  
   150  	s.worldLock.Lock()
   151  	defer s.worldLock.Unlock()
   152  
   153  	defer func() {
   154  		if err == nil {
   155  			s.Stats.Inc(SetSuccess)
   156  		} else {
   157  			s.Stats.Inc(SetFail)
   158  		}
   159  	}()
   160  
   161  	// Get prevNode value
   162  	n, getErr := s.internalGet(nodePath)
   163  	if getErr != nil && getErr.ErrorCode != etcdErr.EcodeKeyNotFound {
   164  		err = getErr
   165  		return nil, err
   166  	}
   167  
   168  	// Set new value
   169  	e, err := s.internalCreate(nodePath, dir, value, false, true, expireTime, Set)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	// Put prevNode into event
   175  	if getErr == nil {
   176  		prev := newEvent(Get, nodePath, n.ModifiedIndex, n.CreatedIndex)
   177  		prev.Node.loadInternalNode(n, false, false)
   178  		e.PrevNode = prev.Node
   179  	}
   180  
   181  	return e, nil
   182  }
   183  
   184  func (s *store) CompareAndSwap(nodePath string, prevValue string, prevIndex uint64,
   185  	value string, expireTime time.Time) (*Event, error) {
   186  
   187  	nodePath = path.Clean(path.Join("/", nodePath))
   188  	// we do not allow the user to change "/"
   189  	if nodePath == "/" {
   190  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   191  	}
   192  
   193  	s.worldLock.Lock()
   194  	defer s.worldLock.Unlock()
   195  
   196  	n, err := s.internalGet(nodePath)
   197  
   198  	if err != nil {
   199  		s.Stats.Inc(CompareAndSwapFail)
   200  		return nil, err
   201  	}
   202  
   203  	if n.IsDir() { // can only compare and swap file
   204  		s.Stats.Inc(CompareAndSwapFail)
   205  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
   206  	}
   207  
   208  	// If both of the prevValue and prevIndex are given, we will test both of them.
   209  	// Command will be executed, only if both of the tests are successful.
   210  	if !n.Compare(prevValue, prevIndex) {
   211  		cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
   212  		s.Stats.Inc(CompareAndSwapFail)
   213  		return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
   214  	}
   215  
   216  	// update etcd index
   217  	s.CurrentIndex++
   218  
   219  	e := newEvent(CompareAndSwap, nodePath, s.CurrentIndex, n.CreatedIndex)
   220  	e.PrevNode = n.Repr(false, false)
   221  	eNode := e.Node
   222  
   223  	// if test succeed, write the value
   224  	n.Write(value, s.CurrentIndex)
   225  	n.UpdateTTL(expireTime)
   226  
   227  	// copy the value for safety
   228  	valueCopy := ustrings.Clone(value)
   229  	eNode.Value = &valueCopy
   230  	eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   231  
   232  	s.WatcherHub.notify(e)
   233  	s.Stats.Inc(CompareAndSwapSuccess)
   234  	return e, nil
   235  }
   236  
   237  // Delete deletes the node at the given path.
   238  // If the node is a directory, recursive must be true to delete it.
   239  func (s *store) Delete(nodePath string, dir, recursive bool) (*Event, error) {
   240  	nodePath = path.Clean(path.Join("/", nodePath))
   241  	// we do not allow the user to change "/"
   242  	if nodePath == "/" {
   243  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   244  	}
   245  
   246  	s.worldLock.Lock()
   247  	defer s.worldLock.Unlock()
   248  
   249  	// recursive implies dir
   250  	if recursive == true {
   251  		dir = true
   252  	}
   253  
   254  	n, err := s.internalGet(nodePath)
   255  
   256  	if err != nil { // if the node does not exist, return error
   257  		s.Stats.Inc(DeleteFail)
   258  		return nil, err
   259  	}
   260  
   261  	nextIndex := s.CurrentIndex + 1
   262  	e := newEvent(Delete, nodePath, nextIndex, n.CreatedIndex)
   263  	e.PrevNode = n.Repr(false, false)
   264  	eNode := e.Node
   265  
   266  	if n.IsDir() {
   267  		eNode.Dir = true
   268  	}
   269  
   270  	callback := func(path string) { // notify function
   271  		// notify the watchers with deleted set true
   272  		s.WatcherHub.notifyWatchers(e, path, true)
   273  	}
   274  
   275  	err = n.Remove(dir, recursive, callback)
   276  
   277  	if err != nil {
   278  		s.Stats.Inc(DeleteFail)
   279  		return nil, err
   280  	}
   281  
   282  	// update etcd index
   283  	s.CurrentIndex++
   284  
   285  	s.WatcherHub.notify(e)
   286  
   287  	s.Stats.Inc(DeleteSuccess)
   288  
   289  	return e, nil
   290  }
   291  
   292  func (s *store) CompareAndDelete(nodePath string, prevValue string, prevIndex uint64) (*Event, error) {
   293  	nodePath = path.Clean(path.Join("/", nodePath))
   294  
   295  	s.worldLock.Lock()
   296  	defer s.worldLock.Unlock()
   297  
   298  	n, err := s.internalGet(nodePath)
   299  
   300  	if err != nil { // if the node does not exist, return error
   301  		s.Stats.Inc(CompareAndDeleteFail)
   302  		return nil, err
   303  	}
   304  
   305  	if n.IsDir() { // can only compare and delete file
   306  		s.Stats.Inc(CompareAndSwapFail)
   307  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, s.CurrentIndex)
   308  	}
   309  
   310  	// If both of the prevValue and prevIndex are given, we will test both of them.
   311  	// Command will be executed, only if both of the tests are successful.
   312  	if !n.Compare(prevValue, prevIndex) {
   313  		cause := fmt.Sprintf("[%v != %v] [%v != %v]", prevValue, n.Value, prevIndex, n.ModifiedIndex)
   314  		s.Stats.Inc(CompareAndDeleteFail)
   315  		return nil, etcdErr.NewError(etcdErr.EcodeTestFailed, cause, s.CurrentIndex)
   316  	}
   317  
   318  	// update etcd index
   319  	s.CurrentIndex++
   320  
   321  	e := newEvent(CompareAndDelete, nodePath, s.CurrentIndex, n.CreatedIndex)
   322  	e.PrevNode = n.Repr(false, false)
   323  
   324  	callback := func(path string) { // notify function
   325  		// notify the watchers with deleted set true
   326  		s.WatcherHub.notifyWatchers(e, path, true)
   327  	}
   328  
   329  	// delete a key-value pair, no error should happen
   330  	n.Remove(false, false, callback)
   331  
   332  	s.WatcherHub.notify(e)
   333  	s.Stats.Inc(CompareAndDeleteSuccess)
   334  	return e, nil
   335  }
   336  
   337  func (s *store) Watch(key string, recursive, stream bool, sinceIndex uint64) (*Watcher, error) {
   338  	key = path.Clean(path.Join("/", key))
   339  	nextIndex := s.CurrentIndex + 1
   340  
   341  	s.worldLock.RLock()
   342  	defer s.worldLock.RUnlock()
   343  
   344  	var w *Watcher
   345  	var err *etcdErr.Error
   346  
   347  	if sinceIndex == 0 {
   348  		w, err = s.WatcherHub.watch(key, recursive, stream, nextIndex)
   349  
   350  	} else {
   351  		w, err = s.WatcherHub.watch(key, recursive, stream, sinceIndex)
   352  	}
   353  
   354  	if err != nil {
   355  		// watchhub do not know the current Index
   356  		// we need to attach the currentIndex here
   357  		err.Index = s.CurrentIndex
   358  		return nil, err
   359  	}
   360  
   361  	return w, nil
   362  }
   363  
   364  // walk walks all the nodePath and apply the walkFunc on each directory
   365  func (s *store) walk(nodePath string, walkFunc func(prev *node, component string) (*node, *etcdErr.Error)) (*node, *etcdErr.Error) {
   366  	components := strings.Split(nodePath, "/")
   367  
   368  	curr := s.Root
   369  	var err *etcdErr.Error
   370  
   371  	for i := 1; i < len(components); i++ {
   372  		if len(components[i]) == 0 { // ignore empty string
   373  			return curr, nil
   374  		}
   375  
   376  		curr, err = walkFunc(curr, components[i])
   377  		if err != nil {
   378  			return nil, err
   379  		}
   380  
   381  	}
   382  
   383  	return curr, nil
   384  }
   385  
   386  // Update updates the value/ttl of the node.
   387  // If the node is a file, the value and the ttl can be updated.
   388  // If the node is a directory, only the ttl can be updated.
   389  func (s *store) Update(nodePath string, newValue string, expireTime time.Time) (*Event, error) {
   390  	nodePath = path.Clean(path.Join("/", nodePath))
   391  	// we do not allow the user to change "/"
   392  	if nodePath == "/" {
   393  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", s.CurrentIndex)
   394  	}
   395  
   396  	s.worldLock.Lock()
   397  	defer s.worldLock.Unlock()
   398  
   399  	currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
   400  
   401  	n, err := s.internalGet(nodePath)
   402  
   403  	if err != nil { // if the node does not exist, return error
   404  		s.Stats.Inc(UpdateFail)
   405  		return nil, err
   406  	}
   407  
   408  	e := newEvent(Update, nodePath, nextIndex, n.CreatedIndex)
   409  	e.PrevNode = n.Repr(false, false)
   410  	eNode := e.Node
   411  
   412  	if n.IsDir() && len(newValue) != 0 {
   413  		// if the node is a directory, we cannot update value to non-empty
   414  		s.Stats.Inc(UpdateFail)
   415  		return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
   416  	}
   417  
   418  	n.Write(newValue, nextIndex)
   419  
   420  	// copy the value for safety
   421  	newValueCopy := ustrings.Clone(newValue)
   422  	eNode.Value = &newValueCopy
   423  
   424  	// update ttl
   425  	n.UpdateTTL(expireTime)
   426  
   427  	eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   428  
   429  	s.WatcherHub.notify(e)
   430  
   431  	s.Stats.Inc(UpdateSuccess)
   432  
   433  	s.CurrentIndex = nextIndex
   434  
   435  	return e, nil
   436  }
   437  
   438  func (s *store) internalCreate(nodePath string, dir bool, value string, unique, replace bool,
   439  	expireTime time.Time, action string) (*Event, error) {
   440  
   441  	currIndex, nextIndex := s.CurrentIndex, s.CurrentIndex+1
   442  
   443  	if unique { // append unique item under the node path
   444  		nodePath += "/" + strconv.FormatUint(nextIndex, 10)
   445  	}
   446  
   447  	nodePath = path.Clean(path.Join("/", nodePath))
   448  
   449  	// we do not allow the user to change "/"
   450  	if nodePath == "/" {
   451  		return nil, etcdErr.NewError(etcdErr.EcodeRootROnly, "/", currIndex)
   452  	}
   453  
   454  	// Assume expire times that are way in the past are not valid.
   455  	// This can occur when the time is serialized to JSON and read back in.
   456  	if expireTime.Before(minExpireTime) {
   457  		expireTime = Permanent
   458  	}
   459  
   460  	dirName, nodeName := path.Split(nodePath)
   461  
   462  	// walk through the nodePath, create dirs and get the last directory node
   463  	d, err := s.walk(dirName, s.checkDir)
   464  
   465  	if err != nil {
   466  		s.Stats.Inc(SetFail)
   467  		err.Index = currIndex
   468  		return nil, err
   469  	}
   470  
   471  	e := newEvent(action, nodePath, nextIndex, nextIndex)
   472  	eNode := e.Node
   473  
   474  	n, _ := d.GetChild(nodeName)
   475  
   476  	// force will try to replace a existing file
   477  	if n != nil {
   478  		if replace {
   479  			if n.IsDir() {
   480  				return nil, etcdErr.NewError(etcdErr.EcodeNotFile, nodePath, currIndex)
   481  			}
   482  			e.PrevNode = n.Repr(false, false)
   483  
   484  			n.Remove(false, false, nil)
   485  		} else {
   486  			return nil, etcdErr.NewError(etcdErr.EcodeNodeExist, nodePath, currIndex)
   487  		}
   488  	}
   489  
   490  	if !dir { // create file
   491  		// copy the value for safety
   492  		valueCopy := ustrings.Clone(value)
   493  		eNode.Value = &valueCopy
   494  
   495  		n = newKV(s, nodePath, value, nextIndex, d, "", expireTime)
   496  
   497  	} else { // create directory
   498  		eNode.Dir = true
   499  
   500  		n = newDir(s, nodePath, nextIndex, d, "", expireTime)
   501  	}
   502  
   503  	// we are sure d is a directory and does not have the children with name n.Name
   504  	d.Add(n)
   505  
   506  	// node with TTL
   507  	if !n.IsPermanent() {
   508  		s.ttlKeyHeap.push(n)
   509  
   510  		eNode.Expiration, eNode.TTL = n.ExpirationAndTTL()
   511  	}
   512  
   513  	s.CurrentIndex = nextIndex
   514  
   515  	s.WatcherHub.notify(e)
   516  
   517  	return e, nil
   518  }
   519  
   520  // InternalGet gets the node of the given nodePath.
   521  func (s *store) internalGet(nodePath string) (*node, *etcdErr.Error) {
   522  	nodePath = path.Clean(path.Join("/", nodePath))
   523  
   524  	walkFunc := func(parent *node, name string) (*node, *etcdErr.Error) {
   525  
   526  		if !parent.IsDir() {
   527  			err := etcdErr.NewError(etcdErr.EcodeNotDir, parent.Path, s.CurrentIndex)
   528  			return nil, err
   529  		}
   530  
   531  		child, ok := parent.Children[name]
   532  		if ok {
   533  			return child, nil
   534  		}
   535  
   536  		return nil, etcdErr.NewError(etcdErr.EcodeKeyNotFound, path.Join(parent.Path, name), s.CurrentIndex)
   537  	}
   538  
   539  	f, err := s.walk(nodePath, walkFunc)
   540  
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  	return f, nil
   545  }
   546  
   547  // deleteExpiredKyes will delete all
   548  func (s *store) DeleteExpiredKeys(cutoff time.Time) {
   549  	s.worldLock.Lock()
   550  	defer s.worldLock.Unlock()
   551  
   552  	for {
   553  		node := s.ttlKeyHeap.top()
   554  		if node == nil || node.ExpireTime.After(cutoff) {
   555  			break
   556  		}
   557  
   558  		s.CurrentIndex++
   559  		e := newEvent(Expire, node.Path, s.CurrentIndex, node.CreatedIndex)
   560  		e.PrevNode = node.Repr(false, false)
   561  
   562  		callback := func(path string) { // notify function
   563  			// notify the watchers with deleted set true
   564  			s.WatcherHub.notifyWatchers(e, path, true)
   565  		}
   566  
   567  		s.ttlKeyHeap.pop()
   568  		node.Remove(true, true, callback)
   569  
   570  		s.Stats.Inc(ExpireCount)
   571  
   572  		s.WatcherHub.notify(e)
   573  	}
   574  
   575  }
   576  
   577  // checkDir will check whether the component is a directory under parent node.
   578  // If it is a directory, this function will return the pointer to that node.
   579  // If it does not exist, this function will create a new directory and return the pointer to that node.
   580  // If it is a file, this function will return error.
   581  func (s *store) checkDir(parent *node, dirName string) (*node, *etcdErr.Error) {
   582  	node, ok := parent.Children[dirName]
   583  
   584  	if ok {
   585  		if node.IsDir() {
   586  			return node, nil
   587  		}
   588  
   589  		return nil, etcdErr.NewError(etcdErr.EcodeNotDir, node.Path, s.CurrentIndex)
   590  	}
   591  
   592  	n := newDir(s, path.Join(parent.Path, dirName), s.CurrentIndex+1, parent, parent.ACL, Permanent)
   593  
   594  	parent.Children[dirName] = n
   595  
   596  	return n, nil
   597  }
   598  
   599  // Save saves the static state of the store system.
   600  // It will not be able to save the state of watchers.
   601  // It will not save the parent field of the node. Or there will
   602  // be cyclic dependencies issue for the json package.
   603  func (s *store) Save() ([]byte, error) {
   604  	s.worldLock.Lock()
   605  
   606  	clonedStore := newStore()
   607  	clonedStore.CurrentIndex = s.CurrentIndex
   608  	clonedStore.Root = s.Root.Clone()
   609  	clonedStore.WatcherHub = s.WatcherHub.clone()
   610  	clonedStore.Stats = s.Stats.clone()
   611  	clonedStore.CurrentVersion = s.CurrentVersion
   612  
   613  	s.worldLock.Unlock()
   614  
   615  	b, err := json.Marshal(clonedStore)
   616  
   617  	if err != nil {
   618  		return nil, err
   619  	}
   620  
   621  	return b, nil
   622  }
   623  
   624  // Recovery recovers the store system from a static state
   625  // It needs to recover the parent field of the nodes.
   626  // It needs to delete the expired nodes since the saved time and also
   627  // needs to create monitoring go routines.
   628  func (s *store) Recovery(state []byte) error {
   629  	s.worldLock.Lock()
   630  	defer s.worldLock.Unlock()
   631  	err := json.Unmarshal(state, s)
   632  
   633  	if err != nil {
   634  		return err
   635  	}
   636  
   637  	s.ttlKeyHeap = newTtlKeyHeap()
   638  
   639  	s.Root.recoverAndclean()
   640  	return nil
   641  }
   642  
   643  func (s *store) JsonStats() []byte {
   644  	s.Stats.Watchers = uint64(s.WatcherHub.count)
   645  	return s.Stats.toJson()
   646  }
   647  
   648  func (s *store) TotalTransactions() uint64 {
   649  	return s.Stats.TotalTranscations()
   650  }