github.com/portworx/kvdb@v0.0.0-20241107215734-a185a966f535/bolt/kv_bolt.go (about)

     1  package bolt
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	"github.com/boltdb/bolt"
    16  	"github.com/hashicorp/memberlist"
    17  	"github.com/portworx/kvdb"
    18  	"github.com/portworx/kvdb/common"
    19  	"github.com/sirupsen/logrus"
    20  )
    21  
const (
	// Name is the name of this kvdb implementation.
	Name = "bolt-kv"
	// KvSnap is the option key that designates this kvdb as a snap. New
	// uses the option's value as the path of the bolt database file.
	KvSnap = "KvSnap"
	// KvUseInterface is an option passed that configures the mem to store
	// the values as interfaces instead of bytes. It will not create a
	// copy of the interface that is passed in. USE WITH CAUTION
	KvUseInterface = "KvUseInterface"
	// bootstrapKey is the name of the KV bootstrap key space.
	bootstrapKey = "bootstrap"
	// dbPath is the default path of the bolt database file.
	dbPath = "px.db"
)
    36  
var (
	// ErrSnap is returned if an operation is not supported on a snap.
	ErrSnap = errors.New("operation not supported on snap")
	// ErrSnapWithInterfaceNotSupported is returned when a snap kvdb is
	// created with the KvUseInterface flag on.
	ErrSnapWithInterfaceNotSupported = errors.New("snap kvdb not supported with interfaces")
	// ErrIllegalSelect is returned when an incorrect select function
	// implementation is detected.
	ErrIllegalSelect = errors.New("Illegal Select implementation")
	// pxBucket is the name of the default PX keyspace (bolt bucket) in the
	// internal KVDB. Every key handled by this file lives in this bucket.
	pxBucket = []byte("px")
)
    49  
    50  func init() {
    51  	logrus.Infof("Registering internal KVDB provider")
    52  	if err := kvdb.Register(Name, New, Version); err != nil {
    53  		panic(err.Error())
    54  	}
    55  }
    56  
// boltKV is the bolt-backed kvdb implementation defined in this file.
type boltKV struct {
	common.BaseKvdb

	// db is the handle to the bolt DB.
	db *bolt.DB

	// locks maps each currently held lock key to the channel that is
	// closed on Unlock to stop that lock's hold-timeout goroutine.
	locks map[string]chan int

	// machines is a list of peers in the cluster
	machines []string

	// dist fans every committed put/delete out to the registered watchers.
	dist WatchDistributor

	// mutex serializes the exported KV operations and guards locks.
	mutex sync.Mutex

	// index is the current kvdb index; it is incremented atomically on
	// every put and delete.
	index uint64

	// domain scoping for this KVDB instance
	domain string

	kvdb.Controller
}
    83  
// watchUpdate describes a single update to this kvdb as it is delivered
// to watchers.
type watchUpdate struct {
	// key is the key that was updated
	key string
	// kvp is a copy of the key-value pair that was updated
	kvp kvdb.KVPair
	// err is any error on update
	err error
}
    93  
// WatchUpdateQueue is a producer-consumer queue of watch updates.
type WatchUpdateQueue interface {
	// Enqueue will enqueue an update. It is non-blocking.
	Enqueue(update *watchUpdate)
	// Dequeue will either return the element at the front of the queue or
	// block until an element becomes available.
	Dequeue() *watchUpdate
}
   102  
// WatchDistributor distributes updates to the watchers.
type WatchDistributor interface {
	// Add creates a new watch queue that will receive updates.
	Add() WatchUpdateQueue
	// Remove removes an existing watch queue.
	Remove(WatchUpdateQueue)
	// NewUpdate is invoked to distribute a new update to every queue.
	NewUpdate(w *watchUpdate)
}
   112  
// distributor implements the WatchDistributor interface. The embedded
// mutex guards both slices.
type distributor struct {
	sync.Mutex
	// updates is the list of latest few updates; it is replayed into every
	// queue created by Add.
	updates []*watchUpdate
	// watchers are the queues that receive each new update
	watchers []WatchUpdateQueue
}
   121  
   122  // NewWatchDistributor returns a new instance of
   123  // the WatchDistrubtor interface
   124  func NewWatchDistributor() WatchDistributor {
   125  	return &distributor{}
   126  }
   127  
   128  func (d *distributor) Add() WatchUpdateQueue {
   129  	d.Lock()
   130  	defer d.Unlock()
   131  	q := NewWatchUpdateQueue()
   132  	for _, u := range d.updates {
   133  		q.Enqueue(u)
   134  	}
   135  	d.watchers = append(d.watchers, q)
   136  	return q
   137  }
   138  
   139  func (d *distributor) Remove(r WatchUpdateQueue) {
   140  	d.Lock()
   141  	defer d.Unlock()
   142  	for i, q := range d.watchers {
   143  		if q == r {
   144  			copy(d.watchers[i:], d.watchers[i+1:])
   145  			d.watchers[len(d.watchers)-1] = nil
   146  			d.watchers = d.watchers[:len(d.watchers)-1]
   147  		}
   148  	}
   149  }
   150  
   151  func (d *distributor) NewUpdate(u *watchUpdate) {
   152  	d.Lock()
   153  	defer d.Unlock()
   154  	// collect update
   155  	d.updates = append(d.updates, u)
   156  	if len(d.updates) > 100 {
   157  		d.updates = d.updates[100:]
   158  	}
   159  	// send update to watchers
   160  	for _, q := range d.watchers {
   161  		q.Enqueue(u)
   162  	}
   163  }
   164  
// watchQueue implements the WatchUpdateQueue interface for watchUpdates.
type watchQueue struct {
	// updates is the list of pending updates, oldest first
	updates []*watchUpdate
	// m is the mutex to protect updates
	m *sync.Mutex
	// cv is used to coordinate the producer-consumer threads; it is built
	// on m by NewWatchUpdateQueue
	cv *sync.Cond
}
   174  
   175  // NewWatchUpdateQueue returns an instance of WatchUpdateQueue
   176  func NewWatchUpdateQueue() WatchUpdateQueue {
   177  	mtx := &sync.Mutex{}
   178  	return &watchQueue{
   179  		m:       mtx,
   180  		cv:      sync.NewCond(mtx),
   181  		updates: make([]*watchUpdate, 0)}
   182  }
   183  
   184  func (w *watchQueue) Dequeue() *watchUpdate {
   185  	w.m.Lock()
   186  	for {
   187  		if len(w.updates) > 0 {
   188  			update := w.updates[0]
   189  			w.updates = w.updates[1:]
   190  			w.m.Unlock()
   191  			return update
   192  		}
   193  		w.cv.Wait()
   194  	}
   195  }
   196  
   197  // Enqueue enqueues and never blocks
   198  func (w *watchQueue) Enqueue(update *watchUpdate) {
   199  	w.m.Lock()
   200  	w.updates = append(w.updates, update)
   201  	w.cv.Signal()
   202  	w.m.Unlock()
   203  }
   204  
// watchData carries the per-watch state handed to watchCb.
type watchData struct {
	// cb is the callback invoked for every matching update
	cb kvdb.WatchCB
	// opaque is passed back to cb unchanged
	opaque interface{}
	// waitIndex, when non-zero, suppresses updates whose ModifiedIndex is
	// not greater than it
	waitIndex uint64
}
   210  
// New constructs a new kvdb.Kvdb backed by a bolt database file.
//
// A non-empty domain is normalized to end in "/". If options contains
// KvSnap, its value is used as the database path; otherwise dbPath is
// used. The default px bucket is created if it does not already exist.
// machines is only logged here; it is not stored on the returned instance.
//
// NOTE(review): every failure path calls logrus.Fatalf before returning
// the error; with logrus' default behavior that exits the process, so the
// returns that follow are presumably unreachable — confirm.
func New(
	domain string,
	machines []string,
	options map[string]string,
	fatalErrorCb kvdb.FatalErrorCB,
) (kvdb.Kvdb, error) {
	if domain != "" && !strings.HasSuffix(domain, "/") {
		domain = domain + "/"
	}

	logrus.Infof("Initializing a new internal KVDB client with domain %v and pairs %v",
		domain,
		machines,
	)

	// A snapshot instance opens the snapshot file instead of the default DB.
	path := dbPath
	if p, ok := options[KvSnap]; ok {
		path = p
		logrus.Infof("Creating a new Bolt KVDB using snapshot path %v", path)
	} else {
		logrus.Infof("Creating a new Bolt KVDB using path %v", path)
	}

	// NOTE(review): mode 0777 is very permissive for a database file and no
	// open timeout is configured — confirm both are intended.
	handle, err := bolt.Open(
		path,
		0777,
		nil,
		// &bolt.Options{Timeout: 1 * time.Second},
	)
	if err != nil {
		logrus.Fatalf("Could not open internal KVDB: %v", err)
		return nil, err
	}

	// Create the default bucket inside a single read-write transaction.
	tx, err := handle.Begin(true)
	if err != nil {
		logrus.Fatalf("Could not open KVDB transaction: %v", err)
		return nil, err
	}
	// The deferred Rollback's result is discarded; it also runs after a
	// successful Commit.
	defer tx.Rollback()

	if _, err = tx.CreateBucketIfNotExists(pxBucket); err != nil {
		logrus.Fatalf("Could not create default KVDB bucket: %v", err)
		return nil, err
	}

	if err = tx.Commit(); err != nil {
		logrus.Fatalf("Could not commit default KVDB bucket: %v", err)
		return nil, err
	}

	kv := &boltKV{
		BaseKvdb:   common.BaseKvdb{FatalCb: fatalErrorCb},
		db:         handle,
		dist:       NewWatchDistributor(),
		domain:     domain,
		Controller: kvdb.ControllerNotSupported,
		locks:      make(map[string]chan int),
	}

	return kv, nil
}
   274  
// Version returns the supported version of the bolt implementation. Both
// arguments are ignored and kvdb.BoltVersion1 is always returned.
func Version(url string, kvdbOptions map[string]string) (string, error) {
	return kvdb.BoltVersion1, nil
}
   279  
// String returns the name of this kvdb implementation.
func (kv *boltKV) String() string {
	return Name
}
   283  
// Capabilities reports the capability flags of this implementation, which
// is kvdb.KVCapabilityOrderedUpdates only.
func (kv *boltKV) Capabilities() int {
	return kvdb.KVCapabilityOrderedUpdates
}
   287  
   288  func (kv *boltKV) get(key string) (*kvdb.KVPair, error) {
   289  	if kv.db == nil {
   290  		return nil, kvdb.ErrNotFound
   291  	}
   292  
   293  	// XXX FIXME some bug above this cases the prefix to be pre-loaded.
   294  	key = strings.TrimPrefix(key, kv.domain)
   295  	key = kv.domain + key
   296  
   297  	tx, err := kv.db.Begin(false)
   298  	if err != nil {
   299  		logrus.Fatalf("Could not open KVDB transaction in GET: %v", err)
   300  		return nil, err
   301  	}
   302  	defer tx.Rollback()
   303  
   304  	bucket := tx.Bucket(pxBucket)
   305  
   306  	if strings.HasPrefix(key, "pwx/test/pwx/test") {
   307  		logrus.Panicf("Double pre")
   308  	}
   309  	val := bucket.Get([]byte(key))
   310  	logrus.Warnf("XXX getting on %v = %v", key, string(val))
   311  	if val == nil {
   312  		return nil, kvdb.ErrNotFound
   313  	}
   314  
   315  	var kvp *kvdb.KVPair
   316  	err = json.Unmarshal(val, &kvp)
   317  	if err != nil {
   318  		logrus.Warnf("Requested key could not be parsed from KVDB: %v, %v (%v)",
   319  			key,
   320  			val,
   321  			err,
   322  		)
   323  		return nil, err
   324  	}
   325  
   326  	kv.normalize(kvp)
   327  
   328  	return kvp, nil
   329  }
   330  
// put stores value under key in the px bucket as a JSON-encoded KVPair,
// publishes the update to watchers and, when ttl is non-zero, schedules a
// timed delete of the key.
//
// The returned pair carries a freshly incremented index in KVDBIndex,
// ModifiedIndex and CreatedIndex, and its Key has the domain prefix
// stripped. kvdb.ErrNotFound is returned when the DB handle is nil or the
// px bucket is missing. Callers are expected to hold kv.mutex.
func (kv *boltKV) put(
	key string,
	value interface{},
	ttl uint64,
	action kvdb.KVAction,
) (*kvdb.KVPair, error) {
	var (
		kvp *kvdb.KVPair
		b   []byte
		err error
	)

	if kv.db == nil {
		return nil, kvdb.ErrNotFound
	}

	// XXX FIXME some bug above this causes the prefix to be pre-loaded.
	// Strip and re-add the domain so the key is prefixed exactly once.
	key = strings.TrimPrefix(key, kv.domain)
	key = kv.domain + key

	tx, err := kv.db.Begin(true)
	if err != nil {
		return nil, err
	}
	defer tx.Rollback()

	bucket := tx.Bucket(pxBucket)
	if bucket == nil {
		// err is always nil here; it is only logged for symmetry.
		logrus.Warnf("Requested bucket not found in internal KVDB: %v (%v)",
			pxBucket,
			err,
		)
		return nil, kvdb.ErrNotFound
	}

	// XXX FIXME is this going to work across restarts?
	// The index is consumed even if a later step in this function fails.
	index := atomic.AddUint64(&kv.index, 1)

	b, err = common.ToBytes(value)
	if err != nil {
		return nil, err
	}

	kvp = &kvdb.KVPair{
		Key:           key,
		Value:         b,
		TTL:           int64(ttl),
		KVDBIndex:     index,
		ModifiedIndex: index,
		CreatedIndex:  index,
		Action:        action,
	}

	// The stored pair carries the domain-stripped key; the bucket key below
	// keeps the domain prefix.
	kv.normalize(kvp)

	enc, err := json.Marshal(kvp)
	if err != nil {
		logrus.Warnf("Requested KVP cannot be marshalled into internal KVDB: %v (%v)",
			kvp,
			err,
		)
		return nil, err
	}

	logrus.Warnf("XXX putting on %v with %v %v", key, ttl, time.Duration(ttl))
	if err = bucket.Put([]byte(key), enc); err != nil {
		logrus.Warnf("Requested KVP could not be inserted into internal KVDB: %v (%v)",
			kvp,
			err,
		)
		return nil, err
	}

	if err = tx.Commit(); err != nil {
		logrus.Fatalf("Could not commit put transaction in KVDB bucket for %v (%v): %v",
			key,
			enc,
			err,
		)
		return nil, err
	}

	// Watchers are notified only after the transaction has committed.
	kv.dist.NewUpdate(&watchUpdate{kvp.Key, *kvp, nil})

	// XXX FIXME - need to re-instate the timers after a crash
	if ttl != 0 {
		// NOTE(review): time.Duration(ttl) treats ttl as nanoseconds; the
		// commented-out line suggests seconds were once intended — confirm
		// the unit callers pass.
		// time.AfterFunc(time.Second*time.Duration(ttl), func() {
		time.AfterFunc(time.Duration(ttl), func() {
			// TODO: handle error
			// The timer callback takes kv.mutex itself before deleting.
			kv.mutex.Lock()
			defer kv.mutex.Unlock()
			logrus.Warnf("XXX TRIGGERING auto delete on %v %v", key, ttl)
			if _, err := kv.delete(key); err != nil {
				logrus.Warnf("Error while performing a timed DB delete on key %v: %v", key, err)
			}
		})
	}

	return kvp, nil
}
   431  
   432  // enumerate returns a list of values and creates a copy if specified
   433  func (kv *boltKV) enumerate(prefix string) (kvdb.KVPairs, error) {
   434  	if kv.db == nil {
   435  		return nil, kvdb.ErrNotFound
   436  	}
   437  
   438  	// XXX FIXME some bug above this cases the prefix to be pre-loaded.
   439  	prefix = strings.TrimPrefix(prefix, kv.domain)
   440  	prefix = kv.domain + prefix
   441  
   442  	var kvps = make(kvdb.KVPairs, 0, 100)
   443  
   444  	kv.db.View(func(tx *bolt.Tx) error {
   445  		bucket := tx.Bucket(pxBucket)
   446  		if bucket == nil {
   447  			logrus.Warnf("Requested bucket not found in internal KVDB: %v",
   448  				pxBucket,
   449  			)
   450  			return kvdb.ErrNotFound
   451  		}
   452  
   453  		c := bucket.Cursor()
   454  		for k, v := c.First(); k != nil; k, v = c.Next() {
   455  			if strings.HasPrefix(string(k), prefix) && !strings.Contains(string(k), "/_") {
   456  				var kvp *kvdb.KVPair
   457  				if err := json.Unmarshal(v, &kvp); err != nil {
   458  					logrus.Warnf("Enumerated prefix could not be parsed from KVDB: %v, %v (%v)",
   459  						prefix,
   460  						v,
   461  						err,
   462  					)
   463  					logrus.Fatalf("Could not enumerate internal KVDB: %v: %v",
   464  						v,
   465  						err,
   466  					)
   467  					return err
   468  				}
   469  				kv.normalize(kvp)
   470  				kvps = append(kvps, kvp)
   471  			}
   472  		}
   473  		return nil
   474  	})
   475  
   476  	return kvps, nil
   477  }
   478  
// delete removes key from the px bucket and publishes the deletion to
// watchers.
//
// The returned pair is the value read before the delete, with KVDBIndex and
// ModifiedIndex set to a freshly incremented index and Action set to
// kvdb.KVDelete. Any error from the preceding get (including
// kvdb.ErrNotFound for a missing key) is returned unchanged. Callers are
// expected to hold kv.mutex.
func (kv *boltKV) delete(key string) (*kvdb.KVPair, error) {
	if kv.db == nil {
		return nil, kvdb.ErrNotFound
	}

	// XXX FIXME some bug above this causes the prefix to be pre-loaded.
	// Strip and re-add the domain so the key is prefixed exactly once.
	key = strings.TrimPrefix(key, kv.domain)
	key = kv.domain + key

	// The current value is read in its own transaction before the
	// read-write transaction below is opened.
	kvp, err := kv.get(key)
	if err != nil {
		return nil, err
	}
	kvp.KVDBIndex = atomic.AddUint64(&kv.index, 1)
	kvp.ModifiedIndex = kvp.KVDBIndex
	kvp.Action = kvdb.KVDelete

	tx, err := kv.db.Begin(true)
	if err != nil {
		return nil, err
	}
	defer tx.Rollback()

	bucket := tx.Bucket(pxBucket)
	if bucket == nil {
		// err is always nil here; it is only logged for symmetry.
		logrus.Warnf("Requested bucket for delete not found in internal KVDB: %v (%v)",
			pxBucket,
			err,
		)
		return nil, kvdb.ErrNotFound
	}

	logrus.Warnf("XXX deleting on %v", key)
	if err = bucket.Delete([]byte(key)); err != nil {
		logrus.Warnf("Requested KVP for delete could not be deleted from internal KVDB: %v (%v)",
			kvp,
			err,
		)
		return nil, err
	}

	if err = tx.Commit(); err != nil {
		logrus.Fatalf("Could not commit delete transaction in KVDB bucket for %v: %v",
			key,
			err,
		)
		return nil, err
	}

	// Watchers are notified only after the transaction has committed.
	kv.dist.NewUpdate(&watchUpdate{kvp.Key, *kvp, nil})
	return kvp, nil
}
   531  
// exists reports whether key is present by delegating to get: a nil error
// means the key exists and the stored pair is returned with it.
func (kv *boltKV) exists(key string) (*kvdb.KVPair, error) {
	return kv.get(key)
}
   535  
   536  func (kv *boltKV) Get(key string) (*kvdb.KVPair, error) {
   537  	kv.mutex.Lock()
   538  	defer kv.mutex.Unlock()
   539  	v, err := kv.get(key)
   540  	if err != nil {
   541  		return nil, err
   542  	}
   543  	return v, nil
   544  }
   545  
// snapDB copies the database file at dbPath to a new file named
// dbPath + ".snap." + time.Now().String() and returns the new file's path.
//
// The source is always dbPath, regardless of the path this instance was
// opened with.
//
// NOTE(review): this is a raw file copy taken while the bolt handle is
// presumably still open; bolt offers transaction-based copy helpers for a
// consistent backup — confirm a raw copy is safe here.
func (kv *boltKV) snapDB() (string, error) {
	snapPath := dbPath + ".snap." + time.Now().String()

	from, err := os.Open(dbPath)
	if err != nil {
		logrus.Fatalf("Could not open bolt DB: %v", err)
		return "", err
	}
	defer from.Close()

	to, err := os.OpenFile(snapPath, os.O_RDWR|os.O_CREATE, 0666)
	if err != nil {
		logrus.Fatalf("Could not create bolt DB snap: %v", err)
		return "", err
	}
	// The error from closing the destination file is not checked.
	defer to.Close()

	_, err = io.Copy(to, from)
	if err != nil {
		logrus.Fatalf("Could not copy bolt DB snap: %v", err)
		return "", err
	}

	return snapPath, nil
}
   571  
   572  func (kv *boltKV) Snapshot(prefix []string, consistent bool) (kvdb.Kvdb, uint64, error) {
   573  	kv.mutex.Lock()
   574  	defer kv.mutex.Unlock()
   575  
   576  	_, err := kv.put(bootstrapKey, time.Now().UnixNano(), 0, kvdb.KVCreate)
   577  	if err != nil {
   578  		logrus.Fatalf("Could not create bootstrap key during snapshot: %v", err)
   579  		return nil, 0, err
   580  	}
   581  
   582  	snapPath, err := kv.snapDB()
   583  	if err != nil {
   584  		logrus.Fatalf("Could not create DB snapshot: %v", err)
   585  		return nil, 0, err
   586  	}
   587  
   588  	options := make(map[string]string)
   589  	options[KvSnap] = snapPath
   590  
   591  	snapKV, err := New(
   592  		kv.domain,
   593  		kv.machines,
   594  		options,
   595  		kv.FatalCb,
   596  	)
   597  
   598  	highestKvPair, _ := kv.delete(bootstrapKey)
   599  
   600  	return snapKV, highestKvPair.ModifiedIndex, nil
   601  }
   602  
// Put stores value under key with the given ttl, recording the action as
// kvdb.KVSet. It holds kv.mutex for the duration of the write.
func (kv *boltKV) Put(
	key string,
	value interface{},
	ttl uint64,
) (*kvdb.KVPair, error) {
	kv.mutex.Lock()
	defer kv.mutex.Unlock()
	return kv.put(key, value, ttl, kvdb.KVSet)
}
   612  
   613  func (kv *boltKV) GetVal(key string, v interface{}) (*kvdb.KVPair, error) {
   614  	kv.mutex.Lock()
   615  	defer kv.mutex.Unlock()
   616  	kvp, err := kv.get(key)
   617  	if err != nil {
   618  		return nil, err
   619  	}
   620  
   621  	err = json.Unmarshal(kvp.Value, v)
   622  	return kvp, err
   623  }
   624  
   625  func (kv *boltKV) Create(
   626  	key string,
   627  	value interface{},
   628  	ttl uint64,
   629  ) (*kvdb.KVPair, error) {
   630  	kv.mutex.Lock()
   631  	defer kv.mutex.Unlock()
   632  
   633  	result, err := kv.exists(key)
   634  	if err != nil {
   635  		return kv.put(key, value, ttl, kvdb.KVCreate)
   636  	}
   637  	return result, kvdb.ErrExist
   638  }
   639  
   640  // XXX needs to be atomic
   641  func (kv *boltKV) Update(
   642  	key string,
   643  	value interface{},
   644  	ttl uint64,
   645  ) (*kvdb.KVPair, error) {
   646  	kv.mutex.Lock()
   647  	defer kv.mutex.Unlock()
   648  
   649  	if _, err := kv.exists(key); err != nil {
   650  		return nil, kvdb.ErrNotFound
   651  	}
   652  	kvp, err := kv.put(key, value, ttl, kvdb.KVSet)
   653  	if err != nil {
   654  		return nil, err
   655  	}
   656  
   657  	kvp.Action = kvdb.KVSet
   658  	return kvp, nil
   659  }
   660  
// Enumerate returns the pairs whose key starts with prefix, holding
// kv.mutex while the bucket is scanned.
func (kv *boltKV) Enumerate(prefix string) (kvdb.KVPairs, error) {
	kv.mutex.Lock()
	defer kv.mutex.Unlock()
	return kv.enumerate(prefix)
}
   666  
// Delete removes key and returns the deleted pair, holding kv.mutex for
// the duration of the operation.
func (kv *boltKV) Delete(key string) (*kvdb.KVPair, error) {
	kv.mutex.Lock()
	defer kv.mutex.Unlock()

	return kv.delete(key)
}
   673  
   674  func (kv *boltKV) DeleteTree(prefix string) error {
   675  	kv.mutex.Lock()
   676  	defer kv.mutex.Unlock()
   677  
   678  	if len(prefix) > 0 && !strings.HasSuffix(prefix, kvdb.DefaultSeparator) {
   679  		prefix += kvdb.DefaultSeparator
   680  	}
   681  
   682  	kvp, err := kv.enumerate(prefix)
   683  	if err != nil {
   684  		return err
   685  	}
   686  	for _, v := range kvp {
   687  		// TODO: multiple errors
   688  		if _, iErr := kv.delete(v.Key); iErr != nil {
   689  			err = iErr
   690  		}
   691  	}
   692  	return err
   693  }
   694  
   695  func (kv *boltKV) Keys(prefix, sep string) ([]string, error) {
   696  	if "" == sep {
   697  		sep = "/"
   698  	}
   699  
   700  	// XXX FIXME some bug above this cases the prefix to be pre-loaded.
   701  	prefix = strings.TrimPrefix(prefix, kv.domain)
   702  	prefix = kv.domain + prefix
   703  
   704  	lenPrefix := len(prefix)
   705  	lenSep := len(sep)
   706  	if prefix[lenPrefix-lenSep:] != sep {
   707  		prefix += sep
   708  		lenPrefix += lenSep
   709  	}
   710  	seen := make(map[string]bool)
   711  	kv.mutex.Lock()
   712  	defer kv.mutex.Unlock()
   713  
   714  	kv.db.View(func(tx *bolt.Tx) error {
   715  		bucket := tx.Bucket(pxBucket)
   716  		if bucket == nil {
   717  			logrus.Warnf("Requested bucket not found in internal KVDB: %v",
   718  				pxBucket,
   719  			)
   720  			return kvdb.ErrNotFound
   721  		}
   722  
   723  		c := bucket.Cursor()
   724  		for k, _ := c.First(); k != nil; k, _ = c.Next() {
   725  			if strings.HasPrefix(string(k), prefix) && !strings.Contains(string(k), "/_") {
   726  				key := k[lenPrefix:]
   727  				if idx := strings.Index(string(key), sep); idx > 0 {
   728  					key = key[:idx]
   729  				}
   730  				seen[string(key)] = true
   731  			}
   732  		}
   733  		return nil
   734  	})
   735  
   736  	retList := make([]string, len(seen))
   737  	i := 0
   738  	for k := range seen {
   739  		retList[i] = strings.TrimPrefix(k, kv.domain)
   740  		i++
   741  	}
   742  
   743  	return retList, nil
   744  }
   745  
   746  func (kv *boltKV) CompareAndSet(
   747  	kvp *kvdb.KVPair,
   748  	flags kvdb.KVFlags,
   749  	prevValue []byte,
   750  ) (*kvdb.KVPair, error) {
   751  	kv.mutex.Lock()
   752  	defer kv.mutex.Unlock()
   753  
   754  	logrus.Infof("XXX CompareAndSet %v", kvp)
   755  
   756  	result, err := kv.exists(kvp.Key)
   757  	if err != nil {
   758  		return nil, err
   759  	}
   760  	if prevValue != nil {
   761  		if !bytes.Equal(result.Value, prevValue) {
   762  			return nil, kvdb.ErrValueMismatch
   763  		}
   764  	}
   765  	if flags == kvdb.KVModifiedIndex {
   766  		if kvp.ModifiedIndex != result.ModifiedIndex {
   767  			return nil, kvdb.ErrValueMismatch
   768  		}
   769  	}
   770  	return kv.put(kvp.Key, kvp.Value, 0, kvdb.KVSet)
   771  }
   772  
   773  func (kv *boltKV) CompareAndDelete(
   774  	kvp *kvdb.KVPair,
   775  	flags kvdb.KVFlags,
   776  ) (*kvdb.KVPair, error) {
   777  	logrus.Infof("XXX CompareAndDelete %v", kvp)
   778  
   779  	kv.mutex.Lock()
   780  	defer kv.mutex.Unlock()
   781  
   782  	// XXX FIXME this needs to be atomic cluster wide
   783  
   784  	logrus.Warnf("XXX Checking %v", kvp.Key)
   785  	result, err := kv.exists(kvp.Key)
   786  	if err != nil {
   787  		return nil, err
   788  	}
   789  
   790  	if !bytes.Equal(result.Value, kvp.Value) {
   791  		logrus.Warnf("CompareAndDelete failed because of value mismatch %v != %v",
   792  			result.Value, kvp.Value)
   793  		return nil, kvdb.ErrNotFound
   794  	}
   795  	if kvp.ModifiedIndex != result.ModifiedIndex {
   796  		logrus.Warnf("CompareAndDelete failed because of modified index mismatch %v != %v",
   797  			result.ModifiedIndex, kvp.ModifiedIndex)
   798  		return nil, kvdb.ErrNotFound
   799  	}
   800  	return kv.delete(kvp.Key)
   801  }
   802  
   803  func (kv *boltKV) WatchKey(
   804  	key string,
   805  	waitIndex uint64,
   806  	opaque interface{},
   807  	cb kvdb.WatchCB,
   808  ) error {
   809  	kv.mutex.Lock()
   810  	defer kv.mutex.Unlock()
   811  
   812  	go kv.watchCb(
   813  		kv.dist.Add(),
   814  		key,
   815  		&watchData{
   816  			cb:        cb,
   817  			waitIndex: waitIndex,
   818  			opaque:    opaque,
   819  		},
   820  		false,
   821  	)
   822  
   823  	return nil
   824  }
   825  
   826  func (kv *boltKV) WatchTree(
   827  	prefix string,
   828  	waitIndex uint64,
   829  	opaque interface{},
   830  	cb kvdb.WatchCB,
   831  ) error {
   832  	kv.mutex.Lock()
   833  	defer kv.mutex.Unlock()
   834  
   835  	// XXX FIXME - some top level code has a bug and sends the prefix preloaded
   836  	prefix = strings.TrimPrefix(prefix, kv.domain)
   837  
   838  	go kv.watchCb(
   839  		kv.dist.Add(),
   840  		prefix,
   841  		&watchData{
   842  			cb:        cb,
   843  			waitIndex: waitIndex,
   844  			opaque:    opaque,
   845  		},
   846  		true,
   847  	)
   848  
   849  	return nil
   850  }
   851  
// Compact is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) Compact(index uint64) error {
	return kvdb.ErrNotSupported
}
   855  
// Lock acquires the lock on key by calling LockWithID with the locker ID
// "locked".
func (kv *boltKV) Lock(key string) (*kvdb.KVPair, error) {
	return kv.LockWithID(key, "locked")
}
   859  
// LockWithID acquires the lock on key for lockerID by calling
// LockWithTimeout with kvdb.DefaultLockTryDuration as the try duration and
// kv.GetLockHoldDuration() as the hold duration.
func (kv *boltKV) LockWithID(
	key string,
	lockerID string,
) (*kvdb.KVPair, error) {
	return kv.LockWithTimeout(key, lockerID, kvdb.DefaultLockTryDuration, kv.GetLockHoldDuration())
}
   866  
   867  func (kv *boltKV) LockWithTimeout(
   868  	key string,
   869  	lockerID string,
   870  	lockTryDuration time.Duration,
   871  	lockHoldDuration time.Duration,
   872  ) (*kvdb.KVPair, error) {
   873  	logrus.Infof("XXX Lock %v %v %v", key, lockTryDuration, lockHoldDuration)
   874  
   875  	duration := time.Second
   876  
   877  	// XXX FIXME - if we crash, we need to cleanup this lock.
   878  	result, err := kv.Create(key, lockerID, uint64(lockHoldDuration))
   879  	startTime := time.Now()
   880  	for count := 0; err != nil; count++ {
   881  		time.Sleep(duration)
   882  		result, err = kv.Create(key, lockerID, uint64(lockHoldDuration))
   883  		if err != nil && count > 0 && count%15 == 0 {
   884  			var currLockerID string
   885  			if _, errGet := kv.GetVal(key, currLockerID); errGet == nil {
   886  				logrus.Infof("Lock %v locked for %v seconds, tag: %v",
   887  					key, count, currLockerID)
   888  			}
   889  		}
   890  
   891  		if err != nil && time.Since(startTime) > lockTryDuration {
   892  			logrus.Warnf("Timeout waiting for lock on %v: count=%v err=%v", key, count, err)
   893  			return nil, err
   894  		}
   895  	}
   896  
   897  	lockChan := make(chan int)
   898  	kv.mutex.Lock()
   899  	logrus.Warnf("XXX Locked %v", key)
   900  	kv.locks[key] = lockChan
   901  	kv.mutex.Unlock()
   902  	if lockHoldDuration > 0 {
   903  		go func() {
   904  			timeout := time.After(lockHoldDuration)
   905  			for {
   906  				select {
   907  				case <-timeout:
   908  					logrus.Warnf("XXX LOCK timeout on %v after %v", key, lockHoldDuration)
   909  					kv.LockTimedout(key, lockHoldDuration)
   910  				case <-lockChan:
   911  					logrus.Warnf("XXX LOCK chan wakeup on %v", key)
   912  					return
   913  				}
   914  			}
   915  		}()
   916  	}
   917  
   918  	return result, err
   919  }
   920  
   921  func (kv *boltKV) Unlock(kvp *kvdb.KVPair) error {
   922  	logrus.Warnf("XXX Unlocking %v", kvp)
   923  	if kvp == nil {
   924  		logrus.Panicf("Unlock on a nil kvp")
   925  	}
   926  	kv.mutex.Lock()
   927  	lockChan, ok := kv.locks[kvp.Key]
   928  	logrus.Warnf("XXX Unlock chan %v on %v", lockChan, kvp.Key)
   929  	if ok {
   930  		delete(kv.locks, kvp.Key)
   931  	}
   932  	kv.mutex.Unlock()
   933  	if lockChan != nil {
   934  		logrus.Warnf("XXX Waking up chan on %v", kvp.Key)
   935  		close(lockChan)
   936  	}
   937  
   938  	_, err := kv.CompareAndDelete(kvp, kvdb.KVFlags(0))
   939  
   940  	return err
   941  }
   942  
// EnumerateWithSelect is not supported by this implementation; it always
// returns kvdb.ErrNotSupported.
func (kv *boltKV) EnumerateWithSelect(
	prefix string,
	enumerateSelect kvdb.EnumerateSelect,
	copySelect kvdb.CopySelect,
) ([]interface{}, error) {
	return nil, kvdb.ErrNotSupported
}
   950  
// IsKeyLocked is not supported by this implementation; it always returns
// false, an empty string and kvdb.ErrNotSupported.
func (kv *boltKV) IsKeyLocked(key string) (bool, string, error) {
	return false, "", kvdb.ErrNotSupported
}
   954  
// EnumerateKVPWithSelect is not supported by this implementation; it
// always returns kvdb.ErrNotSupported.
func (kv *boltKV) EnumerateKVPWithSelect(
	prefix string,
	enumerateSelect kvdb.EnumerateKVPSelect,
	copySelect kvdb.CopyKVPSelect,
) (kvdb.KVPairs, error) {
	return nil, kvdb.ErrNotSupported
}
   962  
// GetWithCopy is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) GetWithCopy(
	key string,
	copySelect kvdb.CopySelect,
) (interface{}, error) {
	return nil, kvdb.ErrNotSupported
}
   969  
// TxNew is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) TxNew() (kvdb.Tx, error) {
	return nil, kvdb.ErrNotSupported
}
   973  
// normalize strips the instance's domain prefix from kvp.Key in place.
// kvp must be non-nil.
func (kv *boltKV) normalize(kvp *kvdb.KVPair) {
	kvp.Key = strings.TrimPrefix(kvp.Key, kv.domain)
}
   977  
   978  func copyWatchKeys(w map[string]*watchData) []string {
   979  	keys := make([]string, len(w))
   980  	i := 0
   981  	for key := range w {
   982  		keys[i] = key
   983  		i++
   984  	}
   985  	return keys
   986  }
   987  
   988  func (kv *boltKV) watchCb(
   989  	q WatchUpdateQueue,
   990  	prefix string,
   991  	v *watchData,
   992  	treeWatch bool,
   993  ) {
   994  	for {
   995  		logrus.Warnf("XXX watchCb on %v", prefix)
   996  		update := q.Dequeue()
   997  		logrus.Warnf("XXX watchCb compare on %v %v %v %v %v",
   998  			treeWatch, update.key, prefix, v.waitIndex, update.kvp.ModifiedIndex)
   999  		if ((treeWatch && strings.HasPrefix(update.key, prefix)) ||
  1000  			(!treeWatch && update.key == prefix)) &&
  1001  			(v.waitIndex == 0 || v.waitIndex < update.kvp.ModifiedIndex) {
  1002  			logrus.Warnf("XXX watchCb FIRED on %v", prefix)
  1003  			err := v.cb(update.key, v.opaque, &update.kvp, update.err)
  1004  			if err != nil {
  1005  				_ = v.cb("", v.opaque, nil, kvdb.ErrWatchStopped)
  1006  				kv.dist.Remove(q)
  1007  				return
  1008  			}
  1009  		}
  1010  	}
  1011  }
  1012  
// SnapPut is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) SnapPut(snapKvp *kvdb.KVPair) (*kvdb.KVPair, error) {
	return nil, kvdb.ErrNotSupported
}
  1016  
// AddUser is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) AddUser(username string, password string) error {
	return kvdb.ErrNotSupported
}
  1020  
// RemoveUser is not supported by this implementation; it always returns
// kvdb.ErrNotSupported.
func (kv *boltKV) RemoveUser(username string) error {
	return kvdb.ErrNotSupported
}
  1024  
// GrantUserAccess is not supported by this implementation; it always
// returns kvdb.ErrNotSupported.
func (kv *boltKV) GrantUserAccess(
	username string,
	permType kvdb.PermissionType,
	subtree string,
) error {
	return kvdb.ErrNotSupported
}
  1032  
// RevokeUsersAccess is not supported by this implementation; it always
// returns kvdb.ErrNotSupported.
func (kv *boltKV) RevokeUsersAccess(
	username string,
	permType kvdb.PermissionType,
	subtree string,
) error {
	return kvdb.ErrNotSupported
}
  1040  
// Serialize enumerates every pair in the domain (empty prefix) and returns
// the result of kv.SerializeAll on them.
func (kv *boltKV) Serialize() ([]byte, error) {

	kvps, err := kv.Enumerate("")
	if err != nil {
		return nil, err
	}
	return kv.SerializeAll(kvps)
}
  1049  
// Deserialize decodes b into pairs by delegating to kv.DeserializeAll.
func (kv *boltKV) Deserialize(b []byte) (kvdb.KVPairs, error) {
	return kv.DeserializeAll(b)
}
  1053  
// MemberList based Bolt implementation
var (
	// mtx guards items.
	mtx sync.RWMutex
	// items is the key/value state modified by delegate.NotifyMsg and
	// serialized by delegate.LocalState.
	items = map[string]string{}
	// broadcasts is the queue delegate.GetBroadcasts drains.
	broadcasts *memberlist.TransmitLimitedQueue
)
  1060  
// broadcast is a single outgoing message placed on the broadcasts queue by
// mlPut and mlDel.
type broadcast struct {
	// msg is the raw payload returned by Message.
	msg    []byte
	// notify is optional; when non-nil it is closed by Finished.
	notify chan<- struct{}
}
  1065  
// delegate is the stateless receiver that mlStart installs as the memberlist
// config's Delegate. Its methods operate on the package-level items map and
// broadcasts queue.
type delegate struct{}
  1067  
// update is the JSON-encoded unit of change exchanged between nodes. A slice
// of updates, prefixed with the byte 'd', forms one data message.
type update struct {
	// Action selects the operation applied to every key in Data: "add"
	// stores the key/value pair, "del" removes the key. NotifyMsg ignores
	// any other value.
	Action string // add, del
	// Data maps keys to values. For "del" only the keys are used.
	Data   map[string]string
}
  1072  
  1073  func (b *broadcast) Invalidates(other memberlist.Broadcast) bool {
  1074  	return false
  1075  }
  1076  
  1077  func (b *broadcast) Message() []byte {
  1078  	return b.msg
  1079  }
  1080  
  1081  func (b *broadcast) Finished() {
  1082  	if b.notify != nil {
  1083  		close(b.notify)
  1084  	}
  1085  }
  1086  
  1087  func (d *delegate) NodeMeta(limit int) []byte {
  1088  	return []byte{}
  1089  }
  1090  
  1091  func (d *delegate) NotifyMsg(b []byte) {
  1092  	if len(b) == 0 {
  1093  		return
  1094  	}
  1095  
  1096  	switch b[0] {
  1097  	case 'd': // data
  1098  		var updates []*update
  1099  		if err := json.Unmarshal(b[1:], &updates); err != nil {
  1100  			return
  1101  		}
  1102  		mtx.Lock()
  1103  		for _, u := range updates {
  1104  			for k, v := range u.Data {
  1105  				switch u.Action {
  1106  				case "add":
  1107  					items[k] = v
  1108  				case "del":
  1109  					delete(items, k)
  1110  				}
  1111  			}
  1112  		}
  1113  		mtx.Unlock()
  1114  	}
  1115  }
  1116  
  1117  func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
  1118  	return broadcasts.GetBroadcasts(overhead, limit)
  1119  }
  1120  
  1121  func (d *delegate) LocalState(join bool) []byte {
  1122  	mtx.RLock()
  1123  	m := items
  1124  	mtx.RUnlock()
  1125  	b, _ := json.Marshal(m)
  1126  	return b
  1127  }
  1128  
  1129  func (d *delegate) MergeRemoteState(buf []byte, join bool) {
  1130  	if len(buf) == 0 {
  1131  		return
  1132  	}
  1133  	if !join {
  1134  		return
  1135  	}
  1136  	var m map[string]string
  1137  	if err := json.Unmarshal(buf, &m); err != nil {
  1138  		return
  1139  	}
  1140  	mtx.Lock()
  1141  	for k, v := range m {
  1142  		items[k] = v
  1143  	}
  1144  	mtx.Unlock()
  1145  }
  1146  
  1147  func mlPut(key string, val string) error {
  1148  	mtx.Lock()
  1149  	defer mtx.Unlock()
  1150  
  1151  	b, err := json.Marshal([]*update{
  1152  		{
  1153  			Action: "add",
  1154  			Data: map[string]string{
  1155  				key: val,
  1156  			},
  1157  		},
  1158  	})
  1159  
  1160  	if err != nil {
  1161  		return err
  1162  	}
  1163  
  1164  	items[key] = val
  1165  
  1166  	broadcasts.QueueBroadcast(&broadcast{
  1167  		msg:    append([]byte("d"), b...),
  1168  		notify: nil,
  1169  	})
  1170  
  1171  	return nil
  1172  }
  1173  
  1174  func mlDel(key string) error {
  1175  	mtx.Lock()
  1176  	defer mtx.Unlock()
  1177  
  1178  	b, err := json.Marshal([]*update{
  1179  		{
  1180  			Action: "del",
  1181  			Data: map[string]string{
  1182  				key: "",
  1183  			},
  1184  		},
  1185  	})
  1186  
  1187  	if err != nil {
  1188  		return err
  1189  	}
  1190  
  1191  	delete(items, key)
  1192  
  1193  	broadcasts.QueueBroadcast(&broadcast{
  1194  		msg:    append([]byte("d"), b...),
  1195  		notify: nil,
  1196  	})
  1197  
  1198  	return nil
  1199  }
  1200  
  1201  func mlGet(key string) (error, []byte) {
  1202  	mtx.Lock()
  1203  	defer mtx.Unlock()
  1204  
  1205  	val := items[key]
  1206  	return nil, []byte(val)
  1207  }
  1208  
  1209  func mlStart() error {
  1210  	hostname, _ := os.Hostname()
  1211  	c := memberlist.DefaultLocalConfig()
  1212  	c.Delegate = &delegate{}
  1213  	c.BindPort = 0
  1214  	c.Name = hostname + "-" + "UUIDXXX"
  1215  	m, err := memberlist.Create(c)
  1216  	if err != nil {
  1217  		return err
  1218  	}
  1219  
  1220  	// XXX TODO
  1221  	members := []string{"127.0.0.1"}
  1222  
  1223  	if len(members) > 0 {
  1224  		if members, err := m.Join(members); err != nil {
  1225  			return err
  1226  		} else {
  1227  			logrus.Infof("Internal KVDB joining members: %v", members)
  1228  		}
  1229  	}
  1230  
  1231  	broadcasts = &memberlist.TransmitLimitedQueue{
  1232  		NumNodes: func() int {
  1233  			return m.NumMembers()
  1234  		},
  1235  		RetransmitMult: 3,
  1236  	}
  1237  	node := m.LocalNode()
  1238  	fmt.Printf("Local member %s:%d\n", node.Addr, node.Port)
  1239  	return nil
  1240  }