github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/core/raftlease/store.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package raftlease
     5  
     6  import (
     7  	"sync"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	"github.com/juju/clock"
    12  	"github.com/juju/errors"
    13  	"github.com/juju/loggo"
    14  	"github.com/juju/pubsub"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  
    17  	"github.com/juju/juju/core/globalclock"
    18  	"github.com/juju/juju/core/lease"
    19  )
    20  
    21  var logger = loggo.GetLogger("juju.core.raftlease")
    22  
    23  // NotifyTarget defines methods needed to keep an external database
    24  // updated with who holds leases. (In non-test code the notify target
    25  // will generally be the state DB.)
    26  type NotifyTarget interface {
    27  	// Claimed will be called when a new lease has been claimed. Not
    28  	// allowed to return an error because this is purely advisory -
    29  	// the lease claim has still occurred, whether or not the callback
    30  	// succeeds.
    31  	Claimed(lease.Key, string)
    32  
    33  	// Expired will be called when an existing lease has expired. Not
    34  	// allowed to return an error because this is purely advisory.
    35  	Expired(lease.Key)
    36  }
    37  
    38  // TrapdoorFunc returns a trapdoor to be attached to lease details for
    39  // use by clients. This is intended to hold assertions that can be
    40  // added to state transactions to ensure the lease is still held when
    41  // the transaction is applied.
    42  type TrapdoorFunc func(lease.Key, string) lease.Trapdoor
    43  
    44  // ReadonlyFSM defines the methods of the lease FSM the store can use
    45  // - any writes must go through the hub.
    46  type ReadonlyFSM interface {
    47  	// Leases receives a func for retrieving time, because it needs to be
    48  	// determined after potential lock-waiting to be accurate.
    49  	Leases(func() time.Time, ...lease.Key) map[lease.Key]lease.Info
    50  	GlobalTime() time.Time
    51  	Pinned() map[lease.Key][]string
    52  }
    53  
    54  // StoreConfig holds resources and settings needed to run the Store.
    55  type StoreConfig struct {
    56  	FSM           ReadonlyFSM
    57  	Hub           *pubsub.StructuredHub
    58  	Trapdoor      TrapdoorFunc
    59  	RequestTopic  string
    60  	ResponseTopic func(requestID uint64) string
    61  
    62  	Clock          clock.Clock
    63  	ForwardTimeout time.Duration
    64  }
    65  
    66  // NewStore returns a core/lease.Store that manages leases in Raft.
    67  func NewStore(config StoreConfig) *Store {
    68  	return &Store{
    69  		fsm:      config.FSM,
    70  		hub:      config.Hub,
    71  		config:   config,
    72  		prevTime: config.FSM.GlobalTime(),
    73  		metrics:  newMetricsCollector(),
    74  	}
    75  }
    76  
    77  // Store manages a raft FSM and forwards writes through a pubsub hub.
    78  type Store struct {
    79  	fsm       ReadonlyFSM
    80  	hub       *pubsub.StructuredHub
    81  	requestID uint64
    82  	config    StoreConfig
    83  	metrics   *metricsCollector
    84  
    85  	prevTimeMu sync.Mutex
    86  	prevTime   time.Time
    87  }
    88  
    89  // Autoexpire is part of lease.Store.
    90  func (*Store) Autoexpire() bool { return true }
    91  
    92  // ClaimLease is part of lease.Store.
    93  func (s *Store) ClaimLease(key lease.Key, req lease.Request) error {
    94  	err := s.runOnLeader(&Command{
    95  		Version:   CommandVersion,
    96  		Operation: OperationClaim,
    97  		Namespace: key.Namespace,
    98  		ModelUUID: key.ModelUUID,
    99  		Lease:     key.Lease,
   100  		Holder:    req.Holder,
   101  		Duration:  req.Duration,
   102  	})
   103  	return errors.Trace(err)
   104  }
   105  
   106  // ExtendLease is part of lease.Store.
   107  func (s *Store) ExtendLease(key lease.Key, req lease.Request) error {
   108  	return errors.Trace(s.runOnLeader(&Command{
   109  		Version:   CommandVersion,
   110  		Operation: OperationExtend,
   111  		Namespace: key.Namespace,
   112  		ModelUUID: key.ModelUUID,
   113  		Lease:     key.Lease,
   114  		Holder:    req.Holder,
   115  		Duration:  req.Duration,
   116  	}))
   117  }
   118  
   119  // ExpireLease is part of lease.Store.
   120  func (s *Store) ExpireLease(key lease.Key) error {
   121  	// It's always an invalid operation - expiration happens
   122  	// automatically when time is advanced.
   123  	return lease.ErrInvalid
   124  }
   125  
   126  // Leases is part of lease.Store.
   127  func (s *Store) Leases(keys ...lease.Key) map[lease.Key]lease.Info {
   128  	leaseMap := s.fsm.Leases(s.config.Clock.Now, keys...)
   129  	// Add trapdoors into the information from the FSM.
   130  	for k, v := range leaseMap {
   131  		v.Trapdoor = s.config.Trapdoor(k, v.Holder)
   132  		leaseMap[k] = v
   133  	}
   134  	return leaseMap
   135  }
   136  
   137  // Refresh is part of lease.Store.
   138  func (s *Store) Refresh() error {
   139  	return nil
   140  }
   141  
   142  // PinLease is part of lease.Store.
   143  func (s *Store) PinLease(key lease.Key, entity string) error {
   144  	return errors.Trace(s.pinOp(OperationPin, key, entity))
   145  }
   146  
   147  // UnpinLease is part of lease.Store.
   148  func (s *Store) UnpinLease(key lease.Key, entity string) error {
   149  	return errors.Trace(s.pinOp(OperationUnpin, key, entity))
   150  }
   151  
   152  // Pinned is part of the Store interface.
   153  func (s *Store) Pinned() map[lease.Key][]string {
   154  	return s.fsm.Pinned()
   155  }
   156  
   157  func (s *Store) pinOp(operation string, key lease.Key, entity string) error {
   158  	return errors.Trace(s.runOnLeader(&Command{
   159  		Version:   CommandVersion,
   160  		Operation: operation,
   161  		Namespace: key.Namespace,
   162  		ModelUUID: key.ModelUUID,
   163  		Lease:     key.Lease,
   164  		PinEntity: entity,
   165  	}))
   166  }
   167  
   168  // Advance is part of globalclock.Updater.
   169  func (s *Store) Advance(duration time.Duration) error {
   170  	s.prevTimeMu.Lock()
   171  	defer s.prevTimeMu.Unlock()
   172  	newTime := s.prevTime.Add(duration)
   173  	err := s.runOnLeader(&Command{
   174  		Version:   CommandVersion,
   175  		Operation: OperationSetTime,
   176  		OldTime:   s.prevTime,
   177  		NewTime:   newTime,
   178  	})
   179  	if globalclock.IsConcurrentUpdate(err) {
   180  		// Someone else updated before us - get the new time.
   181  		s.prevTime = s.fsm.GlobalTime()
   182  	} else if lease.IsTimeout(err) {
   183  		// Convert this to a globalclock timeout to match the Updater
   184  		// interface.
   185  		err = globalclock.ErrTimeout
   186  	} else if err == nil {
   187  		s.prevTime = newTime
   188  	}
   189  	return errors.Trace(err)
   190  }
   191  
   192  func (s *Store) runOnLeader(command *Command) error {
   193  	bytes, err := command.Marshal()
   194  	if err != nil {
   195  		return errors.Trace(err)
   196  	}
   197  	requestID := atomic.AddUint64(&s.requestID, 1)
   198  	responseTopic := s.config.ResponseTopic(requestID)
   199  
   200  	responseChan := make(chan ForwardResponse, 1)
   201  	errChan := make(chan error)
   202  	unsubscribe, err := s.hub.Subscribe(
   203  		responseTopic,
   204  		func(_ string, resp ForwardResponse, err error) {
   205  			if err != nil {
   206  				errChan <- err
   207  				return
   208  			}
   209  			responseChan <- resp
   210  		},
   211  	)
   212  	if err != nil {
   213  		return errors.Trace(err)
   214  	}
   215  	defer unsubscribe()
   216  
   217  	start := time.Now()
   218  	defer func() {
   219  		elapsed := time.Now().Sub(start)
   220  		logger.Tracef("runOnLeader elapsed from publish: %v", elapsed.Round(time.Millisecond))
   221  	}()
   222  	_, err = s.hub.Publish(s.config.RequestTopic, ForwardRequest{
   223  		Command:       string(bytes),
   224  		ResponseTopic: responseTopic,
   225  	})
   226  	if err != nil {
   227  		s.record(command.Operation, "error", start)
   228  		return errors.Trace(err)
   229  	}
   230  
   231  	select {
   232  	case <-s.config.Clock.After(s.config.ForwardTimeout):
   233  		logger.Infof("timeout")
   234  		s.record(command.Operation, "timeout", start)
   235  		return lease.ErrTimeout
   236  	case err := <-errChan:
   237  		logger.Errorf("%v", err)
   238  		s.record(command.Operation, "error", start)
   239  		return errors.Trace(err)
   240  	case response := <-responseChan:
   241  		err := RecoverError(response.Error)
   242  		logger.Tracef("got response, err %v", err)
   243  		result := "failure"
   244  		if err == nil {
   245  			result = "success"
   246  		}
   247  		s.record(command.Operation, result, start)
   248  		return err
   249  	}
   250  }
   251  
   252  func (s *Store) record(operation, result string, start time.Time) {
   253  	elapsedMS := float64(time.Now().Sub(start)) / float64(time.Millisecond)
   254  	s.metrics.requests.With(prometheus.Labels{
   255  		"operation": operation,
   256  		"result":    result,
   257  	}).Observe(elapsedMS)
   258  }
   259  
   260  // ForwardRequest is a message sent over the hub to the raft forwarder
   261  // (only running on the raft leader node).
   262  type ForwardRequest struct {
   263  	Command       string `yaml:"command"`
   264  	ResponseTopic string `yaml:"response-topic"`
   265  }
   266  
   267  // ForwardResponse is the response sent back from the raft forwarder.
   268  type ForwardResponse struct {
   269  	Error *ResponseError `yaml:"error"`
   270  }
   271  
   272  // ResponseError is used for sending error values back to the lease
   273  // store via the hub.
   274  type ResponseError struct {
   275  	Message string `yaml:"message"`
   276  	Code    string `yaml:"code"`
   277  }
   278  
   279  // AsResponseError returns a *ResponseError that can be sent back over
   280  // the hub in response to a forwarded FSM command.
   281  func AsResponseError(err error) *ResponseError {
   282  	if err == nil {
   283  		return nil
   284  	}
   285  	message := err.Error()
   286  	var code string
   287  	switch errors.Cause(err) {
   288  	case lease.ErrInvalid:
   289  		code = "invalid"
   290  	case globalclock.ErrConcurrentUpdate:
   291  		code = "concurrent-update"
   292  	default:
   293  		code = "error"
   294  	}
   295  	return &ResponseError{
   296  		Message: message,
   297  		Code:    code,
   298  	}
   299  }
   300  
   301  // RecoverError converts a ResponseError back into the specific error
   302  // it represents, or into a generic error if it wasn't one of the
   303  // singleton errors handled.
   304  func RecoverError(resp *ResponseError) error {
   305  	if resp == nil {
   306  		return nil
   307  	}
   308  	switch resp.Code {
   309  	case "invalid":
   310  		return lease.ErrInvalid
   311  	case "concurrent-update":
   312  		return globalclock.ErrConcurrentUpdate
   313  	default:
   314  		return errors.New(resp.Message)
   315  	}
   316  }
   317  
   318  // Describe is part of prometheus.Collector.
   319  func (s *Store) Describe(ch chan<- *prometheus.Desc) {
   320  	s.metrics.Describe(ch)
   321  }
   322  
   323  // Collect is part of prometheus.Collector.
   324  func (s *Store) Collect(ch chan<- prometheus.Metric) {
   325  	s.metrics.Collect(ch)
   326  }