github.com/paybyphone/terraform@v0.9.5-0.20170613192930-9706042ddd51/backend/remote-state/consul/client.go (about)

     1  package consul
     2  
     3  import (
     4  	"bytes"
     5  	"compress/gzip"
     6  	"crypto/md5"
     7  	"encoding/json"
     8  	"errors"
     9  	"fmt"
    10  	"log"
    11  	"sync"
    12  	"time"
    13  
    14  	consulapi "github.com/hashicorp/consul/api"
    15  	multierror "github.com/hashicorp/go-multierror"
    16  	"github.com/hashicorp/terraform/state"
    17  	"github.com/hashicorp/terraform/state/remote"
    18  )
    19  
const (
	// lockSuffix is appended to RemoteClient.Path to form the key used for
	// the Consul lock itself.
	lockSuffix = "/.lock"
	// lockInfoSuffix is appended to RemoteClient.Path to form the key under
	// which the marshaled state.LockInfo of the current holder is stored.
	lockInfoSuffix = "/.lockinfo"
)
    24  
// RemoteClient is a remote client that stores data in Consul.
type RemoteClient struct {
	// Client is the Consul API client used for all KV and lock operations.
	Client *consulapi.Client
	// Path is the KV key holding the state. The lock and lock-info keys are
	// derived from it by appending lockSuffix and lockInfoSuffix.
	Path string
	// GZip, when true, makes Put gzip-compress the state before writing it.
	// Get detects compressed payloads on its own and does not consult this.
	GZip bool

	// mu is held by Get, Put, Delete, Lock and Unlock, and by the lock
	// monitoring goroutine while it attempts to reacquire a lost lock.
	mu sync.Mutex
	// lockState is true if we're using locks
	lockState bool

	// The index of the last state we wrote.
	// If this is > 0, Put will perform a CAS to ensure that the state wasn't
	// changed during the operation. This is important even with locks, because
	// if the client loses the lock for some reason, then reacquires it, we
	// need to make sure that the state was not modified.
	modifyIndex uint64

	// consulLock is the Consul lock handle, created on demand by lock().
	consulLock *consulapi.Lock
	// lockCh is the channel returned by consulLock.Lock. It is nil while no
	// lock is held; the code treats it becoming readable as the lock having
	// been lost.
	lockCh <-chan struct{}

	// info is the lock info passed to the most recent Lock call; it is what
	// putLockInfo writes next to the state and supplies the returned lock ID.
	info *state.LockInfo

	// cancel the goroutine which is monitoring the lock.
	monitorCancel chan struct{}
	// monitorDone is closed by the monitoring goroutine when it exits.
	monitorDone chan struct{}
}
    51  
    52  func (c *RemoteClient) Get() (*remote.Payload, error) {
    53  	c.mu.Lock()
    54  	defer c.mu.Unlock()
    55  
    56  	pair, _, err := c.Client.KV().Get(c.Path, nil)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	if pair == nil {
    61  		return nil, nil
    62  	}
    63  
    64  	c.modifyIndex = pair.ModifyIndex
    65  
    66  	payload := pair.Value
    67  	// If the payload starts with 0x1f, it's gzip, not json
    68  	if len(pair.Value) >= 1 && pair.Value[0] == '\x1f' {
    69  		if data, err := uncompressState(pair.Value); err == nil {
    70  			payload = data
    71  		} else {
    72  			return nil, err
    73  		}
    74  	}
    75  
    76  	md5 := md5.Sum(pair.Value)
    77  	return &remote.Payload{
    78  		Data: payload,
    79  		MD5:  md5[:],
    80  	}, nil
    81  }
    82  
    83  func (c *RemoteClient) Put(data []byte) error {
    84  	c.mu.Lock()
    85  	defer c.mu.Unlock()
    86  
    87  	payload := data
    88  	if c.GZip {
    89  		if compressedState, err := compressState(data); err == nil {
    90  			payload = compressedState
    91  		} else {
    92  			return err
    93  		}
    94  	}
    95  
    96  	kv := c.Client.KV()
    97  
    98  	// default to doing a CAS
    99  	verb := consulapi.KVCAS
   100  
   101  	// Assume a 0 index doesn't need a CAS for now, since we are either
   102  	// creating a new state or purposely overwriting one.
   103  	if c.modifyIndex == 0 {
   104  		verb = consulapi.KVSet
   105  	}
   106  
   107  	// KV.Put doesn't return the new index, so we use a single operation
   108  	// transaction to get the new index with a single request.
   109  	txOps := consulapi.KVTxnOps{
   110  		&consulapi.KVTxnOp{
   111  			Verb:  verb,
   112  			Key:   c.Path,
   113  			Value: payload,
   114  			Index: c.modifyIndex,
   115  		},
   116  	}
   117  
   118  	ok, resp, _, err := kv.Txn(txOps, nil)
   119  	if err != nil {
   120  		return err
   121  	}
   122  
   123  	// transaction was rolled back
   124  	if !ok {
   125  		return fmt.Errorf("consul CAS failed with transaction errors: %v", resp.Errors)
   126  	}
   127  
   128  	if len(resp.Results) != 1 {
   129  		// this probably shouldn't happen
   130  		return fmt.Errorf("expected on 1 response value, got: %d", len(resp.Results))
   131  	}
   132  
   133  	c.modifyIndex = resp.Results[0].ModifyIndex
   134  	return nil
   135  }
   136  
   137  func (c *RemoteClient) Delete() error {
   138  	c.mu.Lock()
   139  	defer c.mu.Unlock()
   140  
   141  	kv := c.Client.KV()
   142  	_, err := kv.Delete(c.Path, nil)
   143  	return err
   144  }
   145  
   146  func (c *RemoteClient) putLockInfo(info *state.LockInfo) error {
   147  	info.Path = c.Path
   148  	info.Created = time.Now().UTC()
   149  
   150  	kv := c.Client.KV()
   151  	_, err := kv.Put(&consulapi.KVPair{
   152  		Key:   c.Path + lockInfoSuffix,
   153  		Value: info.Marshal(),
   154  	}, nil)
   155  
   156  	return err
   157  }
   158  
   159  func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) {
   160  	path := c.Path + lockInfoSuffix
   161  	pair, _, err := c.Client.KV().Get(path, nil)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  	if pair == nil {
   166  		return nil, nil
   167  	}
   168  
   169  	li := &state.LockInfo{}
   170  	err = json.Unmarshal(pair.Value, li)
   171  	if err != nil {
   172  		return nil, fmt.Errorf("error unmarshaling lock info: %s", err)
   173  	}
   174  
   175  	return li, nil
   176  }
   177  
   178  func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) {
   179  	c.mu.Lock()
   180  	defer c.mu.Unlock()
   181  
   182  	if !c.lockState {
   183  		return "", nil
   184  	}
   185  
   186  	c.info = info
   187  
   188  	// These checks only are to ensure we strictly follow the specification.
   189  	// Terraform shouldn't ever re-lock, so provide errors for the 2 possible
   190  	// states if this is called.
   191  	select {
   192  	case <-c.lockCh:
   193  		// We had a lock, but lost it.
   194  		return "", errors.New("lost consul lock, cannot re-lock")
   195  	default:
   196  		if c.lockCh != nil {
   197  			// we have an active lock already
   198  			return "", fmt.Errorf("state %q already locked", c.Path)
   199  		}
   200  	}
   201  
   202  	return c.lock()
   203  }
   204  
// testLockHook, when non-nil, is called by lock() after a lock is acquired
// and its monitoring goroutine has been started. Nothing in this file sets
// it; presumably it is assigned from tests.
var testLockHook func()
   207  
// lock acquires the Consul lock, records the lock info next to the state and
// starts a goroutine that watches for loss of the lock. Both call sites in
// this file (Lock and the monitoring goroutine) invoke it with c.mu held.
//
// It returns c.info.ID on success. Failures from the Consul lock call, and
// the case where the lock could not be obtained, are reported as a
// *state.LockError; in the latter case the error carries the holder's lock
// info when it could be read.
func (c *RemoteClient) lock() (string, error) {
	// Create the lock handle on first use. The monitoring goroutine clears
	// c.consulLock before retrying, so a fresh handle is built after a loss.
	if c.consulLock == nil {
		opts := &consulapi.LockOptions{
			Key: c.Path + lockSuffix,
			// only wait briefly, so terraform has the choice to fail fast or
			// retry as needed.
			LockWaitTime: time.Second,
			LockTryOnce:  true,
		}

		lock, err := c.Client.LockOpts(opts)
		if err != nil {
			return "", err
		}

		c.consulLock = lock
	}

	lockErr := &state.LockError{}

	lockCh, err := c.consulLock.Lock(make(chan struct{}))
	if err != nil {
		lockErr.Err = err
		return "", lockErr
	}

	// A nil channel without an error is treated as "lock not acquired":
	// report who holds it, if that information is available.
	if lockCh == nil {
		lockInfo, e := c.getLockInfo()
		if e != nil {
			lockErr.Err = e
			return "", lockErr
		}

		lockErr.Info = lockInfo
		return "", lockErr
	}

	c.lockCh = lockCh

	err = c.putLockInfo(c.info)
	if err != nil {
		// Release the lock we just took so we don't hold it without info.
		// NOTE(review): unlock waits on c.monitorDone; when this runs inside
		// the monitoring goroutine below, that channel appears to be closed
		// only when the goroutine itself returns — confirm this path cannot
		// block.
		if unlockErr := c.unlock(c.info.ID); unlockErr != nil {
			err = multierror.Append(err, unlockErr)
		}

		return "", err
	}

	// Start a goroutine to monitor the lock state.
	// If we lose the lock due to communication issues with the consul agent,
	// attempt to immediately reacquire the lock. Put will verify the integrity
	// of the state by using a CAS operation.
	c.monitorCancel = make(chan struct{})
	c.monitorDone = make(chan struct{})
	go func(cancel, done chan struct{}) {
		// Signal exit so unlock can safely reset c.lockCh afterwards.
		defer func() {
			close(done)
		}()
		select {
		case <-c.lockCh:
			// The lock channel became readable: the lock was lost.
			for {
				c.mu.Lock()
				// Drop the old handle so lock() builds a new one.
				c.consulLock = nil
				_, err := c.lock()
				c.mu.Unlock()

				if err != nil {
					// We failed to get the lock, keep trying as long as
					// terraform is running. There may be changes in progress,
					// so there's no use in aborting. Either we eventually
					// reacquire the lock, or a Put will fail on a CAS.
					log.Printf("[ERROR] attempting to reacquire lock: %s", err)
					time.Sleep(time.Second)

					// Cancellation is only checked between failed attempts.
					select {
					case <-cancel:
						return
					default:
					}
					continue
				}

				// if the error was nil, the new lock started a new copy of
				// this goroutine.
				return
			}

		case <-cancel:
			return
		}
	}(c.monitorCancel, c.monitorDone)

	if testLockHook != nil {
		testLockHook()
	}

	return c.info.ID, nil
}
   306  
   307  func (c *RemoteClient) Unlock(id string) error {
   308  	c.mu.Lock()
   309  	defer c.mu.Unlock()
   310  
   311  	if !c.lockState {
   312  		return nil
   313  	}
   314  
   315  	return c.unlock(id)
   316  }
   317  
   318  func (c *RemoteClient) unlock(id string) error {
   319  	// cancel our monitoring goroutine
   320  	if c.monitorCancel != nil {
   321  		close(c.monitorCancel)
   322  	}
   323  
   324  	// this doesn't use the lock id, because the lock is tied to the consul client.
   325  	if c.consulLock == nil || c.lockCh == nil {
   326  		return nil
   327  	}
   328  
   329  	select {
   330  	case <-c.lockCh:
   331  		return errors.New("consul lock was lost")
   332  	default:
   333  	}
   334  
   335  	kv := c.Client.KV()
   336  
   337  	var errs error
   338  
   339  	if _, err := kv.Delete(c.Path+lockInfoSuffix, nil); err != nil {
   340  		errs = multierror.Append(errs, err)
   341  	}
   342  
   343  	if err := c.consulLock.Unlock(); err != nil {
   344  		errs = multierror.Append(errs, err)
   345  	}
   346  
   347  	// the monitoring goroutine may be in a select on this chan, so we need to
   348  	// wait for it to return before changing the value.
   349  	<-c.monitorDone
   350  	c.lockCh = nil
   351  
   352  	// This is only cleanup, and will fail if the lock was immediately taken by
   353  	// another client, so we don't report an error to the user here.
   354  	c.consulLock.Destroy()
   355  
   356  	return errs
   357  }
   358  
   359  func compressState(data []byte) ([]byte, error) {
   360  	b := new(bytes.Buffer)
   361  	gz := gzip.NewWriter(b)
   362  	if _, err := gz.Write(data); err != nil {
   363  		return nil, err
   364  	}
   365  	if err := gz.Flush(); err != nil {
   366  		return nil, err
   367  	}
   368  	if err := gz.Close(); err != nil {
   369  		return nil, err
   370  	}
   371  	return b.Bytes(), nil
   372  }
   373  
   374  func uncompressState(data []byte) ([]byte, error) {
   375  	b := new(bytes.Buffer)
   376  	gz, err := gzip.NewReader(bytes.NewReader(data))
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  	b.ReadFrom(gz)
   381  	if err := gz.Close(); err != nil {
   382  		return nil, err
   383  	}
   384  	return b.Bytes(), nil
   385  }