github.com/hugorut/terraform@v1.1.3/src/backend/remote-state/oss/client.go (about)

     1  package oss
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"encoding/hex"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  	"time"
    12  
    13  	"github.com/aliyun/aliyun-oss-go-sdk/oss"
    14  	"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
    15  	"github.com/hashicorp/go-multierror"
    16  	uuid "github.com/hashicorp/go-uuid"
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/hugorut/terraform/src/states/remote"
    20  	"github.com/hugorut/terraform/src/states/statemgr"
    21  )
    22  
    23  const (
    24  	// Store the last saved serial in tablestore with this suffix for consistency checks.
    25  	stateIDSuffix = "-md5"
    26  
    27  	pkName = "LockID"
    28  )
    29  
    30  var (
    31  	// The amount of time we will retry a state waiting for it to match the
    32  	// expected checksum.
    33  	consistencyRetryTimeout = 10 * time.Second
    34  
    35  	// delay when polling the state
    36  	consistencyRetryPollInterval = 2 * time.Second
    37  )
    38  
    39  // test hook called when checksums don't match
    40  var testChecksumHook func()
    41  
    42  type RemoteClient struct {
    43  	ossClient            *oss.Client
    44  	otsClient            *tablestore.TableStoreClient
    45  	bucketName           string
    46  	stateFile            string
    47  	lockFile             string
    48  	serverSideEncryption bool
    49  	acl                  string
    50  	otsTable             string
    51  }
    52  
    53  func (c *RemoteClient) Get() (payload *remote.Payload, err error) {
    54  	deadline := time.Now().Add(consistencyRetryTimeout)
    55  
    56  	// If we have a checksum, and the returned payload doesn't match, we retry
    57  	// up until deadline.
    58  	for {
    59  		payload, err = c.getObj()
    60  		if err != nil {
    61  			return nil, err
    62  		}
    63  
    64  		// If the remote state was manually removed the payload will be nil,
    65  		// but if there's still a digest entry for that state we will still try
    66  		// to compare the MD5 below.
    67  		var digest []byte
    68  		if payload != nil {
    69  			digest = payload.MD5
    70  		}
    71  
    72  		// verify that this state is what we expect
    73  		if expected, err := c.getMD5(); err != nil {
    74  			log.Printf("[WARN] failed to fetch state md5: %s", err)
    75  		} else if len(expected) > 0 && !bytes.Equal(expected, digest) {
    76  			log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest)
    77  
    78  			if testChecksumHook != nil {
    79  				testChecksumHook()
    80  			}
    81  
    82  			if time.Now().Before(deadline) {
    83  				time.Sleep(consistencyRetryPollInterval)
    84  				log.Println("[INFO] retrying OSS RemoteClient.Get...")
    85  				continue
    86  			}
    87  
    88  			return nil, fmt.Errorf(errBadChecksumFmt, digest)
    89  		}
    90  
    91  		break
    92  	}
    93  	return payload, nil
    94  }
    95  
    96  func (c *RemoteClient) Put(data []byte) error {
    97  	bucket, err := c.ossClient.Bucket(c.bucketName)
    98  	if err != nil {
    99  		return fmt.Errorf("error getting bucket: %#v", err)
   100  	}
   101  
   102  	body := bytes.NewReader(data)
   103  
   104  	var options []oss.Option
   105  	if c.acl != "" {
   106  		options = append(options, oss.ACL(oss.ACLType(c.acl)))
   107  	}
   108  	options = append(options, oss.ContentType("application/json"))
   109  	if c.serverSideEncryption {
   110  		options = append(options, oss.ServerSideEncryption("AES256"))
   111  	}
   112  	options = append(options, oss.ContentLength(int64(len(data))))
   113  
   114  	if body != nil {
   115  		if err := bucket.PutObject(c.stateFile, body, options...); err != nil {
   116  			return fmt.Errorf("failed to upload state %s: %#v", c.stateFile, err)
   117  		}
   118  	}
   119  
   120  	sum := md5.Sum(data)
   121  	if err := c.putMD5(sum[:]); err != nil {
   122  		// if this errors out, we unfortunately have to error out altogether,
   123  		// since the next Get will inevitably fail.
   124  		return fmt.Errorf("failed to store state MD5: %s", err)
   125  	}
   126  	return nil
   127  }
   128  
   129  func (c *RemoteClient) Delete() error {
   130  	bucket, err := c.ossClient.Bucket(c.bucketName)
   131  	if err != nil {
   132  		return fmt.Errorf("error getting bucket %s: %#v", c.bucketName, err)
   133  	}
   134  
   135  	log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile)
   136  
   137  	if err := bucket.DeleteObject(c.stateFile); err != nil {
   138  		return fmt.Errorf("error deleting state %s: %#v", c.stateFile, err)
   139  	}
   140  
   141  	if err := c.deleteMD5(); err != nil {
   142  		log.Printf("[WARN] Error deleting state MD5: %s", err)
   143  	}
   144  	return nil
   145  }
   146  
   147  func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) {
   148  	if c.otsTable == "" {
   149  		return "", nil
   150  	}
   151  
   152  	info.Path = c.lockPath()
   153  
   154  	if info.ID == "" {
   155  		lockID, err := uuid.GenerateUUID()
   156  		if err != nil {
   157  			return "", err
   158  		}
   159  		info.ID = lockID
   160  	}
   161  
   162  	putParams := &tablestore.PutRowChange{
   163  		TableName: c.otsTable,
   164  		PrimaryKey: &tablestore.PrimaryKey{
   165  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   166  				{
   167  					ColumnName: pkName,
   168  					Value:      c.lockPath(),
   169  				},
   170  			},
   171  		},
   172  		Columns: []tablestore.AttributeColumn{
   173  			{
   174  				ColumnName: "Info",
   175  				Value:      string(info.Marshal()),
   176  			},
   177  		},
   178  		Condition: &tablestore.RowCondition{
   179  			RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST,
   180  		},
   181  	}
   182  
   183  	log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams)
   184  
   185  	_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
   186  		PutRowChange: putParams,
   187  	})
   188  	if err != nil {
   189  		log.Printf("[WARN] Error storing state lock in tablestore: %#v", err)
   190  		lockInfo, infoErr := c.getLockInfo()
   191  		if infoErr != nil {
   192  			log.Printf("[WARN] Error getting lock info: %#v", err)
   193  			err = multierror.Append(err, infoErr)
   194  		}
   195  		lockErr := &statemgr.LockError{
   196  			Err:  err,
   197  			Info: lockInfo,
   198  		}
   199  		log.Printf("[WARN] state lock error: %#v", lockErr)
   200  		return "", lockErr
   201  	}
   202  
   203  	return info.ID, nil
   204  }
   205  
   206  func (c *RemoteClient) getMD5() ([]byte, error) {
   207  	if c.otsTable == "" {
   208  		return nil, nil
   209  	}
   210  
   211  	getParams := &tablestore.SingleRowQueryCriteria{
   212  		TableName: c.otsTable,
   213  		PrimaryKey: &tablestore.PrimaryKey{
   214  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   215  				{
   216  					ColumnName: pkName,
   217  					Value:      c.lockPath() + stateIDSuffix,
   218  				},
   219  			},
   220  		},
   221  		ColumnsToGet: []string{pkName, "Digest"},
   222  		MaxVersion:   1,
   223  	}
   224  
   225  	log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams)
   226  
   227  	object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{
   228  		SingleRowQueryCriteria: getParams,
   229  	})
   230  
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  
   235  	var val string
   236  	if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 {
   237  		val = v[0].Value.(string)
   238  	}
   239  
   240  	sum, err := hex.DecodeString(val)
   241  	if err != nil || len(sum) != md5.Size {
   242  		return nil, errors.New("invalid md5")
   243  	}
   244  
   245  	return sum, nil
   246  }
   247  
   248  // store the hash of the state to that clients can check for stale state files.
   249  func (c *RemoteClient) putMD5(sum []byte) error {
   250  	if c.otsTable == "" {
   251  		return nil
   252  	}
   253  
   254  	if len(sum) != md5.Size {
   255  		return errors.New("invalid payload md5")
   256  	}
   257  
   258  	putParams := &tablestore.PutRowChange{
   259  		TableName: c.otsTable,
   260  		PrimaryKey: &tablestore.PrimaryKey{
   261  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   262  				{
   263  					ColumnName: pkName,
   264  					Value:      c.lockPath() + stateIDSuffix,
   265  				},
   266  			},
   267  		},
   268  		Columns: []tablestore.AttributeColumn{
   269  			{
   270  				ColumnName: "Digest",
   271  				Value:      hex.EncodeToString(sum),
   272  			},
   273  		},
   274  		Condition: &tablestore.RowCondition{
   275  			RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE,
   276  		},
   277  	}
   278  
   279  	log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams)
   280  
   281  	_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
   282  		PutRowChange: putParams,
   283  	})
   284  
   285  	if err != nil {
   286  		log.Printf("[WARN] failed to record state serial in tablestore: %s", err)
   287  	}
   288  
   289  	return nil
   290  }
   291  
   292  // remove the hash value for a deleted state
   293  func (c *RemoteClient) deleteMD5() error {
   294  	if c.otsTable == "" {
   295  		return nil
   296  	}
   297  
   298  	params := &tablestore.DeleteRowRequest{
   299  		DeleteRowChange: &tablestore.DeleteRowChange{
   300  			TableName: c.otsTable,
   301  			PrimaryKey: &tablestore.PrimaryKey{
   302  				PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   303  					{
   304  						ColumnName: pkName,
   305  						Value:      c.lockPath() + stateIDSuffix,
   306  					},
   307  				},
   308  			},
   309  			Condition: &tablestore.RowCondition{
   310  				RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST,
   311  			},
   312  		},
   313  	}
   314  
   315  	log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params)
   316  
   317  	if _, err := c.otsClient.DeleteRow(params); err != nil {
   318  		return err
   319  	}
   320  
   321  	return nil
   322  }
   323  
   324  func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) {
   325  	getParams := &tablestore.SingleRowQueryCriteria{
   326  		TableName: c.otsTable,
   327  		PrimaryKey: &tablestore.PrimaryKey{
   328  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   329  				{
   330  					ColumnName: pkName,
   331  					Value:      c.lockPath(),
   332  				},
   333  			},
   334  		},
   335  		ColumnsToGet: []string{pkName, "Info"},
   336  		MaxVersion:   1,
   337  	}
   338  
   339  	log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams)
   340  
   341  	object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{
   342  		SingleRowQueryCriteria: getParams,
   343  	})
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  
   348  	var infoData string
   349  	if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 {
   350  		infoData = v[0].Value.(string)
   351  	}
   352  	lockInfo := &statemgr.LockInfo{}
   353  	err = json.Unmarshal([]byte(infoData), lockInfo)
   354  	if err != nil {
   355  		return nil, err
   356  	}
   357  	return lockInfo, nil
   358  }
   359  func (c *RemoteClient) Unlock(id string) error {
   360  	if c.otsTable == "" {
   361  		return nil
   362  	}
   363  
   364  	lockErr := &statemgr.LockError{}
   365  
   366  	lockInfo, err := c.getLockInfo()
   367  	if err != nil {
   368  		lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err)
   369  		return lockErr
   370  	}
   371  	lockErr.Info = lockInfo
   372  
   373  	if lockInfo.ID != id {
   374  		lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id)
   375  		return lockErr
   376  	}
   377  	params := &tablestore.DeleteRowRequest{
   378  		DeleteRowChange: &tablestore.DeleteRowChange{
   379  			TableName: c.otsTable,
   380  			PrimaryKey: &tablestore.PrimaryKey{
   381  				PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   382  					{
   383  						ColumnName: pkName,
   384  						Value:      c.lockPath(),
   385  					},
   386  				},
   387  			},
   388  			Condition: &tablestore.RowCondition{
   389  				RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST,
   390  			},
   391  		},
   392  	}
   393  
   394  	log.Printf("[DEBUG] Deleting state lock from tablestore: %#v", params)
   395  
   396  	_, err = c.otsClient.DeleteRow(params)
   397  
   398  	if err != nil {
   399  		lockErr.Err = err
   400  		return lockErr
   401  	}
   402  
   403  	return nil
   404  }
   405  
   406  func (c *RemoteClient) lockPath() string {
   407  	return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile)
   408  }
   409  
   410  func (c *RemoteClient) getObj() (*remote.Payload, error) {
   411  	bucket, err := c.ossClient.Bucket(c.bucketName)
   412  	if err != nil {
   413  		return nil, fmt.Errorf("error getting bucket %s: %#v", c.bucketName, err)
   414  	}
   415  
   416  	if exist, err := bucket.IsObjectExist(c.stateFile); err != nil {
   417  		return nil, fmt.Errorf("estimating object %s is exist got an error: %#v", c.stateFile, err)
   418  	} else if !exist {
   419  		return nil, nil
   420  	}
   421  
   422  	var options []oss.Option
   423  	output, err := bucket.GetObject(c.stateFile, options...)
   424  	if err != nil {
   425  		return nil, fmt.Errorf("error getting object: %#v", err)
   426  	}
   427  
   428  	buf := bytes.NewBuffer(nil)
   429  	if _, err := io.Copy(buf, output); err != nil {
   430  		return nil, fmt.Errorf("failed to read remote state: %s", err)
   431  	}
   432  	sum := md5.Sum(buf.Bytes())
   433  	payload := &remote.Payload{
   434  		Data: buf.Bytes(),
   435  		MD5:  sum[:],
   436  	}
   437  
   438  	// If there was no data, then return nil
   439  	if len(payload.Data) == 0 {
   440  		return nil, nil
   441  	}
   442  
   443  	return payload, nil
   444  }
   445  
   446  const errBadChecksumFmt = `state data in OSS does not have the expected content.
   447  
   448  This may be caused by unusually long delays in OSS processing a previous state
   449  update.  Please wait for a minute or two and try again. If this problem
   450  persists, and neither OSS nor TableStore are experiencing an outage, you may need
   451  to manually verify the remote state and update the Digest value stored in the
   452  TableStore table to the following value: %x`