github.com/jaredpalmer/terraform@v1.1.0-alpha20210908.0.20210911170307-88705c943a03/internal/backend/remote-state/oss/client.go (about)

     1  package oss
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"encoding/json"
     7  	"fmt"
     8  	"io"
     9  
    10  	"encoding/hex"
    11  	"log"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/aliyun/aliyun-oss-go-sdk/oss"
    16  	"github.com/aliyun/aliyun-tablestore-go-sdk/tablestore"
    17  	"github.com/hashicorp/go-multierror"
    18  	uuid "github.com/hashicorp/go-uuid"
    19  	"github.com/hashicorp/terraform/internal/states/remote"
    20  	"github.com/hashicorp/terraform/internal/states/statemgr"
    21  	"github.com/pkg/errors"
    22  )
    23  
    24  const (
    25  	// Store the last saved serial in tablestore with this suffix for consistency checks.
    26  	stateIDSuffix = "-md5"
    27  
    28  	pkName = "LockID"
    29  )
    30  
    31  var (
    32  	// The amount of time we will retry a state waiting for it to match the
    33  	// expected checksum.
    34  	consistencyRetryTimeout = 10 * time.Second
    35  
    36  	// delay when polling the state
    37  	consistencyRetryPollInterval = 2 * time.Second
    38  )
    39  
    40  // test hook called when checksums don't match
    41  var testChecksumHook func()
    42  
    43  type RemoteClient struct {
    44  	ossClient            *oss.Client
    45  	otsClient            *tablestore.TableStoreClient
    46  	bucketName           string
    47  	stateFile            string
    48  	lockFile             string
    49  	serverSideEncryption bool
    50  	acl                  string
    51  	info                 *statemgr.LockInfo
    52  	mu                   sync.Mutex
    53  	otsTable             string
    54  }
    55  
    56  func (c *RemoteClient) Get() (payload *remote.Payload, err error) {
    57  	deadline := time.Now().Add(consistencyRetryTimeout)
    58  
    59  	// If we have a checksum, and the returned payload doesn't match, we retry
    60  	// up until deadline.
    61  	for {
    62  		payload, err = c.getObj()
    63  		if err != nil {
    64  			return nil, err
    65  		}
    66  
    67  		// If the remote state was manually removed the payload will be nil,
    68  		// but if there's still a digest entry for that state we will still try
    69  		// to compare the MD5 below.
    70  		var digest []byte
    71  		if payload != nil {
    72  			digest = payload.MD5
    73  		}
    74  
    75  		// verify that this state is what we expect
    76  		if expected, err := c.getMD5(); err != nil {
    77  			log.Printf("[WARN] failed to fetch state md5: %s", err)
    78  		} else if len(expected) > 0 && !bytes.Equal(expected, digest) {
    79  			log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest)
    80  
    81  			if testChecksumHook != nil {
    82  				testChecksumHook()
    83  			}
    84  
    85  			if time.Now().Before(deadline) {
    86  				time.Sleep(consistencyRetryPollInterval)
    87  				log.Println("[INFO] retrying OSS RemoteClient.Get...")
    88  				continue
    89  			}
    90  
    91  			return nil, fmt.Errorf(errBadChecksumFmt, digest)
    92  		}
    93  
    94  		break
    95  	}
    96  	return payload, nil
    97  }
    98  
    99  func (c *RemoteClient) Put(data []byte) error {
   100  	bucket, err := c.ossClient.Bucket(c.bucketName)
   101  	if err != nil {
   102  		return fmt.Errorf("Error getting bucket: %#v", err)
   103  	}
   104  
   105  	body := bytes.NewReader(data)
   106  
   107  	var options []oss.Option
   108  	if c.acl != "" {
   109  		options = append(options, oss.ACL(oss.ACLType(c.acl)))
   110  	}
   111  	options = append(options, oss.ContentType("application/json"))
   112  	if c.serverSideEncryption {
   113  		options = append(options, oss.ServerSideEncryption("AES256"))
   114  	}
   115  	options = append(options, oss.ContentLength(int64(len(data))))
   116  
   117  	if body != nil {
   118  		if err := bucket.PutObject(c.stateFile, body, options...); err != nil {
   119  			return fmt.Errorf("Failed to upload state %s: %#v", c.stateFile, err)
   120  		}
   121  	}
   122  
   123  	sum := md5.Sum(data)
   124  	if err := c.putMD5(sum[:]); err != nil {
   125  		// if this errors out, we unfortunately have to error out altogether,
   126  		// since the next Get will inevitably fail.
   127  		return fmt.Errorf("Failed to store state MD5: %s", err)
   128  	}
   129  	return nil
   130  }
   131  
   132  func (c *RemoteClient) Delete() error {
   133  	bucket, err := c.ossClient.Bucket(c.bucketName)
   134  	if err != nil {
   135  		return fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err)
   136  	}
   137  
   138  	log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile)
   139  
   140  	if err := bucket.DeleteObject(c.stateFile); err != nil {
   141  		return fmt.Errorf("Error deleting state %s: %#v", c.stateFile, err)
   142  	}
   143  
   144  	if err := c.deleteMD5(); err != nil {
   145  		log.Printf("[WARN] Error deleting state MD5: %s", err)
   146  	}
   147  	return nil
   148  }
   149  
   150  func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) {
   151  	if c.otsTable == "" {
   152  		return "", nil
   153  	}
   154  
   155  	info.Path = c.lockPath()
   156  
   157  	if info.ID == "" {
   158  		lockID, err := uuid.GenerateUUID()
   159  		if err != nil {
   160  			return "", err
   161  		}
   162  		info.ID = lockID
   163  	}
   164  
   165  	putParams := &tablestore.PutRowChange{
   166  		TableName: c.otsTable,
   167  		PrimaryKey: &tablestore.PrimaryKey{
   168  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   169  				{
   170  					ColumnName: pkName,
   171  					Value:      c.lockPath(),
   172  				},
   173  			},
   174  		},
   175  		Columns: []tablestore.AttributeColumn{
   176  			{
   177  				ColumnName: "Info",
   178  				Value:      string(info.Marshal()),
   179  			},
   180  		},
   181  		Condition: &tablestore.RowCondition{
   182  			RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST,
   183  		},
   184  	}
   185  
   186  	log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams)
   187  
   188  	_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
   189  		PutRowChange: putParams,
   190  	})
   191  	if err != nil {
   192  		log.Printf("[WARN] Error storing state lock in tablestore: %#v", err)
   193  		lockInfo, infoErr := c.getLockInfo()
   194  		if infoErr != nil {
   195  			log.Printf("[WARN] Error getting lock info: %#v", err)
   196  			err = multierror.Append(err, infoErr)
   197  		}
   198  		lockErr := &statemgr.LockError{
   199  			Err:  err,
   200  			Info: lockInfo,
   201  		}
   202  		log.Printf("[WARN] state lock error: %#v", lockErr)
   203  		return "", lockErr
   204  	}
   205  
   206  	return info.ID, nil
   207  }
   208  
   209  func (c *RemoteClient) getMD5() ([]byte, error) {
   210  	if c.otsTable == "" {
   211  		return nil, nil
   212  	}
   213  
   214  	getParams := &tablestore.SingleRowQueryCriteria{
   215  		TableName: c.otsTable,
   216  		PrimaryKey: &tablestore.PrimaryKey{
   217  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   218  				{
   219  					ColumnName: pkName,
   220  					Value:      c.lockPath() + stateIDSuffix,
   221  				},
   222  			},
   223  		},
   224  		ColumnsToGet: []string{pkName, "Digest"},
   225  		MaxVersion:   1,
   226  	}
   227  
   228  	log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams)
   229  
   230  	object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{
   231  		SingleRowQueryCriteria: getParams,
   232  	})
   233  
   234  	if err != nil {
   235  		return nil, err
   236  	}
   237  
   238  	var val string
   239  	if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 {
   240  		val = v[0].Value.(string)
   241  	}
   242  
   243  	sum, err := hex.DecodeString(val)
   244  	if err != nil || len(sum) != md5.Size {
   245  		return nil, errors.New("invalid md5")
   246  	}
   247  
   248  	return sum, nil
   249  }
   250  
   251  // store the hash of the state to that clients can check for stale state files.
   252  func (c *RemoteClient) putMD5(sum []byte) error {
   253  	if c.otsTable == "" {
   254  		return nil
   255  	}
   256  
   257  	if len(sum) != md5.Size {
   258  		return errors.New("invalid payload md5")
   259  	}
   260  
   261  	putParams := &tablestore.PutRowChange{
   262  		TableName: c.otsTable,
   263  		PrimaryKey: &tablestore.PrimaryKey{
   264  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   265  				{
   266  					ColumnName: pkName,
   267  					Value:      c.lockPath() + stateIDSuffix,
   268  				},
   269  			},
   270  		},
   271  		Columns: []tablestore.AttributeColumn{
   272  			{
   273  				ColumnName: "Digest",
   274  				Value:      hex.EncodeToString(sum),
   275  			},
   276  		},
   277  		Condition: &tablestore.RowCondition{
   278  			RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE,
   279  		},
   280  	}
   281  
   282  	log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams)
   283  
   284  	_, err := c.otsClient.PutRow(&tablestore.PutRowRequest{
   285  		PutRowChange: putParams,
   286  	})
   287  
   288  	if err != nil {
   289  		log.Printf("[WARN] failed to record state serial in tablestore: %s", err)
   290  	}
   291  
   292  	return nil
   293  }
   294  
   295  // remove the hash value for a deleted state
   296  func (c *RemoteClient) deleteMD5() error {
   297  	if c.otsTable == "" {
   298  		return nil
   299  	}
   300  
   301  	params := &tablestore.DeleteRowRequest{
   302  		DeleteRowChange: &tablestore.DeleteRowChange{
   303  			TableName: c.otsTable,
   304  			PrimaryKey: &tablestore.PrimaryKey{
   305  				PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   306  					{
   307  						ColumnName: pkName,
   308  						Value:      c.lockPath() + stateIDSuffix,
   309  					},
   310  				},
   311  			},
   312  			Condition: &tablestore.RowCondition{
   313  				RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST,
   314  			},
   315  		},
   316  	}
   317  
   318  	log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params)
   319  
   320  	if _, err := c.otsClient.DeleteRow(params); err != nil {
   321  		return err
   322  	}
   323  
   324  	return nil
   325  }
   326  
   327  func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) {
   328  	getParams := &tablestore.SingleRowQueryCriteria{
   329  		TableName: c.otsTable,
   330  		PrimaryKey: &tablestore.PrimaryKey{
   331  			PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   332  				{
   333  					ColumnName: pkName,
   334  					Value:      c.lockPath(),
   335  				},
   336  			},
   337  		},
   338  		ColumnsToGet: []string{pkName, "Info"},
   339  		MaxVersion:   1,
   340  	}
   341  
   342  	log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams)
   343  
   344  	object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{
   345  		SingleRowQueryCriteria: getParams,
   346  	})
   347  	if err != nil {
   348  		return nil, err
   349  	}
   350  
   351  	var infoData string
   352  	if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 {
   353  		infoData = v[0].Value.(string)
   354  	}
   355  	lockInfo := &statemgr.LockInfo{}
   356  	err = json.Unmarshal([]byte(infoData), lockInfo)
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  	return lockInfo, nil
   361  }
   362  func (c *RemoteClient) Unlock(id string) error {
   363  	if c.otsTable == "" {
   364  		return nil
   365  	}
   366  
   367  	lockErr := &statemgr.LockError{}
   368  
   369  	lockInfo, err := c.getLockInfo()
   370  	if err != nil {
   371  		lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err)
   372  		return lockErr
   373  	}
   374  	lockErr.Info = lockInfo
   375  
   376  	if lockInfo.ID != id {
   377  		lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id)
   378  		return lockErr
   379  	}
   380  	params := &tablestore.DeleteRowRequest{
   381  		DeleteRowChange: &tablestore.DeleteRowChange{
   382  			TableName: c.otsTable,
   383  			PrimaryKey: &tablestore.PrimaryKey{
   384  				PrimaryKeys: []*tablestore.PrimaryKeyColumn{
   385  					{
   386  						ColumnName: pkName,
   387  						Value:      c.lockPath(),
   388  					},
   389  				},
   390  			},
   391  			Condition: &tablestore.RowCondition{
   392  				RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST,
   393  			},
   394  		},
   395  	}
   396  
   397  	log.Printf("[DEBUG] Deleting state lock from tablestore: %#v", params)
   398  
   399  	_, err = c.otsClient.DeleteRow(params)
   400  
   401  	if err != nil {
   402  		lockErr.Err = err
   403  		return lockErr
   404  	}
   405  
   406  	return nil
   407  }
   408  
   409  func (c *RemoteClient) lockPath() string {
   410  	return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile)
   411  }
   412  
   413  func (c *RemoteClient) getObj() (*remote.Payload, error) {
   414  	bucket, err := c.ossClient.Bucket(c.bucketName)
   415  	if err != nil {
   416  		return nil, fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err)
   417  	}
   418  
   419  	if exist, err := bucket.IsObjectExist(c.stateFile); err != nil {
   420  		return nil, fmt.Errorf("Estimating object %s is exist got an error: %#v", c.stateFile, err)
   421  	} else if !exist {
   422  		return nil, nil
   423  	}
   424  
   425  	var options []oss.Option
   426  	output, err := bucket.GetObject(c.stateFile, options...)
   427  	if err != nil {
   428  		return nil, fmt.Errorf("Error getting object: %#v", err)
   429  	}
   430  
   431  	buf := bytes.NewBuffer(nil)
   432  	if _, err := io.Copy(buf, output); err != nil {
   433  		return nil, fmt.Errorf("Failed to read remote state: %s", err)
   434  	}
   435  	sum := md5.Sum(buf.Bytes())
   436  	payload := &remote.Payload{
   437  		Data: buf.Bytes(),
   438  		MD5:  sum[:],
   439  	}
   440  
   441  	// If there was no data, then return nil
   442  	if len(payload.Data) == 0 {
   443  		return nil, nil
   444  	}
   445  
   446  	return payload, nil
   447  }
   448  
   449  const errBadChecksumFmt = `state data in OSS does not have the expected content.
   450  
   451  This may be caused by unusually long delays in OSS processing a previous state
   452  update.  Please wait for a minute or two and try again. If this problem
   453  persists, and neither OSS nor TableStore are experiencing an outage, you may need
   454  to manually verify the remote state and update the Digest value stored in the
   455  TableStore table to the following value: %x
   456  `