github.com/rstandt/terraform@v0.12.32-0.20230710220336-b1063613405c/backend/remote-state/oss/client.go (about) 1 package oss 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "encoding/json" 7 "fmt" 8 "io" 9 10 "encoding/hex" 11 "github.com/aliyun/aliyun-oss-go-sdk/oss" 12 "github.com/aliyun/aliyun-tablestore-go-sdk/tablestore" 13 "github.com/hashicorp/go-multierror" 14 uuid "github.com/hashicorp/go-uuid" 15 "github.com/hashicorp/terraform/helper/hashcode" 16 "github.com/hashicorp/terraform/state" 17 "github.com/hashicorp/terraform/state/remote" 18 "github.com/pkg/errors" 19 "log" 20 "sync" 21 "time" 22 ) 23 24 // Store the last saved serial in tablestore with this suffix for consistency checks. 25 const ( 26 stateIDSuffix = "-md5" 27 statePKValue = "terraform-remote-state-lock" 28 ) 29 30 var ( 31 // The amount of time we will retry a state waiting for it to match the 32 // expected checksum. 33 consistencyRetryTimeout = 10 * time.Second 34 35 // delay when polling the state 36 consistencyRetryPollInterval = 2 * time.Second 37 ) 38 39 // test hook called when checksums don't match 40 var testChecksumHook func() 41 42 type TableStorePrimaryKeyMeta struct { 43 PKName string 44 PKType string 45 } 46 47 type RemoteClient struct { 48 ossClient *oss.Client 49 otsClient *tablestore.TableStoreClient 50 bucketName string 51 stateFile string 52 lockFile string 53 serverSideEncryption bool 54 acl string 55 info *state.LockInfo 56 mu sync.Mutex 57 otsTable string 58 otsTabkePK TableStorePrimaryKeyMeta 59 } 60 61 func (c *RemoteClient) Get() (payload *remote.Payload, err error) { 62 deadline := time.Now().Add(consistencyRetryTimeout) 63 64 // If we have a checksum, and the returned payload doesn't match, we retry 65 // up until deadline. 66 for { 67 payload, err = c.getObj() 68 if err != nil { 69 return nil, err 70 } 71 72 // If the remote state was manually removed the payload will be nil, 73 // but if there's still a digest entry for that state we will still try 74 // to compare the MD5 below. 75 var digest []byte 76 if payload != nil { 77 digest = payload.MD5 78 } 79 80 // verify that this state is what we expect 81 if expected, err := c.getMD5(); err != nil { 82 log.Printf("[WARN] failed to fetch state md5: %s", err) 83 } else if len(expected) > 0 && !bytes.Equal(expected, digest) { 84 log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest) 85 86 if testChecksumHook != nil { 87 testChecksumHook() 88 } 89 90 if time.Now().Before(deadline) { 91 time.Sleep(consistencyRetryPollInterval) 92 log.Println("[INFO] retrying OSS RemoteClient.Get...") 93 continue 94 } 95 96 return nil, fmt.Errorf(errBadChecksumFmt, digest) 97 } 98 99 break 100 } 101 return payload, nil 102 } 103 104 func (c *RemoteClient) Put(data []byte) error { 105 bucket, err := c.ossClient.Bucket(c.bucketName) 106 if err != nil { 107 return fmt.Errorf("Error getting bucket: %#v", err) 108 } 109 110 body := bytes.NewReader(data) 111 112 var options []oss.Option 113 if c.acl != "" { 114 options = append(options, oss.ACL(oss.ACLType(c.acl))) 115 } 116 options = append(options, oss.ContentType("application/json")) 117 if c.serverSideEncryption { 118 options = append(options, oss.ServerSideEncryption("AES256")) 119 } 120 options = append(options, oss.ContentLength(int64(len(data)))) 121 122 if body != nil { 123 if err := bucket.PutObject(c.stateFile, body, options...); err != nil { 124 return fmt.Errorf("Failed to upload state %s: %#v", c.stateFile, err) 125 } 126 } 127 128 sum := md5.Sum(data) 129 if err := c.putMD5(sum[:]); err != nil { 130 // if this errors out, we unfortunately have to error out altogether, 131 // since the next Get will inevitably fail. 132 return fmt.Errorf("Failed to store state MD5: %s", err) 133 } 134 return nil 135 } 136 137 func (c *RemoteClient) Delete() error { 138 bucket, err := c.ossClient.Bucket(c.bucketName) 139 if err != nil { 140 return fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err) 141 } 142 143 log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile) 144 145 if err := bucket.DeleteObject(c.stateFile); err != nil { 146 return fmt.Errorf("Error deleting state %s: %#v", c.stateFile, err) 147 } 148 149 if err := c.deleteMD5(); err != nil { 150 log.Printf("[WARN] Error deleting state MD5: %s", err) 151 } 152 return nil 153 } 154 155 func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) { 156 if c.otsTable == "" { 157 return "", nil 158 } 159 160 if info.ID == "" { 161 lockID, err := uuid.GenerateUUID() 162 if err != nil { 163 return "", err 164 } 165 info.ID = lockID 166 } 167 168 putParams := &tablestore.PutRowChange{ 169 TableName: c.otsTable, 170 PrimaryKey: &tablestore.PrimaryKey{ 171 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 172 { 173 ColumnName: c.otsTabkePK.PKName, 174 Value: c.getPKValue(), 175 }, 176 }, 177 }, 178 Columns: []tablestore.AttributeColumn{ 179 { 180 ColumnName: "LockID", 181 Value: c.lockFile, 182 }, 183 { 184 ColumnName: "Info", 185 Value: string(info.Marshal()), 186 }, 187 }, 188 Condition: &tablestore.RowCondition{ 189 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST, 190 }, 191 } 192 193 log.Printf("[DEBUG] Recoring state lock in tablestore: %#v", putParams) 194 195 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 196 PutRowChange: putParams, 197 }) 198 if err != nil { 199 log.Printf("[WARN] Error storing state lock in tablestore: %#v", err) 200 lockInfo, infoErr := c.getLockInfo() 201 if infoErr != nil { 202 log.Printf("[WARN] Error getting lock info: %#v", err) 203 err = multierror.Append(err, infoErr) 204 } 205 lockErr := &state.LockError{ 206 Err: err, 207 Info: lockInfo, 208 } 209 log.Printf("[WARN] state lock error: %#v", lockErr) 210 return "", lockErr 211 } 212 213 return info.ID, nil 214 } 215 216 func (c *RemoteClient) getMD5() ([]byte, error) { 217 if c.otsTable == "" { 218 return nil, nil 219 } 220 221 getParams := &tablestore.SingleRowQueryCriteria{ 222 TableName: c.otsTable, 223 PrimaryKey: &tablestore.PrimaryKey{ 224 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 225 { 226 ColumnName: c.otsTabkePK.PKName, 227 Value: c.getPKValue(), 228 }, 229 }, 230 }, 231 ColumnsToGet: []string{"LockID", "Digest"}, 232 MaxVersion: 1, 233 } 234 235 log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams) 236 237 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 238 SingleRowQueryCriteria: getParams, 239 }) 240 241 if err != nil { 242 return nil, err 243 } 244 245 var val string 246 if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 { 247 val = v[0].Value.(string) 248 } 249 250 sum, err := hex.DecodeString(val) 251 if err != nil || len(sum) != md5.Size { 252 return nil, errors.New("invalid md5") 253 } 254 255 return sum, nil 256 } 257 258 // store the hash of the state to that clients can check for stale state files. 259 func (c *RemoteClient) putMD5(sum []byte) error { 260 if c.otsTable == "" { 261 return nil 262 } 263 264 if len(sum) != md5.Size { 265 return errors.New("invalid payload md5") 266 } 267 268 putParams := &tablestore.PutRowChange{ 269 TableName: c.otsTable, 270 PrimaryKey: &tablestore.PrimaryKey{ 271 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 272 { 273 ColumnName: c.otsTabkePK.PKName, 274 Value: c.getPKValue(), 275 }, 276 }, 277 }, 278 Columns: []tablestore.AttributeColumn{ 279 { 280 ColumnName: "LockID", 281 Value: c.lockPath() + stateIDSuffix, 282 }, 283 { 284 ColumnName: "Digest", 285 Value: hex.EncodeToString(sum), 286 }, 287 }, 288 Condition: &tablestore.RowCondition{ 289 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST, 290 }, 291 } 292 293 log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams) 294 295 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 296 PutRowChange: putParams, 297 }) 298 299 if err != nil { 300 log.Printf("[WARN] failed to record state serial in tablestore: %s", err) 301 } 302 303 return nil 304 } 305 306 // remove the hash value for a deleted state 307 func (c *RemoteClient) deleteMD5() error { 308 if c.otsTable == "" { 309 return nil 310 } 311 312 params := &tablestore.DeleteRowRequest{ 313 DeleteRowChange: &tablestore.DeleteRowChange{ 314 TableName: c.otsTable, 315 PrimaryKey: &tablestore.PrimaryKey{ 316 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 317 { 318 ColumnName: c.otsTabkePK.PKName, 319 Value: c.getPKValue(), 320 }, 321 }, 322 }, 323 Condition: &tablestore.RowCondition{ 324 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 325 }, 326 }, 327 } 328 329 log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params) 330 331 if _, err := c.otsClient.DeleteRow(params); err != nil { 332 return err 333 } 334 335 return nil 336 } 337 338 func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) { 339 getParams := &tablestore.SingleRowQueryCriteria{ 340 TableName: c.otsTable, 341 PrimaryKey: &tablestore.PrimaryKey{ 342 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 343 { 344 ColumnName: c.otsTabkePK.PKName, 345 Value: c.getPKValue(), 346 }, 347 }, 348 }, 349 ColumnsToGet: []string{"LockID", "Info"}, 350 MaxVersion: 1, 351 } 352 353 log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams) 354 355 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 356 SingleRowQueryCriteria: getParams, 357 }) 358 if err != nil { 359 return nil, err 360 } 361 362 var infoData string 363 if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 { 364 infoData = v[0].Value.(string) 365 } 366 lockInfo := &state.LockInfo{} 367 err = json.Unmarshal([]byte(infoData), lockInfo) 368 if err != nil { 369 return nil, err 370 } 371 return lockInfo, nil 372 } 373 func (c *RemoteClient) Unlock(id string) error { 374 if c.otsTable == "" { 375 return nil 376 } 377 378 lockErr := &state.LockError{} 379 380 lockInfo, err := c.getLockInfo() 381 if err != nil { 382 lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err) 383 return lockErr 384 } 385 lockErr.Info = lockInfo 386 387 if lockInfo.ID != id { 388 lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id) 389 return lockErr 390 } 391 params := &tablestore.DeleteRowRequest{ 392 DeleteRowChange: &tablestore.DeleteRowChange{ 393 TableName: c.otsTable, 394 PrimaryKey: &tablestore.PrimaryKey{ 395 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 396 { 397 ColumnName: c.otsTabkePK.PKName, 398 Value: c.getPKValue(), 399 }, 400 }, 401 }, 402 Condition: &tablestore.RowCondition{ 403 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 404 }, 405 }, 406 } 407 408 log.Printf("[DEBUG] Deleting state lock from tablestore: %#v", params) 409 410 _, err = c.otsClient.DeleteRow(params) 411 412 if err != nil { 413 lockErr.Err = err 414 return lockErr 415 } 416 417 return nil 418 } 419 420 func (c *RemoteClient) lockPath() string { 421 return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile) 422 } 423 424 func (c *RemoteClient) getObj() (*remote.Payload, error) { 425 bucket, err := c.ossClient.Bucket(c.bucketName) 426 if err != nil { 427 return nil, fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err) 428 } 429 430 if exist, err := bucket.IsObjectExist(c.stateFile); err != nil { 431 return nil, fmt.Errorf("Estimating object %s is exist got an error: %#v", c.stateFile, err) 432 } else if !exist { 433 return nil, nil 434 } 435 436 var options []oss.Option 437 output, err := bucket.GetObject(c.stateFile, options...) 438 if err != nil { 439 return nil, fmt.Errorf("Error getting object: %#v", err) 440 } 441 442 buf := bytes.NewBuffer(nil) 443 if _, err := io.Copy(buf, output); err != nil { 444 return nil, fmt.Errorf("Failed to read remote state: %s", err) 445 } 446 sum := md5.Sum(buf.Bytes()) 447 payload := &remote.Payload{ 448 Data: buf.Bytes(), 449 MD5: sum[:], 450 } 451 452 // If there was no data, then return nil 453 if len(payload.Data) == 0 { 454 return nil, nil 455 } 456 457 return payload, nil 458 } 459 460 func (c *RemoteClient) getPKValue() (value interface{}) { 461 value = statePKValue 462 if c.otsTabkePK.PKType == "Integer" { 463 value = hashcode.String(statePKValue) 464 } else if c.otsTabkePK.PKType == "Binary" { 465 value = stringToBin(statePKValue) 466 } 467 return 468 } 469 470 func stringToBin(s string) (binString string) { 471 for _, c := range s { 472 binString = fmt.Sprintf("%s%b", binString, c) 473 } 474 return 475 } 476 477 const errBadChecksumFmt = `state data in OSS does not have the expected content. 478 479 This may be caused by unusually long delays in OSS processing a previous state 480 update. Please wait for a minute or two and try again. If this problem 481 persists, and neither OSS nor TableStore are experiencing an outage, you may need 482 to manually verify the remote state and update the Digest value stored in the 483 TableStore table to the following value: %x 484 `