github.com/jaredpalmer/terraform@v1.1.0-alpha20210908.0.20210911170307-88705c943a03/internal/backend/remote-state/oss/client.go (about) 1 package oss 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "encoding/json" 7 "fmt" 8 "io" 9 10 "encoding/hex" 11 "log" 12 "sync" 13 "time" 14 15 "github.com/aliyun/aliyun-oss-go-sdk/oss" 16 "github.com/aliyun/aliyun-tablestore-go-sdk/tablestore" 17 "github.com/hashicorp/go-multierror" 18 uuid "github.com/hashicorp/go-uuid" 19 "github.com/hashicorp/terraform/internal/states/remote" 20 "github.com/hashicorp/terraform/internal/states/statemgr" 21 "github.com/pkg/errors" 22 ) 23 24 const ( 25 // Store the last saved serial in tablestore with this suffix for consistency checks. 26 stateIDSuffix = "-md5" 27 28 pkName = "LockID" 29 ) 30 31 var ( 32 // The amount of time we will retry a state waiting for it to match the 33 // expected checksum. 34 consistencyRetryTimeout = 10 * time.Second 35 36 // delay when polling the state 37 consistencyRetryPollInterval = 2 * time.Second 38 ) 39 40 // test hook called when checksums don't match 41 var testChecksumHook func() 42 43 type RemoteClient struct { 44 ossClient *oss.Client 45 otsClient *tablestore.TableStoreClient 46 bucketName string 47 stateFile string 48 lockFile string 49 serverSideEncryption bool 50 acl string 51 info *statemgr.LockInfo 52 mu sync.Mutex 53 otsTable string 54 } 55 56 func (c *RemoteClient) Get() (payload *remote.Payload, err error) { 57 deadline := time.Now().Add(consistencyRetryTimeout) 58 59 // If we have a checksum, and the returned payload doesn't match, we retry 60 // up until deadline. 61 for { 62 payload, err = c.getObj() 63 if err != nil { 64 return nil, err 65 } 66 67 // If the remote state was manually removed the payload will be nil, 68 // but if there's still a digest entry for that state we will still try 69 // to compare the MD5 below. 70 var digest []byte 71 if payload != nil { 72 digest = payload.MD5 73 } 74 75 // verify that this state is what we expect 76 if expected, err := c.getMD5(); err != nil { 77 log.Printf("[WARN] failed to fetch state md5: %s", err) 78 } else if len(expected) > 0 && !bytes.Equal(expected, digest) { 79 log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest) 80 81 if testChecksumHook != nil { 82 testChecksumHook() 83 } 84 85 if time.Now().Before(deadline) { 86 time.Sleep(consistencyRetryPollInterval) 87 log.Println("[INFO] retrying OSS RemoteClient.Get...") 88 continue 89 } 90 91 return nil, fmt.Errorf(errBadChecksumFmt, digest) 92 } 93 94 break 95 } 96 return payload, nil 97 } 98 99 func (c *RemoteClient) Put(data []byte) error { 100 bucket, err := c.ossClient.Bucket(c.bucketName) 101 if err != nil { 102 return fmt.Errorf("Error getting bucket: %#v", err) 103 } 104 105 body := bytes.NewReader(data) 106 107 var options []oss.Option 108 if c.acl != "" { 109 options = append(options, oss.ACL(oss.ACLType(c.acl))) 110 } 111 options = append(options, oss.ContentType("application/json")) 112 if c.serverSideEncryption { 113 options = append(options, oss.ServerSideEncryption("AES256")) 114 } 115 options = append(options, oss.ContentLength(int64(len(data)))) 116 117 if body != nil { 118 if err := bucket.PutObject(c.stateFile, body, options...); err != nil { 119 return fmt.Errorf("Failed to upload state %s: %#v", c.stateFile, err) 120 } 121 } 122 123 sum := md5.Sum(data) 124 if err := c.putMD5(sum[:]); err != nil { 125 // if this errors out, we unfortunately have to error out altogether, 126 // since the next Get will inevitably fail. 127 return fmt.Errorf("Failed to store state MD5: %s", err) 128 } 129 return nil 130 } 131 132 func (c *RemoteClient) Delete() error { 133 bucket, err := c.ossClient.Bucket(c.bucketName) 134 if err != nil { 135 return fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err) 136 } 137 138 log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile) 139 140 if err := bucket.DeleteObject(c.stateFile); err != nil { 141 return fmt.Errorf("Error deleting state %s: %#v", c.stateFile, err) 142 } 143 144 if err := c.deleteMD5(); err != nil { 145 log.Printf("[WARN] Error deleting state MD5: %s", err) 146 } 147 return nil 148 } 149 150 func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { 151 if c.otsTable == "" { 152 return "", nil 153 } 154 155 info.Path = c.lockPath() 156 157 if info.ID == "" { 158 lockID, err := uuid.GenerateUUID() 159 if err != nil { 160 return "", err 161 } 162 info.ID = lockID 163 } 164 165 putParams := &tablestore.PutRowChange{ 166 TableName: c.otsTable, 167 PrimaryKey: &tablestore.PrimaryKey{ 168 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 169 { 170 ColumnName: pkName, 171 Value: c.lockPath(), 172 }, 173 }, 174 }, 175 Columns: []tablestore.AttributeColumn{ 176 { 177 ColumnName: "Info", 178 Value: string(info.Marshal()), 179 }, 180 }, 181 Condition: &tablestore.RowCondition{ 182 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST, 183 }, 184 } 185 186 log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams) 187 188 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 189 PutRowChange: putParams, 190 }) 191 if err != nil { 192 log.Printf("[WARN] Error storing state lock in tablestore: %#v", err) 193 lockInfo, infoErr := c.getLockInfo() 194 if infoErr != nil { 195 log.Printf("[WARN] Error getting lock info: %#v", err) 196 err = multierror.Append(err, infoErr) 197 } 198 lockErr := &statemgr.LockError{ 199 Err: err, 200 Info: lockInfo, 201 } 202 log.Printf("[WARN] state lock error: %#v", lockErr) 203 return "", lockErr 204 } 205 206 return info.ID, nil 207 } 208 209 func (c *RemoteClient) getMD5() ([]byte, error) { 210 if c.otsTable == "" { 211 return nil, nil 212 } 213 214 getParams := &tablestore.SingleRowQueryCriteria{ 215 TableName: c.otsTable, 216 PrimaryKey: &tablestore.PrimaryKey{ 217 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 218 { 219 ColumnName: pkName, 220 Value: c.lockPath() + stateIDSuffix, 221 }, 222 }, 223 }, 224 ColumnsToGet: []string{pkName, "Digest"}, 225 MaxVersion: 1, 226 } 227 228 log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams) 229 230 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 231 SingleRowQueryCriteria: getParams, 232 }) 233 234 if err != nil { 235 return nil, err 236 } 237 238 var val string 239 if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 { 240 val = v[0].Value.(string) 241 } 242 243 sum, err := hex.DecodeString(val) 244 if err != nil || len(sum) != md5.Size { 245 return nil, errors.New("invalid md5") 246 } 247 248 return sum, nil 249 } 250 251 // store the hash of the state to that clients can check for stale state files. 252 func (c *RemoteClient) putMD5(sum []byte) error { 253 if c.otsTable == "" { 254 return nil 255 } 256 257 if len(sum) != md5.Size { 258 return errors.New("invalid payload md5") 259 } 260 261 putParams := &tablestore.PutRowChange{ 262 TableName: c.otsTable, 263 PrimaryKey: &tablestore.PrimaryKey{ 264 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 265 { 266 ColumnName: pkName, 267 Value: c.lockPath() + stateIDSuffix, 268 }, 269 }, 270 }, 271 Columns: []tablestore.AttributeColumn{ 272 { 273 ColumnName: "Digest", 274 Value: hex.EncodeToString(sum), 275 }, 276 }, 277 Condition: &tablestore.RowCondition{ 278 RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE, 279 }, 280 } 281 282 log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams) 283 284 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 285 PutRowChange: putParams, 286 }) 287 288 if err != nil { 289 log.Printf("[WARN] failed to record state serial in tablestore: %s", err) 290 } 291 292 return nil 293 } 294 295 // remove the hash value for a deleted state 296 func (c *RemoteClient) deleteMD5() error { 297 if c.otsTable == "" { 298 return nil 299 } 300 301 params := &tablestore.DeleteRowRequest{ 302 DeleteRowChange: &tablestore.DeleteRowChange{ 303 TableName: c.otsTable, 304 PrimaryKey: &tablestore.PrimaryKey{ 305 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 306 { 307 ColumnName: pkName, 308 Value: c.lockPath() + stateIDSuffix, 309 }, 310 }, 311 }, 312 Condition: &tablestore.RowCondition{ 313 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 314 }, 315 }, 316 } 317 318 log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params) 319 320 if _, err := c.otsClient.DeleteRow(params); err != nil { 321 return err 322 } 323 324 return nil 325 } 326 327 func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) { 328 getParams := &tablestore.SingleRowQueryCriteria{ 329 TableName: c.otsTable, 330 PrimaryKey: &tablestore.PrimaryKey{ 331 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 332 { 333 ColumnName: pkName, 334 Value: c.lockPath(), 335 }, 336 }, 337 }, 338 ColumnsToGet: []string{pkName, "Info"}, 339 MaxVersion: 1, 340 } 341 342 log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams) 343 344 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 345 SingleRowQueryCriteria: getParams, 346 }) 347 if err != nil { 348 return nil, err 349 } 350 351 var infoData string 352 if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 { 353 infoData = v[0].Value.(string) 354 } 355 lockInfo := &statemgr.LockInfo{} 356 err = json.Unmarshal([]byte(infoData), lockInfo) 357 if err != nil { 358 return nil, err 359 } 360 return lockInfo, nil 361 } 362 func (c *RemoteClient) Unlock(id string) error { 363 if c.otsTable == "" { 364 return nil 365 } 366 367 lockErr := &statemgr.LockError{} 368 369 lockInfo, err := c.getLockInfo() 370 if err != nil { 371 lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err) 372 return lockErr 373 } 374 lockErr.Info = lockInfo 375 376 if lockInfo.ID != id { 377 lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id) 378 return lockErr 379 } 380 params := &tablestore.DeleteRowRequest{ 381 DeleteRowChange: &tablestore.DeleteRowChange{ 382 TableName: c.otsTable, 383 PrimaryKey: &tablestore.PrimaryKey{ 384 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 385 { 386 ColumnName: pkName, 387 Value: c.lockPath(), 388 }, 389 }, 390 }, 391 Condition: &tablestore.RowCondition{ 392 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 393 }, 394 }, 395 } 396 397 log.Printf("[DEBUG] Deleting state lock from tablestore: %#v", params) 398 399 _, err = c.otsClient.DeleteRow(params) 400 401 if err != nil { 402 lockErr.Err = err 403 return lockErr 404 } 405 406 return nil 407 } 408 409 func (c *RemoteClient) lockPath() string { 410 return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile) 411 } 412 413 func (c *RemoteClient) getObj() (*remote.Payload, error) { 414 bucket, err := c.ossClient.Bucket(c.bucketName) 415 if err != nil { 416 return nil, fmt.Errorf("Error getting bucket %s: %#v", c.bucketName, err) 417 } 418 419 if exist, err := bucket.IsObjectExist(c.stateFile); err != nil { 420 return nil, fmt.Errorf("Estimating object %s is exist got an error: %#v", c.stateFile, err) 421 } else if !exist { 422 return nil, nil 423 } 424 425 var options []oss.Option 426 output, err := bucket.GetObject(c.stateFile, options...) 427 if err != nil { 428 return nil, fmt.Errorf("Error getting object: %#v", err) 429 } 430 431 buf := bytes.NewBuffer(nil) 432 if _, err := io.Copy(buf, output); err != nil { 433 return nil, fmt.Errorf("Failed to read remote state: %s", err) 434 } 435 sum := md5.Sum(buf.Bytes()) 436 payload := &remote.Payload{ 437 Data: buf.Bytes(), 438 MD5: sum[:], 439 } 440 441 // If there was no data, then return nil 442 if len(payload.Data) == 0 { 443 return nil, nil 444 } 445 446 return payload, nil 447 } 448 449 const errBadChecksumFmt = `state data in OSS does not have the expected content. 450 451 This may be caused by unusually long delays in OSS processing a previous state 452 update. Please wait for a minute or two and try again. If this problem 453 persists, and neither OSS nor TableStore are experiencing an outage, you may need 454 to manually verify the remote state and update the Digest value stored in the 455 TableStore table to the following value: %x 456 `