github.com/opentofu/opentofu@v1.7.1/internal/backend/remote-state/oss/client.go (about) 1 // Copyright (c) The OpenTofu Authors 2 // SPDX-License-Identifier: MPL-2.0 3 // Copyright (c) 2023 HashiCorp, Inc. 4 // SPDX-License-Identifier: MPL-2.0 5 6 package oss 7 8 import ( 9 "bytes" 10 "crypto/md5" 11 "encoding/hex" 12 "encoding/json" 13 "fmt" 14 "io" 15 "log" 16 "time" 17 18 "github.com/aliyun/aliyun-oss-go-sdk/oss" 19 "github.com/aliyun/aliyun-tablestore-go-sdk/tablestore" 20 "github.com/hashicorp/go-multierror" 21 uuid "github.com/hashicorp/go-uuid" 22 "github.com/pkg/errors" 23 24 "github.com/opentofu/opentofu/internal/states/remote" 25 "github.com/opentofu/opentofu/internal/states/statemgr" 26 ) 27 28 const ( 29 // Store the last saved serial in tablestore with this suffix for consistency checks. 30 stateIDSuffix = "-md5" 31 32 pkName = "LockID" 33 ) 34 35 var ( 36 // The amount of time we will retry a state waiting for it to match the 37 // expected checksum. 38 consistencyRetryTimeout = 10 * time.Second 39 40 // delay when polling the state 41 consistencyRetryPollInterval = 2 * time.Second 42 ) 43 44 // test hook called when checksums don't match 45 var testChecksumHook func() 46 47 type RemoteClient struct { 48 ossClient *oss.Client 49 otsClient *tablestore.TableStoreClient 50 bucketName string 51 stateFile string 52 lockFile string 53 serverSideEncryption bool 54 acl string 55 otsTable string 56 } 57 58 func (c *RemoteClient) Get() (payload *remote.Payload, err error) { 59 deadline := time.Now().Add(consistencyRetryTimeout) 60 61 // If we have a checksum, and the returned payload doesn't match, we retry 62 // up until deadline. 63 for { 64 payload, err = c.getObj() 65 if err != nil { 66 return nil, err 67 } 68 69 // If the remote state was manually removed the payload will be nil, 70 // but if there's still a digest entry for that state we will still try 71 // to compare the MD5 below. 72 var digest []byte 73 if payload != nil { 74 digest = payload.MD5 75 } 76 77 // verify that this state is what we expect 78 if expected, err := c.getMD5(); err != nil { 79 log.Printf("[WARN] failed to fetch state md5: %s", err) 80 } else if len(expected) > 0 && !bytes.Equal(expected, digest) { 81 log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest) 82 83 if testChecksumHook != nil { 84 testChecksumHook() 85 } 86 87 if time.Now().Before(deadline) { 88 time.Sleep(consistencyRetryPollInterval) 89 log.Println("[INFO] retrying OSS RemoteClient.Get...") 90 continue 91 } 92 93 return nil, fmt.Errorf(errBadChecksumFmt, digest) 94 } 95 96 break 97 } 98 return payload, nil 99 } 100 101 func (c *RemoteClient) Put(data []byte) error { 102 bucket, err := c.ossClient.Bucket(c.bucketName) 103 if err != nil { 104 return fmt.Errorf("error getting bucket: %w", err) 105 } 106 107 body := bytes.NewReader(data) 108 109 var options []oss.Option 110 if c.acl != "" { 111 options = append(options, oss.ACL(oss.ACLType(c.acl))) 112 } 113 options = append(options, oss.ContentType("application/json")) 114 if c.serverSideEncryption { 115 options = append(options, oss.ServerSideEncryption("AES256")) 116 } 117 options = append(options, oss.ContentLength(int64(len(data)))) 118 119 if body != nil { 120 if err := bucket.PutObject(c.stateFile, body, options...); err != nil { 121 return fmt.Errorf("failed to upload state %s: %w", c.stateFile, err) 122 } 123 } 124 125 sum := md5.Sum(data) 126 if err := c.putMD5(sum[:]); err != nil { 127 // if this errors out, we unfortunately have to error out altogether, 128 // since the next Get will inevitably fail. 129 return fmt.Errorf("failed to store state MD5: %w", err) 130 } 131 return nil 132 } 133 134 func (c *RemoteClient) Delete() error { 135 bucket, err := c.ossClient.Bucket(c.bucketName) 136 if err != nil { 137 return fmt.Errorf("error getting bucket %s: %w", c.bucketName, err) 138 } 139 140 log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile) 141 142 if err := bucket.DeleteObject(c.stateFile); err != nil { 143 return fmt.Errorf("error deleting state %s: %w", c.stateFile, err) 144 } 145 146 if err := c.deleteMD5(); err != nil { 147 log.Printf("[WARN] Error deleting state MD5: %s", err) 148 } 149 return nil 150 } 151 152 func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { 153 if c.otsTable == "" { 154 return "", nil 155 } 156 157 info.Path = c.lockPath() 158 159 if info.ID == "" { 160 lockID, err := uuid.GenerateUUID() 161 if err != nil { 162 return "", err 163 } 164 info.ID = lockID 165 } 166 167 putParams := &tablestore.PutRowChange{ 168 TableName: c.otsTable, 169 PrimaryKey: &tablestore.PrimaryKey{ 170 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 171 { 172 ColumnName: pkName, 173 Value: c.lockPath(), 174 }, 175 }, 176 }, 177 Columns: []tablestore.AttributeColumn{ 178 { 179 ColumnName: "Info", 180 Value: string(info.Marshal()), 181 }, 182 }, 183 Condition: &tablestore.RowCondition{ 184 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST, 185 }, 186 } 187 188 log.Printf("[DEBUG] Recording state lock in tablestore: %#v; LOCKID:%s", putParams, c.lockPath()) 189 190 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 191 PutRowChange: putParams, 192 }) 193 if err != nil { 194 err = fmt.Errorf("invoking PutRow got an error: %w", err) 195 lockInfo, infoErr := c.getLockInfo() 196 if infoErr != nil { 197 err = multierror.Append(err, fmt.Errorf("\ngetting lock info got an error: %w", infoErr)) 198 } 199 lockErr := &statemgr.LockError{ 200 Err: err, 201 Info: lockInfo, 202 } 203 log.Printf("[ERROR] state lock error: %s", lockErr.Error()) 204 return "", lockErr 205 } 206 207 return info.ID, nil 208 } 209 210 func (c *RemoteClient) getMD5() ([]byte, error) { 211 if c.otsTable == "" { 212 return nil, nil 213 } 214 215 getParams := &tablestore.SingleRowQueryCriteria{ 216 TableName: c.otsTable, 217 PrimaryKey: &tablestore.PrimaryKey{ 218 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 219 { 220 ColumnName: pkName, 221 Value: c.lockPath() + stateIDSuffix, 222 }, 223 }, 224 }, 225 ColumnsToGet: []string{pkName, "Digest"}, 226 MaxVersion: 1, 227 } 228 229 log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams) 230 231 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 232 SingleRowQueryCriteria: getParams, 233 }) 234 235 if err != nil { 236 return nil, err 237 } 238 239 var val string 240 if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 { 241 val = v[0].Value.(string) 242 } 243 244 sum, err := hex.DecodeString(val) 245 if err != nil || len(sum) != md5.Size { 246 return nil, errors.New("invalid md5") 247 } 248 249 return sum, nil 250 } 251 252 // store the hash of the state to that clients can check for stale state files. 253 func (c *RemoteClient) putMD5(sum []byte) error { 254 if c.otsTable == "" { 255 return nil 256 } 257 258 if len(sum) != md5.Size { 259 return errors.New("invalid payload md5") 260 } 261 262 putParams := &tablestore.PutRowChange{ 263 TableName: c.otsTable, 264 PrimaryKey: &tablestore.PrimaryKey{ 265 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 266 { 267 ColumnName: pkName, 268 Value: c.lockPath() + stateIDSuffix, 269 }, 270 }, 271 }, 272 Columns: []tablestore.AttributeColumn{ 273 { 274 ColumnName: "Digest", 275 Value: hex.EncodeToString(sum), 276 }, 277 }, 278 Condition: &tablestore.RowCondition{ 279 RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE, 280 }, 281 } 282 283 log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams) 284 285 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 286 PutRowChange: putParams, 287 }) 288 289 if err != nil { 290 log.Printf("[WARN] failed to record state serial in tablestore: %s", err) 291 } 292 293 return nil 294 } 295 296 // remove the hash value for a deleted state 297 func (c *RemoteClient) deleteMD5() error { 298 if c.otsTable == "" { 299 return nil 300 } 301 302 params := &tablestore.DeleteRowRequest{ 303 DeleteRowChange: &tablestore.DeleteRowChange{ 304 TableName: c.otsTable, 305 PrimaryKey: &tablestore.PrimaryKey{ 306 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 307 { 308 ColumnName: pkName, 309 Value: c.lockPath() + stateIDSuffix, 310 }, 311 }, 312 }, 313 Condition: &tablestore.RowCondition{ 314 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 315 }, 316 }, 317 } 318 319 log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params) 320 321 if _, err := c.otsClient.DeleteRow(params); err != nil { 322 return err 323 } 324 325 return nil 326 } 327 328 func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) { 329 getParams := &tablestore.SingleRowQueryCriteria{ 330 TableName: c.otsTable, 331 PrimaryKey: &tablestore.PrimaryKey{ 332 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 333 { 334 ColumnName: pkName, 335 Value: c.lockPath(), 336 }, 337 }, 338 }, 339 ColumnsToGet: []string{pkName, "Info"}, 340 MaxVersion: 1, 341 } 342 343 log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams) 344 345 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 346 SingleRowQueryCriteria: getParams, 347 }) 348 if err != nil { 349 return nil, err 350 } 351 352 var infoData string 353 if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 { 354 infoData = v[0].Value.(string) 355 } 356 lockInfo := &statemgr.LockInfo{} 357 err = json.Unmarshal([]byte(infoData), lockInfo) 358 if err != nil { 359 return nil, err 360 } 361 return lockInfo, nil 362 } 363 func (c *RemoteClient) Unlock(id string) error { 364 if c.otsTable == "" { 365 return nil 366 } 367 368 lockErr := &statemgr.LockError{} 369 370 lockInfo, err := c.getLockInfo() 371 if err != nil { 372 lockErr.Err = fmt.Errorf("failed to retrieve lock info: %w", err) 373 return lockErr 374 } 375 lockErr.Info = lockInfo 376 377 if lockInfo.ID != id { 378 lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id) 379 return lockErr 380 } 381 params := &tablestore.DeleteRowRequest{ 382 DeleteRowChange: &tablestore.DeleteRowChange{ 383 TableName: c.otsTable, 384 PrimaryKey: &tablestore.PrimaryKey{ 385 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 386 { 387 ColumnName: pkName, 388 Value: c.lockPath(), 389 }, 390 }, 391 }, 392 Condition: &tablestore.RowCondition{ 393 RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE, 394 }, 395 }, 396 } 397 398 _, err = c.otsClient.DeleteRow(params) 399 400 if err != nil { 401 lockErr.Err = err 402 return lockErr 403 } 404 405 return nil 406 } 407 408 func (c *RemoteClient) lockPath() string { 409 return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile) 410 } 411 412 func (c *RemoteClient) getObj() (*remote.Payload, error) { 413 bucket, err := c.ossClient.Bucket(c.bucketName) 414 if err != nil { 415 return nil, fmt.Errorf("error getting bucket %s: %w", c.bucketName, err) 416 } 417 418 if exist, err := bucket.IsObjectExist(c.stateFile); err != nil { 419 return nil, fmt.Errorf("estimating object %s is exist got an error: %w", c.stateFile, err) 420 } else if !exist { 421 return nil, nil 422 } 423 424 var options []oss.Option 425 output, err := bucket.GetObject(c.stateFile, options...) 426 if err != nil { 427 return nil, fmt.Errorf("error getting object: %w", err) 428 } 429 430 buf := bytes.NewBuffer(nil) 431 if _, err := io.Copy(buf, output); err != nil { 432 return nil, fmt.Errorf("failed to read remote state: %w", err) 433 } 434 sum := md5.Sum(buf.Bytes()) 435 payload := &remote.Payload{ 436 Data: buf.Bytes(), 437 MD5: sum[:], 438 } 439 440 // If there was no data, then return nil 441 if len(payload.Data) == 0 { 442 return nil, nil 443 } 444 445 return payload, nil 446 } 447 448 const errBadChecksumFmt = `state data in OSS does not have the expected content. 449 450 This may be caused by unusually long delays in OSS processing a previous state 451 update. Please wait for a minute or two and try again. If this problem 452 persists, and neither OSS nor TableStore are experiencing an outage, you may need 453 to manually verify the remote state and update the Digest value stored in the 454 TableStore table to the following value: %x`