github.com/hugorut/terraform@v1.1.3/src/backend/remote-state/oss/client.go (about) 1 package oss 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "encoding/hex" 7 "encoding/json" 8 "fmt" 9 "io" 10 "log" 11 "time" 12 13 "github.com/aliyun/aliyun-oss-go-sdk/oss" 14 "github.com/aliyun/aliyun-tablestore-go-sdk/tablestore" 15 "github.com/hashicorp/go-multierror" 16 uuid "github.com/hashicorp/go-uuid" 17 "github.com/pkg/errors" 18 19 "github.com/hugorut/terraform/src/states/remote" 20 "github.com/hugorut/terraform/src/states/statemgr" 21 ) 22 23 const ( 24 // Store the last saved serial in tablestore with this suffix for consistency checks. 25 stateIDSuffix = "-md5" 26 27 pkName = "LockID" 28 ) 29 30 var ( 31 // The amount of time we will retry a state waiting for it to match the 32 // expected checksum. 33 consistencyRetryTimeout = 10 * time.Second 34 35 // delay when polling the state 36 consistencyRetryPollInterval = 2 * time.Second 37 ) 38 39 // test hook called when checksums don't match 40 var testChecksumHook func() 41 42 type RemoteClient struct { 43 ossClient *oss.Client 44 otsClient *tablestore.TableStoreClient 45 bucketName string 46 stateFile string 47 lockFile string 48 serverSideEncryption bool 49 acl string 50 otsTable string 51 } 52 53 func (c *RemoteClient) Get() (payload *remote.Payload, err error) { 54 deadline := time.Now().Add(consistencyRetryTimeout) 55 56 // If we have a checksum, and the returned payload doesn't match, we retry 57 // up until deadline. 58 for { 59 payload, err = c.getObj() 60 if err != nil { 61 return nil, err 62 } 63 64 // If the remote state was manually removed the payload will be nil, 65 // but if there's still a digest entry for that state we will still try 66 // to compare the MD5 below. 67 var digest []byte 68 if payload != nil { 69 digest = payload.MD5 70 } 71 72 // verify that this state is what we expect 73 if expected, err := c.getMD5(); err != nil { 74 log.Printf("[WARN] failed to fetch state md5: %s", err) 75 } else if len(expected) > 0 && !bytes.Equal(expected, digest) { 76 log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest) 77 78 if testChecksumHook != nil { 79 testChecksumHook() 80 } 81 82 if time.Now().Before(deadline) { 83 time.Sleep(consistencyRetryPollInterval) 84 log.Println("[INFO] retrying OSS RemoteClient.Get...") 85 continue 86 } 87 88 return nil, fmt.Errorf(errBadChecksumFmt, digest) 89 } 90 91 break 92 } 93 return payload, nil 94 } 95 96 func (c *RemoteClient) Put(data []byte) error { 97 bucket, err := c.ossClient.Bucket(c.bucketName) 98 if err != nil { 99 return fmt.Errorf("error getting bucket: %#v", err) 100 } 101 102 body := bytes.NewReader(data) 103 104 var options []oss.Option 105 if c.acl != "" { 106 options = append(options, oss.ACL(oss.ACLType(c.acl))) 107 } 108 options = append(options, oss.ContentType("application/json")) 109 if c.serverSideEncryption { 110 options = append(options, oss.ServerSideEncryption("AES256")) 111 } 112 options = append(options, oss.ContentLength(int64(len(data)))) 113 114 if body != nil { 115 if err := bucket.PutObject(c.stateFile, body, options...); err != nil { 116 return fmt.Errorf("failed to upload state %s: %#v", c.stateFile, err) 117 } 118 } 119 120 sum := md5.Sum(data) 121 if err := c.putMD5(sum[:]); err != nil { 122 // if this errors out, we unfortunately have to error out altogether, 123 // since the next Get will inevitably fail. 124 return fmt.Errorf("failed to store state MD5: %s", err) 125 } 126 return nil 127 } 128 129 func (c *RemoteClient) Delete() error { 130 bucket, err := c.ossClient.Bucket(c.bucketName) 131 if err != nil { 132 return fmt.Errorf("error getting bucket %s: %#v", c.bucketName, err) 133 } 134 135 log.Printf("[DEBUG] Deleting remote state from OSS: %#v", c.stateFile) 136 137 if err := bucket.DeleteObject(c.stateFile); err != nil { 138 return fmt.Errorf("error deleting state %s: %#v", c.stateFile, err) 139 } 140 141 if err := c.deleteMD5(); err != nil { 142 log.Printf("[WARN] Error deleting state MD5: %s", err) 143 } 144 return nil 145 } 146 147 func (c *RemoteClient) Lock(info *statemgr.LockInfo) (string, error) { 148 if c.otsTable == "" { 149 return "", nil 150 } 151 152 info.Path = c.lockPath() 153 154 if info.ID == "" { 155 lockID, err := uuid.GenerateUUID() 156 if err != nil { 157 return "", err 158 } 159 info.ID = lockID 160 } 161 162 putParams := &tablestore.PutRowChange{ 163 TableName: c.otsTable, 164 PrimaryKey: &tablestore.PrimaryKey{ 165 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 166 { 167 ColumnName: pkName, 168 Value: c.lockPath(), 169 }, 170 }, 171 }, 172 Columns: []tablestore.AttributeColumn{ 173 { 174 ColumnName: "Info", 175 Value: string(info.Marshal()), 176 }, 177 }, 178 Condition: &tablestore.RowCondition{ 179 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_NOT_EXIST, 180 }, 181 } 182 183 log.Printf("[DEBUG] Recording state lock in tablestore: %#v", putParams) 184 185 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 186 PutRowChange: putParams, 187 }) 188 if err != nil { 189 log.Printf("[WARN] Error storing state lock in tablestore: %#v", err) 190 lockInfo, infoErr := c.getLockInfo() 191 if infoErr != nil { 192 log.Printf("[WARN] Error getting lock info: %#v", err) 193 err = multierror.Append(err, infoErr) 194 } 195 lockErr := &statemgr.LockError{ 196 Err: err, 197 Info: lockInfo, 198 } 199 log.Printf("[WARN] state lock error: %#v", lockErr) 200 return "", lockErr 201 } 202 203 return info.ID, nil 204 } 205 206 func (c *RemoteClient) getMD5() ([]byte, error) { 207 if c.otsTable == "" { 208 return nil, nil 209 } 210 211 getParams := &tablestore.SingleRowQueryCriteria{ 212 TableName: c.otsTable, 213 PrimaryKey: &tablestore.PrimaryKey{ 214 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 215 { 216 ColumnName: pkName, 217 Value: c.lockPath() + stateIDSuffix, 218 }, 219 }, 220 }, 221 ColumnsToGet: []string{pkName, "Digest"}, 222 MaxVersion: 1, 223 } 224 225 log.Printf("[DEBUG] Retrieving state serial in tablestore: %#v", getParams) 226 227 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 228 SingleRowQueryCriteria: getParams, 229 }) 230 231 if err != nil { 232 return nil, err 233 } 234 235 var val string 236 if v, ok := object.GetColumnMap().Columns["Digest"]; ok && len(v) > 0 { 237 val = v[0].Value.(string) 238 } 239 240 sum, err := hex.DecodeString(val) 241 if err != nil || len(sum) != md5.Size { 242 return nil, errors.New("invalid md5") 243 } 244 245 return sum, nil 246 } 247 248 // store the hash of the state to that clients can check for stale state files. 249 func (c *RemoteClient) putMD5(sum []byte) error { 250 if c.otsTable == "" { 251 return nil 252 } 253 254 if len(sum) != md5.Size { 255 return errors.New("invalid payload md5") 256 } 257 258 putParams := &tablestore.PutRowChange{ 259 TableName: c.otsTable, 260 PrimaryKey: &tablestore.PrimaryKey{ 261 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 262 { 263 ColumnName: pkName, 264 Value: c.lockPath() + stateIDSuffix, 265 }, 266 }, 267 }, 268 Columns: []tablestore.AttributeColumn{ 269 { 270 ColumnName: "Digest", 271 Value: hex.EncodeToString(sum), 272 }, 273 }, 274 Condition: &tablestore.RowCondition{ 275 RowExistenceExpectation: tablestore.RowExistenceExpectation_IGNORE, 276 }, 277 } 278 279 log.Printf("[DEBUG] Recoring state serial in tablestore: %#v", putParams) 280 281 _, err := c.otsClient.PutRow(&tablestore.PutRowRequest{ 282 PutRowChange: putParams, 283 }) 284 285 if err != nil { 286 log.Printf("[WARN] failed to record state serial in tablestore: %s", err) 287 } 288 289 return nil 290 } 291 292 // remove the hash value for a deleted state 293 func (c *RemoteClient) deleteMD5() error { 294 if c.otsTable == "" { 295 return nil 296 } 297 298 params := &tablestore.DeleteRowRequest{ 299 DeleteRowChange: &tablestore.DeleteRowChange{ 300 TableName: c.otsTable, 301 PrimaryKey: &tablestore.PrimaryKey{ 302 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 303 { 304 ColumnName: pkName, 305 Value: c.lockPath() + stateIDSuffix, 306 }, 307 }, 308 }, 309 Condition: &tablestore.RowCondition{ 310 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 311 }, 312 }, 313 } 314 315 log.Printf("[DEBUG] Deleting state serial in tablestore: %#v", params) 316 317 if _, err := c.otsClient.DeleteRow(params); err != nil { 318 return err 319 } 320 321 return nil 322 } 323 324 func (c *RemoteClient) getLockInfo() (*statemgr.LockInfo, error) { 325 getParams := &tablestore.SingleRowQueryCriteria{ 326 TableName: c.otsTable, 327 PrimaryKey: &tablestore.PrimaryKey{ 328 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 329 { 330 ColumnName: pkName, 331 Value: c.lockPath(), 332 }, 333 }, 334 }, 335 ColumnsToGet: []string{pkName, "Info"}, 336 MaxVersion: 1, 337 } 338 339 log.Printf("[DEBUG] Retrieving state lock info from tablestore: %#v", getParams) 340 341 object, err := c.otsClient.GetRow(&tablestore.GetRowRequest{ 342 SingleRowQueryCriteria: getParams, 343 }) 344 if err != nil { 345 return nil, err 346 } 347 348 var infoData string 349 if v, ok := object.GetColumnMap().Columns["Info"]; ok && len(v) > 0 { 350 infoData = v[0].Value.(string) 351 } 352 lockInfo := &statemgr.LockInfo{} 353 err = json.Unmarshal([]byte(infoData), lockInfo) 354 if err != nil { 355 return nil, err 356 } 357 return lockInfo, nil 358 } 359 func (c *RemoteClient) Unlock(id string) error { 360 if c.otsTable == "" { 361 return nil 362 } 363 364 lockErr := &statemgr.LockError{} 365 366 lockInfo, err := c.getLockInfo() 367 if err != nil { 368 lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err) 369 return lockErr 370 } 371 lockErr.Info = lockInfo 372 373 if lockInfo.ID != id { 374 lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id) 375 return lockErr 376 } 377 params := &tablestore.DeleteRowRequest{ 378 DeleteRowChange: &tablestore.DeleteRowChange{ 379 TableName: c.otsTable, 380 PrimaryKey: &tablestore.PrimaryKey{ 381 PrimaryKeys: []*tablestore.PrimaryKeyColumn{ 382 { 383 ColumnName: pkName, 384 Value: c.lockPath(), 385 }, 386 }, 387 }, 388 Condition: &tablestore.RowCondition{ 389 RowExistenceExpectation: tablestore.RowExistenceExpectation_EXPECT_EXIST, 390 }, 391 }, 392 } 393 394 log.Printf("[DEBUG] Deleting state lock from tablestore: %#v", params) 395 396 _, err = c.otsClient.DeleteRow(params) 397 398 if err != nil { 399 lockErr.Err = err 400 return lockErr 401 } 402 403 return nil 404 } 405 406 func (c *RemoteClient) lockPath() string { 407 return fmt.Sprintf("%s/%s", c.bucketName, c.stateFile) 408 } 409 410 func (c *RemoteClient) getObj() (*remote.Payload, error) { 411 bucket, err := c.ossClient.Bucket(c.bucketName) 412 if err != nil { 413 return nil, fmt.Errorf("error getting bucket %s: %#v", c.bucketName, err) 414 } 415 416 if exist, err := bucket.IsObjectExist(c.stateFile); err != nil { 417 return nil, fmt.Errorf("estimating object %s is exist got an error: %#v", c.stateFile, err) 418 } else if !exist { 419 return nil, nil 420 } 421 422 var options []oss.Option 423 output, err := bucket.GetObject(c.stateFile, options...) 424 if err != nil { 425 return nil, fmt.Errorf("error getting object: %#v", err) 426 } 427 428 buf := bytes.NewBuffer(nil) 429 if _, err := io.Copy(buf, output); err != nil { 430 return nil, fmt.Errorf("failed to read remote state: %s", err) 431 } 432 sum := md5.Sum(buf.Bytes()) 433 payload := &remote.Payload{ 434 Data: buf.Bytes(), 435 MD5: sum[:], 436 } 437 438 // If there was no data, then return nil 439 if len(payload.Data) == 0 { 440 return nil, nil 441 } 442 443 return payload, nil 444 } 445 446 const errBadChecksumFmt = `state data in OSS does not have the expected content. 447 448 This may be caused by unusually long delays in OSS processing a previous state 449 update. Please wait for a minute or two and try again. If this problem 450 persists, and neither OSS nor TableStore are experiencing an outage, you may need 451 to manually verify the remote state and update the Digest value stored in the 452 TableStore table to the following value: %x`