github.com/mingfang/terraform@v0.11.12-beta1/backend/remote-state/s3/client.go (about) 1 package s3 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "encoding/hex" 7 "encoding/json" 8 "errors" 9 "fmt" 10 "io" 11 "log" 12 "time" 13 14 "github.com/aws/aws-sdk-go/aws" 15 "github.com/aws/aws-sdk-go/aws/awserr" 16 "github.com/aws/aws-sdk-go/service/dynamodb" 17 "github.com/aws/aws-sdk-go/service/s3" 18 multierror "github.com/hashicorp/go-multierror" 19 uuid "github.com/hashicorp/go-uuid" 20 "github.com/hashicorp/terraform/state" 21 "github.com/hashicorp/terraform/state/remote" 22 ) 23 24 // Store the last saved serial in dynamo with this suffix for consistency checks. 25 const ( 26 stateIDSuffix = "-md5" 27 s3ErrCodeInternalError = "InternalError" 28 ) 29 30 type RemoteClient struct { 31 s3Client *s3.S3 32 dynClient *dynamodb.DynamoDB 33 bucketName string 34 path string 35 serverSideEncryption bool 36 acl string 37 kmsKeyID string 38 ddbTable string 39 } 40 41 var ( 42 // The amount of time we will retry a state waiting for it to match the 43 // expected checksum. 44 consistencyRetryTimeout = 10 * time.Second 45 46 // delay when polling the state 47 consistencyRetryPollInterval = 2 * time.Second 48 ) 49 50 // test hook called when checksums don't match 51 var testChecksumHook func() 52 53 func (c *RemoteClient) Get() (payload *remote.Payload, err error) { 54 deadline := time.Now().Add(consistencyRetryTimeout) 55 56 // If we have a checksum, and the returned payload doesn't match, we retry 57 // up until deadline. 58 for { 59 payload, err = c.get() 60 if err != nil { 61 return nil, err 62 } 63 64 // If the remote state was manually removed the payload will be nil, 65 // but if there's still a digest entry for that state we will still try 66 // to compare the MD5 below. 67 var digest []byte 68 if payload != nil { 69 digest = payload.MD5 70 } 71 72 // verify that this state is what we expect 73 if expected, err := c.getMD5(); err != nil { 74 log.Printf("[WARN] failed to fetch state md5: %s", err) 75 } else if len(expected) > 0 && !bytes.Equal(expected, digest) { 76 log.Printf("[WARN] state md5 mismatch: expected '%x', got '%x'", expected, digest) 77 78 if testChecksumHook != nil { 79 testChecksumHook() 80 } 81 82 if time.Now().Before(deadline) { 83 time.Sleep(consistencyRetryPollInterval) 84 log.Println("[INFO] retrying S3 RemoteClient.Get...") 85 continue 86 } 87 88 return nil, fmt.Errorf(errBadChecksumFmt, digest) 89 } 90 91 break 92 } 93 94 return payload, err 95 } 96 97 func (c *RemoteClient) get() (*remote.Payload, error) { 98 var output *s3.GetObjectOutput 99 var err error 100 101 // we immediately retry on an internal error, as those are usually transient 102 maxRetries := 2 103 for retryCount := 0; ; retryCount++ { 104 output, err = c.s3Client.GetObject(&s3.GetObjectInput{ 105 Bucket: &c.bucketName, 106 Key: &c.path, 107 }) 108 109 if err != nil { 110 if awserr, ok := err.(awserr.Error); ok { 111 switch awserr.Code() { 112 case s3.ErrCodeNoSuchKey: 113 return nil, nil 114 case s3ErrCodeInternalError: 115 if retryCount > maxRetries { 116 return nil, err 117 } 118 log.Println("[WARN] s3 internal error, retrying...") 119 continue 120 } 121 } 122 return nil, err 123 } 124 break 125 } 126 127 defer output.Body.Close() 128 129 buf := bytes.NewBuffer(nil) 130 if _, err := io.Copy(buf, output.Body); err != nil { 131 return nil, fmt.Errorf("Failed to read remote state: %s", err) 132 } 133 134 sum := md5.Sum(buf.Bytes()) 135 payload := &remote.Payload{ 136 Data: buf.Bytes(), 137 MD5: sum[:], 138 } 139 140 // If there was no data, then return nil 141 if len(payload.Data) == 0 { 142 return nil, nil 143 } 144 145 return payload, nil 146 } 147 148 func (c *RemoteClient) Put(data []byte) error { 149 contentType := "application/json" 150 contentLength := int64(len(data)) 151 152 // we immediately retry on an internal error, as those are usually transient 153 maxRetries := 2 154 for retryCount := 0; ; retryCount++ { 155 i := &s3.PutObjectInput{ 156 ContentType: &contentType, 157 ContentLength: &contentLength, 158 Body: bytes.NewReader(data), 159 Bucket: &c.bucketName, 160 Key: &c.path, 161 } 162 163 if c.serverSideEncryption { 164 if c.kmsKeyID != "" { 165 i.SSEKMSKeyId = &c.kmsKeyID 166 i.ServerSideEncryption = aws.String("aws:kms") 167 } else { 168 i.ServerSideEncryption = aws.String("AES256") 169 } 170 } 171 172 if c.acl != "" { 173 i.ACL = aws.String(c.acl) 174 } 175 176 log.Printf("[DEBUG] Uploading remote state to S3: %#v", i) 177 178 _, err := c.s3Client.PutObject(i) 179 if err != nil { 180 if awserr, ok := err.(awserr.Error); ok { 181 if awserr.Code() == s3ErrCodeInternalError { 182 if retryCount > maxRetries { 183 return fmt.Errorf("failed to upload state: %s", err) 184 } 185 log.Println("[WARN] s3 internal error, retrying...") 186 continue 187 } 188 } 189 return fmt.Errorf("failed to upload state: %s", err) 190 } 191 break 192 } 193 194 sum := md5.Sum(data) 195 if err := c.putMD5(sum[:]); err != nil { 196 // if this errors out, we unfortunately have to error out altogether, 197 // since the next Get will inevitably fail. 198 return fmt.Errorf("failed to store state MD5: %s", err) 199 200 } 201 202 return nil 203 } 204 205 func (c *RemoteClient) Delete() error { 206 _, err := c.s3Client.DeleteObject(&s3.DeleteObjectInput{ 207 Bucket: &c.bucketName, 208 Key: &c.path, 209 }) 210 211 if err != nil { 212 return err 213 } 214 215 if err := c.deleteMD5(); err != nil { 216 log.Printf("error deleting state md5: %s", err) 217 } 218 219 return nil 220 } 221 222 func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) { 223 if c.ddbTable == "" { 224 return "", nil 225 } 226 227 info.Path = c.lockPath() 228 229 if info.ID == "" { 230 lockID, err := uuid.GenerateUUID() 231 if err != nil { 232 return "", err 233 } 234 235 info.ID = lockID 236 } 237 238 putParams := &dynamodb.PutItemInput{ 239 Item: map[string]*dynamodb.AttributeValue{ 240 "LockID": {S: aws.String(c.lockPath())}, 241 "Info": {S: aws.String(string(info.Marshal()))}, 242 }, 243 TableName: aws.String(c.ddbTable), 244 ConditionExpression: aws.String("attribute_not_exists(LockID)"), 245 } 246 _, err := c.dynClient.PutItem(putParams) 247 248 if err != nil { 249 lockInfo, infoErr := c.getLockInfo() 250 if infoErr != nil { 251 err = multierror.Append(err, infoErr) 252 } 253 254 lockErr := &state.LockError{ 255 Err: err, 256 Info: lockInfo, 257 } 258 return "", lockErr 259 } 260 261 return info.ID, nil 262 } 263 264 func (c *RemoteClient) getMD5() ([]byte, error) { 265 if c.ddbTable == "" { 266 return nil, nil 267 } 268 269 getParams := &dynamodb.GetItemInput{ 270 Key: map[string]*dynamodb.AttributeValue{ 271 "LockID": {S: aws.String(c.lockPath() + stateIDSuffix)}, 272 }, 273 ProjectionExpression: aws.String("LockID, Digest"), 274 TableName: aws.String(c.ddbTable), 275 ConsistentRead: aws.Bool(true), 276 } 277 278 resp, err := c.dynClient.GetItem(getParams) 279 if err != nil { 280 return nil, err 281 } 282 283 var val string 284 if v, ok := resp.Item["Digest"]; ok && v.S != nil { 285 val = *v.S 286 } 287 288 sum, err := hex.DecodeString(val) 289 if err != nil || len(sum) != md5.Size { 290 return nil, errors.New("invalid md5") 291 } 292 293 return sum, nil 294 } 295 296 // store the hash of the state to that clients can check for stale state files. 297 func (c *RemoteClient) putMD5(sum []byte) error { 298 if c.ddbTable == "" { 299 return nil 300 } 301 302 if len(sum) != md5.Size { 303 return errors.New("invalid payload md5") 304 } 305 306 putParams := &dynamodb.PutItemInput{ 307 Item: map[string]*dynamodb.AttributeValue{ 308 "LockID": {S: aws.String(c.lockPath() + stateIDSuffix)}, 309 "Digest": {S: aws.String(hex.EncodeToString(sum))}, 310 }, 311 TableName: aws.String(c.ddbTable), 312 } 313 _, err := c.dynClient.PutItem(putParams) 314 if err != nil { 315 log.Printf("[WARN] failed to record state serial in dynamodb: %s", err) 316 } 317 318 return nil 319 } 320 321 // remove the hash value for a deleted state 322 func (c *RemoteClient) deleteMD5() error { 323 if c.ddbTable == "" { 324 return nil 325 } 326 327 params := &dynamodb.DeleteItemInput{ 328 Key: map[string]*dynamodb.AttributeValue{ 329 "LockID": {S: aws.String(c.lockPath() + stateIDSuffix)}, 330 }, 331 TableName: aws.String(c.ddbTable), 332 } 333 if _, err := c.dynClient.DeleteItem(params); err != nil { 334 return err 335 } 336 return nil 337 } 338 339 func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) { 340 getParams := &dynamodb.GetItemInput{ 341 Key: map[string]*dynamodb.AttributeValue{ 342 "LockID": {S: aws.String(c.lockPath())}, 343 }, 344 ProjectionExpression: aws.String("LockID, Info"), 345 TableName: aws.String(c.ddbTable), 346 ConsistentRead: aws.Bool(true), 347 } 348 349 resp, err := c.dynClient.GetItem(getParams) 350 if err != nil { 351 return nil, err 352 } 353 354 var infoData string 355 if v, ok := resp.Item["Info"]; ok && v.S != nil { 356 infoData = *v.S 357 } 358 359 lockInfo := &state.LockInfo{} 360 err = json.Unmarshal([]byte(infoData), lockInfo) 361 if err != nil { 362 return nil, err 363 } 364 365 return lockInfo, nil 366 } 367 368 func (c *RemoteClient) Unlock(id string) error { 369 if c.ddbTable == "" { 370 return nil 371 } 372 373 lockErr := &state.LockError{} 374 375 // TODO: store the path and lock ID in separate fields, and have proper 376 // projection expression only delete the lock if both match, rather than 377 // checking the ID from the info field first. 378 lockInfo, err := c.getLockInfo() 379 if err != nil { 380 lockErr.Err = fmt.Errorf("failed to retrieve lock info: %s", err) 381 return lockErr 382 } 383 lockErr.Info = lockInfo 384 385 if lockInfo.ID != id { 386 lockErr.Err = fmt.Errorf("lock id %q does not match existing lock", id) 387 return lockErr 388 } 389 390 params := &dynamodb.DeleteItemInput{ 391 Key: map[string]*dynamodb.AttributeValue{ 392 "LockID": {S: aws.String(c.lockPath())}, 393 }, 394 TableName: aws.String(c.ddbTable), 395 } 396 _, err = c.dynClient.DeleteItem(params) 397 398 if err != nil { 399 lockErr.Err = err 400 return lockErr 401 } 402 return nil 403 } 404 405 func (c *RemoteClient) lockPath() string { 406 return fmt.Sprintf("%s/%s", c.bucketName, c.path) 407 } 408 409 const errBadChecksumFmt = `state data in S3 does not have the expected content. 410 411 This may be caused by unusually long delays in S3 processing a previous state 412 update. Please wait for a minute or two and try again. If this problem 413 persists, and neither S3 nor DynamoDB are experiencing an outage, you may need 414 to manually verify the remote state and update the Digest value stored in the 415 DynamoDB table to the following value: %x 416 `