github.com/paybyphone/terraform@v0.9.5-0.20170613192930-9706042ddd51/backend/remote-state/consul/client.go (about) 1 package consul 2 3 import ( 4 "bytes" 5 "compress/gzip" 6 "crypto/md5" 7 "encoding/json" 8 "errors" 9 "fmt" 10 "log" 11 "sync" 12 "time" 13 14 consulapi "github.com/hashicorp/consul/api" 15 multierror "github.com/hashicorp/go-multierror" 16 "github.com/hashicorp/terraform/state" 17 "github.com/hashicorp/terraform/state/remote" 18 ) 19 20 const ( 21 lockSuffix = "/.lock" 22 lockInfoSuffix = "/.lockinfo" 23 ) 24 25 // RemoteClient is a remote client that stores data in Consul. 26 type RemoteClient struct { 27 Client *consulapi.Client 28 Path string 29 GZip bool 30 31 mu sync.Mutex 32 // lockState is true if we're using locks 33 lockState bool 34 35 // The index of the last state we wrote. 36 // If this is > 0, Put will perform a CAS to ensure that the state wasn't 37 // changed during the operation. This is important even with locks, because 38 // if the client loses the lock for some reason, then reacquires it, we 39 // need to make sure that the state was not modified. 40 modifyIndex uint64 41 42 consulLock *consulapi.Lock 43 lockCh <-chan struct{} 44 45 info *state.LockInfo 46 47 // cancel the goroutine which is monitoring the lock. 48 monitorCancel chan struct{} 49 monitorDone chan struct{} 50 } 51 52 func (c *RemoteClient) Get() (*remote.Payload, error) { 53 c.mu.Lock() 54 defer c.mu.Unlock() 55 56 pair, _, err := c.Client.KV().Get(c.Path, nil) 57 if err != nil { 58 return nil, err 59 } 60 if pair == nil { 61 return nil, nil 62 } 63 64 c.modifyIndex = pair.ModifyIndex 65 66 payload := pair.Value 67 // If the payload starts with 0x1f, it's gzip, not json 68 if len(pair.Value) >= 1 && pair.Value[0] == '\x1f' { 69 if data, err := uncompressState(pair.Value); err == nil { 70 payload = data 71 } else { 72 return nil, err 73 } 74 } 75 76 md5 := md5.Sum(pair.Value) 77 return &remote.Payload{ 78 Data: payload, 79 MD5: md5[:], 80 }, nil 81 } 82 83 func (c *RemoteClient) Put(data []byte) error { 84 c.mu.Lock() 85 defer c.mu.Unlock() 86 87 payload := data 88 if c.GZip { 89 if compressedState, err := compressState(data); err == nil { 90 payload = compressedState 91 } else { 92 return err 93 } 94 } 95 96 kv := c.Client.KV() 97 98 // default to doing a CAS 99 verb := consulapi.KVCAS 100 101 // Assume a 0 index doesn't need a CAS for now, since we are either 102 // creating a new state or purposely overwriting one. 103 if c.modifyIndex == 0 { 104 verb = consulapi.KVSet 105 } 106 107 // KV.Put doesn't return the new index, so we use a single operation 108 // transaction to get the new index with a single request. 109 txOps := consulapi.KVTxnOps{ 110 &consulapi.KVTxnOp{ 111 Verb: verb, 112 Key: c.Path, 113 Value: payload, 114 Index: c.modifyIndex, 115 }, 116 } 117 118 ok, resp, _, err := kv.Txn(txOps, nil) 119 if err != nil { 120 return err 121 } 122 123 // transaction was rolled back 124 if !ok { 125 return fmt.Errorf("consul CAS failed with transaction errors: %v", resp.Errors) 126 } 127 128 if len(resp.Results) != 1 { 129 // this probably shouldn't happen 130 return fmt.Errorf("expected on 1 response value, got: %d", len(resp.Results)) 131 } 132 133 c.modifyIndex = resp.Results[0].ModifyIndex 134 return nil 135 } 136 137 func (c *RemoteClient) Delete() error { 138 c.mu.Lock() 139 defer c.mu.Unlock() 140 141 kv := c.Client.KV() 142 _, err := kv.Delete(c.Path, nil) 143 return err 144 } 145 146 func (c *RemoteClient) putLockInfo(info *state.LockInfo) error { 147 info.Path = c.Path 148 info.Created = time.Now().UTC() 149 150 kv := c.Client.KV() 151 _, err := kv.Put(&consulapi.KVPair{ 152 Key: c.Path + lockInfoSuffix, 153 Value: info.Marshal(), 154 }, nil) 155 156 return err 157 } 158 159 func (c *RemoteClient) getLockInfo() (*state.LockInfo, error) { 160 path := c.Path + lockInfoSuffix 161 pair, _, err := c.Client.KV().Get(path, nil) 162 if err != nil { 163 return nil, err 164 } 165 if pair == nil { 166 return nil, nil 167 } 168 169 li := &state.LockInfo{} 170 err = json.Unmarshal(pair.Value, li) 171 if err != nil { 172 return nil, fmt.Errorf("error unmarshaling lock info: %s", err) 173 } 174 175 return li, nil 176 } 177 178 func (c *RemoteClient) Lock(info *state.LockInfo) (string, error) { 179 c.mu.Lock() 180 defer c.mu.Unlock() 181 182 if !c.lockState { 183 return "", nil 184 } 185 186 c.info = info 187 188 // These checks only are to ensure we strictly follow the specification. 189 // Terraform shouldn't ever re-lock, so provide errors for the 2 possible 190 // states if this is called. 191 select { 192 case <-c.lockCh: 193 // We had a lock, but lost it. 194 return "", errors.New("lost consul lock, cannot re-lock") 195 default: 196 if c.lockCh != nil { 197 // we have an active lock already 198 return "", fmt.Errorf("state %q already locked", c.Path) 199 } 200 } 201 202 return c.lock() 203 } 204 205 // called after a lock is acquired 206 var testLockHook func() 207 208 func (c *RemoteClient) lock() (string, error) { 209 if c.consulLock == nil { 210 opts := &consulapi.LockOptions{ 211 Key: c.Path + lockSuffix, 212 // only wait briefly, so terraform has the choice to fail fast or 213 // retry as needed. 214 LockWaitTime: time.Second, 215 LockTryOnce: true, 216 } 217 218 lock, err := c.Client.LockOpts(opts) 219 if err != nil { 220 return "", err 221 } 222 223 c.consulLock = lock 224 } 225 226 lockErr := &state.LockError{} 227 228 lockCh, err := c.consulLock.Lock(make(chan struct{})) 229 if err != nil { 230 lockErr.Err = err 231 return "", lockErr 232 } 233 234 if lockCh == nil { 235 lockInfo, e := c.getLockInfo() 236 if e != nil { 237 lockErr.Err = e 238 return "", lockErr 239 } 240 241 lockErr.Info = lockInfo 242 return "", lockErr 243 } 244 245 c.lockCh = lockCh 246 247 err = c.putLockInfo(c.info) 248 if err != nil { 249 if unlockErr := c.unlock(c.info.ID); unlockErr != nil { 250 err = multierror.Append(err, unlockErr) 251 } 252 253 return "", err 254 } 255 256 // Start a goroutine to monitor the lock state. 257 // If we lose the lock to due communication issues with the consul agent, 258 // attempt to immediately reacquire the lock. Put will verify the integrity 259 // of the state by using a CAS operation. 260 c.monitorCancel = make(chan struct{}) 261 c.monitorDone = make(chan struct{}) 262 go func(cancel, done chan struct{}) { 263 defer func() { 264 close(done) 265 }() 266 select { 267 case <-c.lockCh: 268 for { 269 c.mu.Lock() 270 c.consulLock = nil 271 _, err := c.lock() 272 c.mu.Unlock() 273 274 if err != nil { 275 // We failed to get the lock, keep trying as long as 276 // terraform is running. There may be changes in progress, 277 // so there's no use in aborting. Either we eventually 278 // reacquire the lock, or a Put will fail on a CAS. 279 log.Printf("[ERROR] attempting to reacquire lock: %s", err) 280 time.Sleep(time.Second) 281 282 select { 283 case <-cancel: 284 return 285 default: 286 } 287 continue 288 } 289 290 // if the error was nil, the new lock started a new copy of 291 // this goroutine. 292 return 293 } 294 295 case <-cancel: 296 return 297 } 298 }(c.monitorCancel, c.monitorDone) 299 300 if testLockHook != nil { 301 testLockHook() 302 } 303 304 return c.info.ID, nil 305 } 306 307 func (c *RemoteClient) Unlock(id string) error { 308 c.mu.Lock() 309 defer c.mu.Unlock() 310 311 if !c.lockState { 312 return nil 313 } 314 315 return c.unlock(id) 316 } 317 318 func (c *RemoteClient) unlock(id string) error { 319 // cancel our monitoring goroutine 320 if c.monitorCancel != nil { 321 close(c.monitorCancel) 322 } 323 324 // this doesn't use the lock id, because the lock is tied to the consul client. 325 if c.consulLock == nil || c.lockCh == nil { 326 return nil 327 } 328 329 select { 330 case <-c.lockCh: 331 return errors.New("consul lock was lost") 332 default: 333 } 334 335 kv := c.Client.KV() 336 337 var errs error 338 339 if _, err := kv.Delete(c.Path+lockInfoSuffix, nil); err != nil { 340 errs = multierror.Append(errs, err) 341 } 342 343 if err := c.consulLock.Unlock(); err != nil { 344 errs = multierror.Append(errs, err) 345 } 346 347 // the monitoring goroutine may be in a select on this chan, so we need to 348 // wait for it to return before changing the value. 349 <-c.monitorDone 350 c.lockCh = nil 351 352 // This is only cleanup, and will fail if the lock was immediately taken by 353 // another client, so we don't report an error to the user here. 354 c.consulLock.Destroy() 355 356 return errs 357 } 358 359 func compressState(data []byte) ([]byte, error) { 360 b := new(bytes.Buffer) 361 gz := gzip.NewWriter(b) 362 if _, err := gz.Write(data); err != nil { 363 return nil, err 364 } 365 if err := gz.Flush(); err != nil { 366 return nil, err 367 } 368 if err := gz.Close(); err != nil { 369 return nil, err 370 } 371 return b.Bytes(), nil 372 } 373 374 func uncompressState(data []byte) ([]byte, error) { 375 b := new(bytes.Buffer) 376 gz, err := gzip.NewReader(bytes.NewReader(data)) 377 if err != nil { 378 return nil, err 379 } 380 b.ReadFrom(gz) 381 if err := gz.Close(); err != nil { 382 return nil, err 383 } 384 return b.Bytes(), nil 385 }