github.com/kevinklinger/open_terraform@v1.3.6/noninternal/cloud/state.go (about)

     1  package cloud
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"crypto/md5"
     7  	"encoding/base64"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"log"
    12  	"os"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/zclconf/go-cty/cty"
    17  	"github.com/zclconf/go-cty/cty/gocty"
    18  
    19  	tfe "github.com/hashicorp/go-tfe"
    20  	uuid "github.com/hashicorp/go-uuid"
    21  	"github.com/kevinklinger/open_terraform/noninternal/command/jsonstate"
    22  	"github.com/kevinklinger/open_terraform/noninternal/states"
    23  	"github.com/kevinklinger/open_terraform/noninternal/states/remote"
    24  	"github.com/kevinklinger/open_terraform/noninternal/states/statefile"
    25  	"github.com/kevinklinger/open_terraform/noninternal/states/statemgr"
    26  	"github.com/kevinklinger/open_terraform/noninternal/terraform"
    27  )
    28  
// State implements the State interfaces in the state package to handle
// reading and writing the remote state to TFC. This State on its own does no
// local caching so every persist will go to the remote storage and local
// writes will go to memory.
type State struct {
	// mu is held by most methods of State while they read or write the
	// fields below.
	mu sync.Mutex

	// We track two pieces of meta data in addition to the state itself:
	//
	// lineage - the state's unique ID
	// serial  - the monotonic counter of "versions" of the state
	//
	// Both of these (along with state) have a sister field
	// that represents the values read in from an existing source.
	// All three of these values are used to determine if the new
	// state has changed from an existing state we read in.
	lineage, readLineage string
	serial, readSerial   uint64
	state, readState     *states.State
	// disableLocks, once set by DisableLocks, makes Lock and Unlock return
	// immediately without contacting the remote.
	disableLocks         bool
	// tfeClient is the API client used for every remote call.
	tfeClient            *tfe.Client
	// organization is combined with the workspace name to form the
	// force-unlock lock ID, and is passed to the workspace delete call.
	organization         string
	// workspace is the remote workspace whose ID and Name are used when
	// reading, writing, locking and deleting state.
	workspace            *tfe.Workspace
	// stateUploadErr is set when PersistState fails to upload; Unlock then
	// leaves the workspace locked.
	stateUploadErr       bool
	// forcePush is set by EnableForcePush and forwarded as the Force option
	// when a state version is created.
	forcePush            bool
	// lockInfo is the info recorded by the last successful Lock; Unlock uses
	// it to tell a normal unlock from a force unlock.
	lockInfo             *statemgr.LockInfo
}
    56  
// ErrStateVersionUnauthorizedUpgradeState is returned by GetRootOutputValues
// when it has to fall back to reading the full state and no state is
// available after the refresh.
var ErrStateVersionUnauthorizedUpgradeState = errors.New(strings.TrimSpace(`
You are not authorized to read the full state version containing outputs.
State versions created by terraform v1.3.0 and newer do not require this level
of authorization and therefore this error can usually be fixed by upgrading the
remote state version.
`))

// Compile-time assertions that *State satisfies the statemgr interfaces it
// is meant to implement.
var _ statemgr.Full = (*State)(nil)
var _ statemgr.Migrator = (*State)(nil)
    66  
    67  // statemgr.Reader impl.
    68  func (s *State) State() *states.State {
    69  	s.mu.Lock()
    70  	defer s.mu.Unlock()
    71  
    72  	return s.state.DeepCopy()
    73  }
    74  
    75  // StateForMigration is part of our implementation of statemgr.Migrator.
    76  func (s *State) StateForMigration() *statefile.File {
    77  	s.mu.Lock()
    78  	defer s.mu.Unlock()
    79  
    80  	return statefile.New(s.state.DeepCopy(), s.lineage, s.serial)
    81  }
    82  
    83  // WriteStateForMigration is part of our implementation of statemgr.Migrator.
    84  func (s *State) WriteStateForMigration(f *statefile.File, force bool) error {
    85  	s.mu.Lock()
    86  	defer s.mu.Unlock()
    87  
    88  	if !force {
    89  		checkFile := statefile.New(s.state, s.lineage, s.serial)
    90  		if err := statemgr.CheckValidImport(f, checkFile); err != nil {
    91  			return err
    92  		}
    93  	}
    94  
    95  	// The remote backend needs to pass the `force` flag through to its client.
    96  	// For backends that support such operations, inform the client
    97  	// that a force push has been requested
    98  	if force {
    99  		s.EnableForcePush()
   100  	}
   101  
   102  	// We create a deep copy of the state here, because the caller also has
   103  	// a reference to the given object and can potentially go on to mutate
   104  	// it after we return, but we want the snapshot at this point in time.
   105  	s.state = f.State.DeepCopy()
   106  	s.lineage = f.Lineage
   107  	s.serial = f.Serial
   108  
   109  	return nil
   110  }
   111  
   112  // DisableLocks turns the Lock and Unlock methods into no-ops. This is intended
   113  // to be called during initialization of a state manager and should not be
   114  // called after any of the statemgr.Full interface methods have been called.
   115  func (s *State) DisableLocks() {
   116  	s.disableLocks = true
   117  }
   118  
   119  // StateSnapshotMeta returns the metadata from the most recently persisted
   120  // or refreshed persistent state snapshot.
   121  //
   122  // This is an implementation of statemgr.PersistentMeta.
   123  func (s *State) StateSnapshotMeta() statemgr.SnapshotMeta {
   124  	return statemgr.SnapshotMeta{
   125  		Lineage: s.lineage,
   126  		Serial:  s.serial,
   127  	}
   128  }
   129  
   130  // statemgr.Writer impl.
   131  func (s *State) WriteState(state *states.State) error {
   132  	s.mu.Lock()
   133  	defer s.mu.Unlock()
   134  
   135  	// We create a deep copy of the state here, because the caller also has
   136  	// a reference to the given object and can potentially go on to mutate
   137  	// it after we return, but we want the snapshot at this point in time.
   138  	s.state = state.DeepCopy()
   139  
   140  	return nil
   141  }
   142  
   143  // PersistState uploads a snapshot of the latest state as a StateVersion to Terraform Cloud
   144  func (s *State) PersistState(schemas *terraform.Schemas) error {
   145  	s.mu.Lock()
   146  	defer s.mu.Unlock()
   147  
   148  	if s.readState != nil {
   149  		lineageUnchanged := s.readLineage != "" && s.lineage == s.readLineage
   150  		serialUnchanged := s.readSerial != 0 && s.serial == s.readSerial
   151  		stateUnchanged := statefile.StatesMarshalEqual(s.state, s.readState)
   152  		if stateUnchanged && lineageUnchanged && serialUnchanged {
   153  			// If the state, lineage or serial haven't changed at all then we have nothing to do.
   154  			return nil
   155  		}
   156  		s.serial++
   157  	} else {
   158  		// We might be writing a new state altogether, but before we do that
   159  		// we'll check to make sure there isn't already a snapshot present
   160  		// that we ought to be updating.
   161  		err := s.refreshState()
   162  		if err != nil {
   163  			return fmt.Errorf("failed checking for existing remote state: %s", err)
   164  		}
   165  		if s.lineage == "" { // indicates that no state snapshot is present yet
   166  			lineage, err := uuid.GenerateUUID()
   167  			if err != nil {
   168  				return fmt.Errorf("failed to generate initial lineage: %v", err)
   169  			}
   170  			s.lineage = lineage
   171  			s.serial = 0
   172  		}
   173  	}
   174  
   175  	f := statefile.New(s.state, s.lineage, s.serial)
   176  
   177  	var buf bytes.Buffer
   178  	err := statefile.Write(f, &buf)
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	var jsonState []byte
   184  	if schemas != nil {
   185  		jsonState, err = jsonstate.Marshal(f, schemas)
   186  		if err != nil {
   187  			return err
   188  		}
   189  	}
   190  
   191  	stateFile, err := statefile.Read(bytes.NewReader(buf.Bytes()))
   192  	if err != nil {
   193  		return fmt.Errorf("failed to read state: %w", err)
   194  	}
   195  
   196  	ov, err := jsonstate.MarshalOutputs(stateFile.State.RootModule().OutputValues)
   197  	if err != nil {
   198  		return fmt.Errorf("failed to translate outputs: %w", err)
   199  	}
   200  	jsonStateOutputs, err := json.Marshal(ov)
   201  	if err != nil {
   202  		return fmt.Errorf("failed to marshal outputs to json: %w", err)
   203  	}
   204  
   205  	err = s.uploadState(s.lineage, s.serial, s.forcePush, buf.Bytes(), jsonState, jsonStateOutputs)
   206  	if err != nil {
   207  		s.stateUploadErr = true
   208  		return fmt.Errorf("error uploading state: %w", err)
   209  	}
   210  	// After we've successfully persisted, what we just wrote is our new
   211  	// reference state until someone calls RefreshState again.
   212  	// We've potentially overwritten (via force) the state, lineage
   213  	// and / or serial (and serial was incremented) so we copy over all
   214  	// three fields so everything matches the new state and a subsequent
   215  	// operation would correctly detect no changes to the lineage, serial or state.
   216  	s.readState = s.state.DeepCopy()
   217  	s.readLineage = s.lineage
   218  	s.readSerial = s.serial
   219  	return nil
   220  }
   221  
   222  func (s *State) uploadState(lineage string, serial uint64, isForcePush bool, state, jsonState, jsonStateOutputs []byte) error {
   223  	ctx := context.Background()
   224  
   225  	options := tfe.StateVersionCreateOptions{
   226  		Lineage:          tfe.String(lineage),
   227  		Serial:           tfe.Int64(int64(serial)),
   228  		MD5:              tfe.String(fmt.Sprintf("%x", md5.Sum(state))),
   229  		State:            tfe.String(base64.StdEncoding.EncodeToString(state)),
   230  		Force:            tfe.Bool(isForcePush),
   231  		JSONState:        tfe.String(base64.StdEncoding.EncodeToString(jsonState)),
   232  		JSONStateOutputs: tfe.String(base64.StdEncoding.EncodeToString(jsonStateOutputs)),
   233  	}
   234  
   235  	// If we have a run ID, make sure to add it to the options
   236  	// so the state will be properly associated with the run.
   237  	runID := os.Getenv("TFE_RUN_ID")
   238  	if runID != "" {
   239  		options.Run = &tfe.Run{ID: runID}
   240  	}
   241  	// Create the new state.
   242  	_, err := s.tfeClient.StateVersions.Create(ctx, s.workspace.ID, options)
   243  	return err
   244  }
   245  
   246  // Lock calls the Client's Lock method if it's implemented.
   247  func (s *State) Lock(info *statemgr.LockInfo) (string, error) {
   248  	s.mu.Lock()
   249  	defer s.mu.Unlock()
   250  
   251  	if s.disableLocks {
   252  		return "", nil
   253  	}
   254  	ctx := context.Background()
   255  
   256  	lockErr := &statemgr.LockError{Info: s.lockInfo}
   257  
   258  	// Lock the workspace.
   259  	_, err := s.tfeClient.Workspaces.Lock(ctx, s.workspace.ID, tfe.WorkspaceLockOptions{
   260  		Reason: tfe.String("Locked by Terraform"),
   261  	})
   262  	if err != nil {
   263  		if err == tfe.ErrWorkspaceLocked {
   264  			lockErr.Info = info
   265  			err = fmt.Errorf("%s (lock ID: \"%s/%s\")", err, s.organization, s.workspace.Name)
   266  		}
   267  		lockErr.Err = err
   268  		return "", lockErr
   269  	}
   270  
   271  	s.lockInfo = info
   272  
   273  	return s.lockInfo.ID, nil
   274  }
   275  
   276  // statemgr.Refresher impl.
   277  func (s *State) RefreshState() error {
   278  	s.mu.Lock()
   279  	defer s.mu.Unlock()
   280  	return s.refreshState()
   281  }
   282  
   283  // refreshState is the main implementation of RefreshState, but split out so
   284  // that we can make internal calls to it from methods that are already holding
   285  // the s.mu lock.
   286  func (s *State) refreshState() error {
   287  	payload, err := s.getStatePayload()
   288  	if err != nil {
   289  		return err
   290  	}
   291  
   292  	// no remote state is OK
   293  	if payload == nil {
   294  		s.readState = nil
   295  		s.lineage = ""
   296  		s.serial = 0
   297  		return nil
   298  	}
   299  
   300  	stateFile, err := statefile.Read(bytes.NewReader(payload.Data))
   301  	if err != nil {
   302  		return err
   303  	}
   304  
   305  	s.lineage = stateFile.Lineage
   306  	s.serial = stateFile.Serial
   307  	s.state = stateFile.State
   308  
   309  	// Properties from the remote must be separate so we can
   310  	// track changes as lineage, serial and/or state are mutated
   311  	s.readLineage = stateFile.Lineage
   312  	s.readSerial = stateFile.Serial
   313  	s.readState = s.state.DeepCopy()
   314  	return nil
   315  }
   316  
   317  func (s *State) getStatePayload() (*remote.Payload, error) {
   318  	ctx := context.Background()
   319  
   320  	sv, err := s.tfeClient.StateVersions.ReadCurrent(ctx, s.workspace.ID)
   321  	if err != nil {
   322  		if err == tfe.ErrResourceNotFound {
   323  			// If no state exists, then return nil.
   324  			return nil, nil
   325  		}
   326  		return nil, fmt.Errorf("error retrieving state: %v", err)
   327  	}
   328  
   329  	state, err := s.tfeClient.StateVersions.Download(ctx, sv.DownloadURL)
   330  	if err != nil {
   331  		return nil, fmt.Errorf("error downloading state: %v", err)
   332  	}
   333  
   334  	// If the state is empty, then return nil.
   335  	if len(state) == 0 {
   336  		return nil, nil
   337  	}
   338  
   339  	// Get the MD5 checksum of the state.
   340  	sum := md5.Sum(state)
   341  
   342  	return &remote.Payload{
   343  		Data: state,
   344  		MD5:  sum[:],
   345  	}, nil
   346  }
   347  
   348  // Unlock calls the Client's Unlock method if it's implemented.
   349  func (s *State) Unlock(id string) error {
   350  	s.mu.Lock()
   351  	defer s.mu.Unlock()
   352  
   353  	if s.disableLocks {
   354  		return nil
   355  	}
   356  
   357  	ctx := context.Background()
   358  
   359  	// We first check if there was an error while uploading the latest
   360  	// state. If so, we will not unlock the workspace to prevent any
   361  	// changes from being applied until the correct state is uploaded.
   362  	if s.stateUploadErr {
   363  		return nil
   364  	}
   365  
   366  	lockErr := &statemgr.LockError{Info: s.lockInfo}
   367  
   368  	// With lock info this should be treated as a normal unlock.
   369  	if s.lockInfo != nil {
   370  		// Verify the expected lock ID.
   371  		if s.lockInfo.ID != id {
   372  			lockErr.Err = fmt.Errorf("lock ID does not match existing lock")
   373  			return lockErr
   374  		}
   375  
   376  		// Unlock the workspace.
   377  		_, err := s.tfeClient.Workspaces.Unlock(ctx, s.workspace.ID)
   378  		if err != nil {
   379  			lockErr.Err = err
   380  			return lockErr
   381  		}
   382  
   383  		return nil
   384  	}
   385  
   386  	// Verify the optional force-unlock lock ID.
   387  	if s.organization+"/"+s.workspace.Name != id {
   388  		lockErr.Err = fmt.Errorf(
   389  			"lock ID %q does not match existing lock ID \"%s/%s\"",
   390  			id,
   391  			s.organization,
   392  			s.workspace.Name,
   393  		)
   394  		return lockErr
   395  	}
   396  
   397  	// Force unlock the workspace.
   398  	_, err := s.tfeClient.Workspaces.ForceUnlock(ctx, s.workspace.ID)
   399  	if err != nil {
   400  		lockErr.Err = err
   401  		return lockErr
   402  	}
   403  
   404  	return nil
   405  }
   406  
   407  // Delete the remote state.
   408  func (s *State) Delete() error {
   409  	err := s.tfeClient.Workspaces.Delete(context.Background(), s.organization, s.workspace.Name)
   410  	if err != nil && err != tfe.ErrResourceNotFound {
   411  		return fmt.Errorf("error deleting workspace %s: %v", s.workspace.Name, err)
   412  	}
   413  
   414  	return nil
   415  }
   416  
   417  // EnableForcePush to allow the remote client to overwrite state
   418  // by implementing remote.ClientForcePusher
   419  func (s *State) EnableForcePush() {
   420  	s.forcePush = true
   421  }
   422  
   423  // GetRootOutputValues fetches output values from Terraform Cloud
   424  func (s *State) GetRootOutputValues() (map[string]*states.OutputValue, error) {
   425  	ctx := context.Background()
   426  
   427  	so, err := s.tfeClient.StateVersionOutputs.ReadCurrent(ctx, s.workspace.ID)
   428  
   429  	if err != nil {
   430  		return nil, fmt.Errorf("could not read state version outputs: %w", err)
   431  	}
   432  
   433  	result := make(map[string]*states.OutputValue)
   434  
   435  	for _, output := range so.Items {
   436  		if output.DetailedType == nil {
   437  			// If there is no detailed type information available, this state was probably created
   438  			// with a version of terraform < 1.3.0. In this case, we'll eject completely from this
   439  			// function and fall back to the old behavior of reading the entire state file, which
   440  			// requires a higher level of authorization.
   441  			log.Printf("[DEBUG] falling back to reading full state")
   442  
   443  			if err := s.RefreshState(); err != nil {
   444  				return nil, fmt.Errorf("failed to load state: %w", err)
   445  			}
   446  
   447  			state := s.State()
   448  			if state == nil {
   449  				// We know that there is supposed to be state (and this is not simply a new workspace
   450  				// without state) because the fallback is only invoked when outputs are present but
   451  				// detailed types are not available.
   452  				return nil, ErrStateVersionUnauthorizedUpgradeState
   453  			}
   454  
   455  			return state.RootModule().OutputValues, nil
   456  		}
   457  
   458  		if output.Sensitive {
   459  			// Since this is a sensitive value, the output must be requested explicitly in order to
   460  			// read its value, which is assumed to be present by callers
   461  			sensitiveOutput, err := s.tfeClient.StateVersionOutputs.Read(ctx, output.ID)
   462  			if err != nil {
   463  				return nil, fmt.Errorf("could not read state version output %s: %w", output.ID, err)
   464  			}
   465  			output.Value = sensitiveOutput.Value
   466  		}
   467  
   468  		cval, err := tfeOutputToCtyValue(*output)
   469  		if err != nil {
   470  			return nil, fmt.Errorf("could not decode output %s (ID %s)", output.Name, output.ID)
   471  		}
   472  
   473  		result[output.Name] = &states.OutputValue{
   474  			Value:     cval,
   475  			Sensitive: output.Sensitive,
   476  		}
   477  	}
   478  
   479  	return result, nil
   480  }
   481  
   482  // tfeOutputToCtyValue decodes a combination of TFE output value and detailed-type to create a
   483  // cty value that is suitable for use in terraform.
   484  func tfeOutputToCtyValue(output tfe.StateVersionOutput) (cty.Value, error) {
   485  	var result cty.Value
   486  	bufType, err := json.Marshal(output.DetailedType)
   487  	if err != nil {
   488  		return result, fmt.Errorf("could not marshal output %s type: %w", output.ID, err)
   489  	}
   490  
   491  	var ctype cty.Type
   492  	err = ctype.UnmarshalJSON(bufType)
   493  	if err != nil {
   494  		return result, fmt.Errorf("could not interpret output %s type: %w", output.ID, err)
   495  	}
   496  
   497  	result, err = gocty.ToCtyValue(output.Value, ctype)
   498  	if err != nil {
   499  		return result, fmt.Errorf("could not interpret value %v as type %s for output %s: %w", result, ctype.FriendlyName(), output.ID, err)
   500  	}
   501  
   502  	return result, nil
   503  }