github.com/opentofu/opentofu@v1.7.1/internal/states/statefile/version3_upgrade.go (about)

     1  // Copyright (c) The OpenTofu Authors
     2  // SPDX-License-Identifier: MPL-2.0
     3  // Copyright (c) 2023 HashiCorp, Inc.
     4  // SPDX-License-Identifier: MPL-2.0
     5  
     6  package statefile
     7  
     8  import (
     9  	"encoding/json"
    10  	"fmt"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/hashicorp/hcl/v2/hclsyntax"
    15  	"github.com/zclconf/go-cty/cty"
    16  	ctyjson "github.com/zclconf/go-cty/cty/json"
    17  
    18  	"github.com/opentofu/opentofu/internal/addrs"
    19  	"github.com/opentofu/opentofu/internal/configs"
    20  	"github.com/opentofu/opentofu/internal/states"
    21  	"github.com/opentofu/opentofu/internal/tfdiags"
    22  )
    23  
    24  func upgradeStateV3ToV4(old *stateV3) (*stateV4, error) {
    25  
    26  	if old.Serial < 0 {
    27  		// The new format is using uint64 here, which should be fine for any
    28  		// real state (we only used positive integers in practice) but we'll
    29  		// catch this explicitly here to avoid weird behavior if a state file
    30  		// has been tampered with in some way.
    31  		return nil, fmt.Errorf("state has serial less than zero, which is invalid")
    32  	}
    33  
    34  	new := &stateV4{
    35  		TerraformVersion: old.TFVersion,
    36  		Serial:           uint64(old.Serial),
    37  		Lineage:          old.Lineage,
    38  		RootOutputs:      map[string]outputStateV4{},
    39  		Resources:        []resourceStateV4{},
    40  	}
    41  
    42  	if new.TerraformVersion == "" {
    43  		// Older formats considered this to be optional, but now it's required
    44  		// and so we'll stub it out with something that's definitely older
    45  		// than the version that really created this state.
    46  		new.TerraformVersion = "0.0.0"
    47  	}
    48  
    49  	for _, msOld := range old.Modules {
    50  		if len(msOld.Path) < 1 || msOld.Path[0] != "root" {
    51  			return nil, fmt.Errorf("state contains invalid module path %#v", msOld.Path)
    52  		}
    53  
    54  		// Convert legacy-style module address into our newer address type.
    55  		// Since these old formats are only generated by versions of OpenTofu
    56  		// that don't support count and for_each on modules, we can just assume
    57  		// all of the modules are unkeyed.
    58  		moduleAddr := make(addrs.ModuleInstance, len(msOld.Path)-1)
    59  		for i, name := range msOld.Path[1:] {
    60  			if !hclsyntax.ValidIdentifier(name) {
    61  				// If we don't fail here then we'll produce an invalid state
    62  				// version 4 which subsequent operations will reject, so we'll
    63  				// fail early here for safety to make sure we can never
    64  				// inadvertently commit an invalid snapshot to a backend.
    65  				//
    66  				// This is a user-facing usage of Terraform but refers to a very
    67  				// old historical version of Terraform which has no corresponding
    68  				// OpenTofu version yet.
    69  				// If we ever get OpenTofu 0.11.x and 0.12.x, we should update this
    70  				// message to mention OpenTofu instead.
    71  				return nil, fmt.Errorf("state contains invalid module path %#v: %q is not a valid identifier; rename it in Terraform 0.11 before upgrading to Terraform 0.12", msOld.Path, name)
    72  			}
    73  			moduleAddr[i] = addrs.ModuleInstanceStep{
    74  				Name:        name,
    75  				InstanceKey: addrs.NoKey,
    76  			}
    77  		}
    78  
    79  		// In a v3 state file, a "resource state" is actually an instance
    80  		// state, so we need to fill in a missing level of hierarchy here
    81  		// by lazily creating resource states as we encounter them.
    82  		// We'll track them in here, keyed on the string representation of
    83  		// the resource address.
    84  		resourceStates := map[string]*resourceStateV4{}
    85  
    86  		for legacyAddr, rsOld := range msOld.Resources {
    87  			instAddr, err := parseLegacyResourceAddress(legacyAddr)
    88  			if err != nil {
    89  				return nil, err
    90  			}
    91  
    92  			resAddr := instAddr.Resource
    93  			rs, exists := resourceStates[resAddr.String()]
    94  			if !exists {
    95  				var modeStr string
    96  				switch resAddr.Mode {
    97  				case addrs.ManagedResourceMode:
    98  					modeStr = "managed"
    99  				case addrs.DataResourceMode:
   100  					modeStr = "data"
   101  				default:
   102  					return nil, fmt.Errorf("state contains resource %s with an unsupported resource mode %#v", resAddr, resAddr.Mode)
   103  				}
   104  
   105  				// In state versions prior to 4 we allowed each instance of a
   106  				// resource to have its own provider configuration address,
   107  				// which makes no real sense in practice because providers
   108  				// are associated with resources in the configuration. We
   109  				// elevate that to the resource level during this upgrade,
   110  				// implicitly taking the provider address of the first instance
   111  				// we encounter for each resource. While this is lossy in
   112  				// theory, in practice there is no reason for these values to
   113  				// differ between instances.
   114  				var providerAddr addrs.AbsProviderConfig
   115  				oldProviderAddr := rsOld.Provider
   116  				if strings.Contains(oldProviderAddr, "provider.") {
   117  					// Smells like a new-style provider address, but we'll test it.
   118  					var diags tfdiags.Diagnostics
   119  					providerAddr, diags = addrs.ParseLegacyAbsProviderConfigStr(oldProviderAddr)
   120  					if diags.HasErrors() {
   121  						if strings.Contains(oldProviderAddr, "${") {
   122  							// There seems to be a common misconception that
   123  							// interpolation was valid in provider aliases
   124  							// in 0.11, so we'll use a specialized error
   125  							// message for that case.
   126  							//
   127  							// This is a user-facing usage of Terraform but refers
   128  							// to a very old historical version of Terraform
   129  							// which has no corresponding OpenTofu version.
   130  							// If we ever get OpenTofu 0.11.x and 0.12.x, we should
   131  							// update this message to mention OpenTofu instead.
   132  							return nil, fmt.Errorf("invalid provider config reference %q for %s: this alias seems to contain a template interpolation sequence, which was not supported but also not error-checked in Terraform 0.11. To proceed, rename the associated provider alias to a valid identifier and apply the change with Terraform 0.11 before upgrading to Terraform 0.12", oldProviderAddr, instAddr)
   133  						}
   134  						return nil, fmt.Errorf("invalid provider config reference %q for %s: %w", oldProviderAddr, instAddr, diags.Err())
   135  					}
   136  				} else {
   137  					// Smells like an old-style module-local provider address,
   138  					// which we'll need to migrate. We'll assume it's referring
   139  					// to the same module the resource is in, which might be
   140  					// incorrect but it'll get fixed up next time any updates
   141  					// are made to an instance.
   142  					if oldProviderAddr != "" {
   143  						localAddr, diags := configs.ParseProviderConfigCompactStr(oldProviderAddr)
   144  						if diags.HasErrors() {
   145  							if strings.Contains(oldProviderAddr, "${") {
   146  								// There seems to be a common misconception that
   147  								// interpolation was valid in provider aliases
   148  								// in 0.11, so we'll use a specialized error
   149  								// message for that case.
   150  								//
   151  								// This is a user-facing usage of Terraform but refers
   152  								// to a very old historical version of Terraform
   153  								// which has no corresponding OpenTofu version.
   154  								// If we ever get OpenTofu 0.11.x and 0.12.x, we should
   155  								// update this message to mention OpenTofu instead.
   156  								return nil, fmt.Errorf("invalid legacy provider config reference %q for %s: this alias seems to contain a template interpolation sequence, which was not supported but also not error-checked in Terraform 0.11. To proceed, rename the associated provider alias to a valid identifier and apply the change with Terraform 0.11 before upgrading to Terraform 0.12", oldProviderAddr, instAddr)
   157  							}
   158  							return nil, fmt.Errorf("invalid legacy provider config reference %q for %s: %w", oldProviderAddr, instAddr, diags.Err())
   159  						}
   160  						providerAddr = addrs.AbsProviderConfig{
   161  							Module: moduleAddr.Module(),
   162  							// We use NewLegacyProvider here so we can use
   163  							// LegacyString() below to get the appropriate
   164  							// legacy-style provider string.
   165  							Provider: addrs.NewLegacyProvider(localAddr.LocalName),
   166  							Alias:    localAddr.Alias,
   167  						}
   168  					} else {
   169  						providerAddr = addrs.AbsProviderConfig{
   170  							Module: moduleAddr.Module(),
   171  							// We use NewLegacyProvider here so we can use
   172  							// LegacyString() below to get the appropriate
   173  							// legacy-style provider string.
   174  							Provider: addrs.NewLegacyProvider(resAddr.ImpliedProvider()),
   175  						}
   176  					}
   177  				}
   178  
   179  				rs = &resourceStateV4{
   180  					Module:         moduleAddr.String(),
   181  					Mode:           modeStr,
   182  					Type:           resAddr.Type,
   183  					Name:           resAddr.Name,
   184  					Instances:      []instanceObjectStateV4{},
   185  					ProviderConfig: providerAddr.LegacyString(),
   186  				}
   187  				resourceStates[resAddr.String()] = rs
   188  			}
   189  
   190  			// Now we'll deal with the instance itself, which may either be
   191  			// the first instance in a resource we just created or an additional
   192  			// instance for a resource added on a prior loop.
   193  			instKey := instAddr.Key
   194  			if isOld := rsOld.Primary; isOld != nil {
   195  				isNew, err := upgradeInstanceObjectV3ToV4(rsOld, isOld, instKey, states.NotDeposed)
   196  				if err != nil {
   197  					return nil, fmt.Errorf("failed to migrate primary generation of %s: %w", instAddr, err)
   198  				}
   199  				rs.Instances = append(rs.Instances, *isNew)
   200  			}
   201  			for i, isOld := range rsOld.Deposed {
   202  				// When we migrate old instances we'll use sequential deposed
   203  				// keys just so that the upgrade result is deterministic. New
   204  				// deposed keys allocated moving forward will be pseudorandomly
   205  				// selected, but we check for collisions and so these
   206  				// non-random ones won't hurt.
   207  				deposedKey := states.DeposedKey(fmt.Sprintf("%08x", i+1))
   208  				isNew, err := upgradeInstanceObjectV3ToV4(rsOld, isOld, instKey, deposedKey)
   209  				if err != nil {
   210  					return nil, fmt.Errorf("failed to migrate deposed generation index %d of %s: %w", i, instAddr, err)
   211  				}
   212  				rs.Instances = append(rs.Instances, *isNew)
   213  			}
   214  
   215  			if instKey != addrs.NoKey && rs.EachMode == "" {
   216  				rs.EachMode = "list"
   217  			}
   218  		}
   219  
   220  		for _, rs := range resourceStates {
   221  			new.Resources = append(new.Resources, *rs)
   222  		}
   223  
   224  		if len(msOld.Path) == 1 && msOld.Path[0] == "root" {
   225  			// We'll migrate the outputs for this module too, then.
   226  			for name, oldOS := range msOld.Outputs {
   227  				newOS := outputStateV4{
   228  					Sensitive: oldOS.Sensitive,
   229  				}
   230  
   231  				valRaw := oldOS.Value
   232  				valSrc, err := json.Marshal(valRaw)
   233  				if err != nil {
   234  					// Should never happen, because this value came from JSON
   235  					// in the first place and so we're just round-tripping here.
   236  					return nil, fmt.Errorf("failed to serialize output %q value as JSON: %w", name, err)
   237  				}
   238  
   239  				// The "type" field in state V2 wasn't really that useful
   240  				// since it was only able to capture string vs. list vs. map.
   241  				// For this reason, during upgrade we'll just discard it
   242  				// altogether and use cty's idea of the implied type of
   243  				// turning our old value into JSON.
   244  				ty, err := ctyjson.ImpliedType(valSrc)
   245  				if err != nil {
   246  					// REALLY should never happen, because we literally just
   247  					// encoded this as JSON above!
   248  					return nil, fmt.Errorf("failed to parse output %q value from JSON: %w", name, err)
   249  				}
   250  
   251  				// ImpliedType tends to produce structural types, but since older
   252  				// version of Terraform didn't support those a collection type
   253  				// is probably what was intended, so we'll see if we can
   254  				// interpret our value as one.
   255  				ty = simplifyImpliedValueType(ty)
   256  
   257  				tySrc, err := ctyjson.MarshalType(ty)
   258  				if err != nil {
   259  					return nil, fmt.Errorf("failed to serialize output %q type as JSON: %w", name, err)
   260  				}
   261  
   262  				newOS.ValueRaw = json.RawMessage(valSrc)
   263  				newOS.ValueTypeRaw = json.RawMessage(tySrc)
   264  
   265  				new.RootOutputs[name] = newOS
   266  			}
   267  		}
   268  	}
   269  
   270  	new.normalize()
   271  
   272  	return new, nil
   273  }
   274  
   275  func upgradeInstanceObjectV3ToV4(rsOld *resourceStateV2, isOld *instanceStateV2, instKey addrs.InstanceKey, deposedKey states.DeposedKey) (*instanceObjectStateV4, error) {
   276  
   277  	// Schema versions were, in prior formats, a private concern of the provider
   278  	// SDK, and not a first-class concept in the state format. Here we're
   279  	// sniffing for the pre-0.12 SDK's way of representing schema versions
   280  	// and promoting it to our first-class field if we find it. We'll ignore
   281  	// it if it doesn't look like what the SDK would've written. If this
   282  	// sniffing fails then we'll assume schema version 0.
   283  	var schemaVersion uint64
   284  	migratedSchemaVersion := false
   285  	if raw, exists := isOld.Meta["schema_version"]; exists {
   286  		switch tv := raw.(type) {
   287  		case string:
   288  			v, err := strconv.ParseUint(tv, 10, 64)
   289  			if err == nil {
   290  				schemaVersion = v
   291  				migratedSchemaVersion = true
   292  			}
   293  		case int:
   294  			schemaVersion = uint64(tv)
   295  			migratedSchemaVersion = true
   296  		case float64:
   297  			schemaVersion = uint64(tv)
   298  			migratedSchemaVersion = true
   299  		}
   300  	}
   301  
   302  	private := map[string]interface{}{}
   303  	for k, v := range isOld.Meta {
   304  		if k == "schema_version" && migratedSchemaVersion {
   305  			// We're gonna promote this into our first-class schema version field
   306  			continue
   307  		}
   308  		private[k] = v
   309  	}
   310  	var privateJSON []byte
   311  	if len(private) != 0 {
   312  		var err error
   313  		privateJSON, err = json.Marshal(private)
   314  		if err != nil {
   315  			// This shouldn't happen, because the Meta values all came from JSON
   316  			// originally anyway.
   317  			return nil, fmt.Errorf("cannot serialize private instance object data: %w", err)
   318  		}
   319  	}
   320  
   321  	var status string
   322  	if isOld.Tainted {
   323  		status = "tainted"
   324  	}
   325  
   326  	var instKeyRaw interface{}
   327  	switch tk := instKey.(type) {
   328  	case addrs.IntKey:
   329  		instKeyRaw = int(tk)
   330  	case addrs.StringKey:
   331  		instKeyRaw = string(tk)
   332  	default:
   333  		if instKeyRaw != nil {
   334  			return nil, fmt.Errorf("unsupported instance key: %#v", instKey)
   335  		}
   336  	}
   337  
   338  	var attributes map[string]string
   339  	if isOld.Attributes != nil {
   340  		attributes = make(map[string]string, len(isOld.Attributes))
   341  		for k, v := range isOld.Attributes {
   342  			attributes[k] = v
   343  		}
   344  	}
   345  	if isOld.ID != "" {
   346  		// As a special case, if we don't already have an "id" attribute and
   347  		// yet there's a non-empty first-class ID on the old object then we'll
   348  		// create a synthetic id attribute to avoid losing that first-class id.
   349  		// In practice this generally arises only in tests where state literals
   350  		// are hand-written in a non-standard way; real code prior to 0.12
   351  		// would always force the first-class ID to be copied into the
   352  		// id attribute before storing.
   353  		if attributes == nil {
   354  			attributes = make(map[string]string, len(isOld.Attributes))
   355  		}
   356  		if idVal := attributes["id"]; idVal == "" {
   357  			attributes["id"] = isOld.ID
   358  		}
   359  	}
   360  
   361  	return &instanceObjectStateV4{
   362  		IndexKey:       instKeyRaw,
   363  		Status:         status,
   364  		Deposed:        string(deposedKey),
   365  		AttributesFlat: attributes,
   366  		SchemaVersion:  schemaVersion,
   367  		PrivateRaw:     privateJSON,
   368  	}, nil
   369  }
   370  
   371  // parseLegacyResourceAddress parses the different identifier format used
   372  // state formats before version 4, like "instance.name.0".
   373  func parseLegacyResourceAddress(s string) (addrs.ResourceInstance, error) {
   374  	var ret addrs.ResourceInstance
   375  
   376  	// Split based on ".". Every resource address should have at least two
   377  	// elements (type and name).
   378  	parts := strings.Split(s, ".")
   379  	if len(parts) < 2 || len(parts) > 4 {
   380  		return ret, fmt.Errorf("invalid internal resource address format: %s", s)
   381  	}
   382  
   383  	// Data resource if we have at least 3 parts and the first one is data
   384  	ret.Resource.Mode = addrs.ManagedResourceMode
   385  	if len(parts) > 2 && parts[0] == "data" {
   386  		ret.Resource.Mode = addrs.DataResourceMode
   387  		parts = parts[1:]
   388  	}
   389  
   390  	// If we're not a data resource and we have more than 3, then it is an error
   391  	if len(parts) > 3 && ret.Resource.Mode != addrs.DataResourceMode {
   392  		return ret, fmt.Errorf("invalid internal resource address format: %s", s)
   393  	}
   394  
   395  	// Build the parts of the resource address that are guaranteed to exist
   396  	ret.Resource.Type = parts[0]
   397  	ret.Resource.Name = parts[1]
   398  	ret.Key = addrs.NoKey
   399  
   400  	// If we have more parts, then we have an index. Parse that.
   401  	if len(parts) > 2 {
   402  		idx, err := strconv.ParseInt(parts[2], 0, 0)
   403  		if err != nil {
   404  			return ret, fmt.Errorf("error parsing resource address %q: %w", s, err)
   405  		}
   406  
   407  		ret.Key = addrs.IntKey(idx)
   408  	}
   409  
   410  	return ret, nil
   411  }
   412  
   413  // simplifyImpliedValueType attempts to heuristically simplify a value type
   414  // derived from a legacy stored output value into something simpler that
   415  // is closer to what would've fitted into the pre-v0.12 value type system.
   416  func simplifyImpliedValueType(ty cty.Type) cty.Type {
   417  	switch {
   418  	case ty.IsTupleType():
   419  		// If all of the element types are the same then we'll make this
   420  		// a list instead. This is very likely to be true, since prior versions
   421  		// of Terraform did not officially support mixed-type collections.
   422  
   423  		if ty.Equals(cty.EmptyTuple) {
   424  			// Don't know what the element type would be, then.
   425  			return ty
   426  		}
   427  
   428  		etys := ty.TupleElementTypes()
   429  		ety := etys[0]
   430  		for _, other := range etys[1:] {
   431  			if !other.Equals(ety) {
   432  				// inconsistent types
   433  				return ty
   434  			}
   435  		}
   436  		ety = simplifyImpliedValueType(ety)
   437  		return cty.List(ety)
   438  
   439  	case ty.IsObjectType():
   440  		// If all of the attribute types are the same then we'll make this
   441  		// a map instead. This is very likely to be true, since prior versions
   442  		// of Terraform did not officially support mixed-type collections.
   443  
   444  		if ty.Equals(cty.EmptyObject) {
   445  			// Don't know what the element type would be, then.
   446  			return ty
   447  		}
   448  
   449  		atys := ty.AttributeTypes()
   450  		var ety cty.Type
   451  		for _, other := range atys {
   452  			if ety == cty.NilType {
   453  				ety = other
   454  				continue
   455  			}
   456  			if !other.Equals(ety) {
   457  				// inconsistent types
   458  				return ty
   459  			}
   460  		}
   461  		ety = simplifyImpliedValueType(ety)
   462  		return cty.Map(ety)
   463  
   464  	default:
   465  		// No other normalizations are possible
   466  		return ty
   467  	}
   468  }