go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/bq/schemaapplyer.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bq
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net/http"
    21  	"regexp"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	"cloud.google.com/go/bigquery"
    27  	"google.golang.org/api/googleapi"
    28  
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/common/retry"
    33  	"go.chromium.org/luci/common/retry/transient"
    34  	"go.chromium.org/luci/server/caching"
    35  )
    36  
    37  // ErrWrongTableKind represents a mismatch in BigQuery table type.
    38  var ErrWrongTableKind = errors.New("cannot change a regular table into a view table or vice-versa")
    39  
    40  var errMetadataVersionLabelMissing = errors.New("table definition is missing MetadataVersionKey label or label value is not a positive integer")
    41  
    42  var errViewRefreshEnabledOnNonView = errors.New("RefreshViewInterval option cannot be used on a table without a ViewQuery")
    43  
    44  // Table is implemented by *bigquery.Table.
    45  // See its documentation for description of the methods below.
    46  type Table interface {
    47  	FullyQualifiedName() string
    48  	Metadata(ctx context.Context, opts ...bigquery.TableMetadataOption) (md *bigquery.TableMetadata, err error)
    49  	Create(ctx context.Context, md *bigquery.TableMetadata) error
    50  	Update(ctx context.Context, md bigquery.TableMetadataToUpdate, etag string, opts ...bigquery.TableUpdateOption) (*bigquery.TableMetadata, error)
    51  }
    52  
    53  // SchemaApplyerCache is used by SchemaApplyer to avoid making redundant BQ
    54  // calls.
    55  //
    56  // Instantiate it with RegisterSchemaApplyerCache(capacity) during init time.
    57  type SchemaApplyerCache struct {
    58  	handle caching.LRUHandle[string, error]
    59  }
    60  
    61  // RegisterSchemaApplyerCache allocates a process cached used by SchemaApplier.
    62  //
    63  // The capacity should roughly match expected number of tables the schema
    64  // applier will work on.
    65  //
    66  // Must be called during init time.
    67  func RegisterSchemaApplyerCache(capacity int) SchemaApplyerCache {
    68  	return SchemaApplyerCache{caching.RegisterLRUCache[string, error](capacity)}
    69  }
    70  
    71  // SchemaApplyer provides methods to synchronise BigQuery schema
    72  // to match a desired state.
    73  type SchemaApplyer struct {
    74  	cache SchemaApplyerCache
    75  }
    76  
    77  // NewSchemaApplyer initialises a new schema applyer, using the given cache
    78  // to cache BQ schema to avoid making duplicate BigQuery calls.
    79  func NewSchemaApplyer(cache SchemaApplyerCache) *SchemaApplyer {
    80  	return &SchemaApplyer{
    81  		cache: cache,
    82  	}
    83  }
    84  
    85  // EnsureTable creates a BigQuery table if it doesn't exist and updates its
    86  // schema (or a view query for view tables) if it is stale. Non-schema options,
    87  // like Partitioning and Clustering settings, will be applied if the table is
    88  // being created but will not be synchronized after creation.
    89  //
    90  // Existing fields will not be deleted.
    91  //
    92  // Example usage:
    93  // // At top of file
    94  // var schemaApplyer = bq.NewSchemaApplyer(
    95  //
    96  //	bq.RegisterSchemaApplyerCache(50 ) // depending on how many different
    97  //	                                   // tables will be used.
    98  //
    99  // )
   100  //
   101  // ...
   102  // // In method.
   103  // table := client.Dataset("my_dataset").Table("my_table")
   104  // schema := ... // e.g. from SchemaConverter.
   105  //
   106  //	spec := &bigquery.TableMetadata{
   107  //	   TimePartitioning: &bigquery.TimePartitioning{
   108  //	     Field:      "partition_time",
   109  //	     Expiration: 540 * time.Day,
   110  //	   },
   111  //	   Schema: schema.Relax(), // Ensure no mandatory fields.
   112  //	}
   113  //
   114  // err := schemaApplyer.EnsureBQTable(ctx, table, spec)
   115  //
   116  //	if err != nil {
   117  //	   if transient.Tag.In(err) {
   118  //	      // Handle retriable error.
   119  //	   } else {
   120  //	      // Handle fatal error.
   121  //	   }
   122  //	}
   123  func (s *SchemaApplyer) EnsureTable(ctx context.Context, t Table, spec *bigquery.TableMetadata, options ...EnsureTableOption) error {
   124  	// Note: creating/updating the table inside GetOrCreate ensures that different
   125  	// goroutines do not attempt to create/update the same table concurrently.
   126  	cachedErr, err := s.cache.handle.LRU(ctx).GetOrCreate(ctx, t.FullyQualifiedName(), func() (error, time.Duration, error) {
   127  		if err := EnsureTable(ctx, t, spec, options...); err != nil {
   128  			if !transient.Tag.In(err) {
   129  				// Cache the fatal error for one minute.
   130  				return err, time.Minute, nil
   131  			}
   132  			return nil, 0, err
   133  		}
   134  		// Table is successfully ensured, remember for 5 minutes.
   135  		return nil, 5 * time.Minute, nil
   136  	})
   137  	if err != nil {
   138  		return err
   139  	}
   140  	return cachedErr
   141  }
   142  
   143  // ensureTableOpts captures options passed to EnsureTable(...).
   144  type ensureTableOpts struct {
   145  	// Whether metadata versioning is enabled and metadata
   146  	// updates can be rolled out, not just schema.
   147  	metadataVersioned bool
   148  
   149  	// Whether view definitions should be periodically refreshed
   150  	// so that indirect schema updates can be propogated to schema.
   151  	viewRefreshEnabled bool
   152  	// The view refresh interval.
   153  	viewRefreshInterval time.Duration
   154  }
   155  
   156  // EnsureTableOption defines an option passed to EnsureTable(...).
   157  type EnsureTableOption func(opts *ensureTableOpts)
   158  
   159  // RefreshViewInterval ensures the BigQuery view definition is
   160  // updated if it has not been updated for duration d. This is
   161  // to ensure indirect schema changes are propogated.
   162  //
   163  // Scenario:
   164  // You have a view defined with the SQL:
   165  //
   166  //	`SELECT * FROM base_table WHERE project = 'chromium'`.
   167  //
   168  // By default, schema changes to base_table will not be
   169  // reflected in the schema for the view (e.g. as seen in BigQuery UI).
   170  // This is a usability issue for users of the view.
   171  //
   172  // To cause indirect schema changes to propogate, when this
   173  // option is set, the view definition will be prefixed with a
   174  // one line comment like:
   175  // -- Indirect schema version: 2023-05-01T12:34:56Z
   176  //
   177  // The view (including comment) will be periodically refreshed
   178  // if duration d has elapsed, triggering BigQuery to refresh the
   179  // view schema.
   180  //
   181  // If this option is set but the table definition is not for
   182  // a view, an error will be returned by EnsureTable(...).
   183  func RefreshViewInterval(d time.Duration) EnsureTableOption {
   184  	return func(opts *ensureTableOpts) {
   185  		opts.viewRefreshEnabled = true
   186  		opts.viewRefreshInterval = d
   187  	}
   188  }
   189  
   190  // MetadataVersionKey is the label key used to version table
   191  // metadata. Increment the integer assigned to this label
   192  // to push updated table metadata.
   193  //
   194  // This label must be used in conjunction with the UpdateMetadata()
   195  // EnsureTable(...) option to have effect.
   196  //
   197  // The value assigned to this label must be a positive integer,
   198  // like "1" or "9127".
   199  //
   200  // See UpdateMetadata option for usage.
   201  const MetadataVersionKey = "metadata_version"
   202  
   203  // UpdateMetadata allows the non-schema metadata to be updated in
   204  // EnsureTable(...), namely the view definition, clustering settings,
   205  // description and labels.
   206  //
   207  // This option requires the caller to use the `MetadataVersionKey`
   208  // label to control metadata update rollouts.
   209  //
   210  // Usage:
   211  //
   212  // The table definition passed to EnsureTable(...) must define
   213  // a label with the key `MetadataVersionKey`. The value of
   214  // the label must be a positive integer. Incrementing the integer
   215  // will trigger an update of table metadata.
   216  //
   217  //	table := client.Dataset("my_dataset").Table("my_table")
   218  //	spec :=	&bigquery.TableMetadata{
   219  //	   ...
   220  //	   Labels: map[string]string {
   221  //	      // Increment to update table metadata.
   222  //	      MetadataVersionKey: "2",
   223  //	   }
   224  //	}
   225  //	err := EnsureTable(ctx, table, spec, UpdateMetadata())
   226  //
   227  // Rationale:
   228  // Without a system to control rollouts, if there are multiple
   229  // versions of the table metadata in production simultaneously
   230  // (e.g. in canary and stable deployments), an edit war may
   231  // ensue.
   232  //
   233  // Such an edit war scenario is not an issue when we update
   234  // schema only as columns are only added, never removed, so
   235  // schema will always converge to the union of all columns.
   236  func UpdateMetadata() EnsureTableOption {
   237  	return func(opts *ensureTableOpts) {
   238  		opts.metadataVersioned = true
   239  	}
   240  }
   241  
   242  // EnsureTable creates a BigQuery table if it doesn't exist and updates its
   243  // schema if it is stale.
   244  //
   245  // By default, non-schema metadata, like View Definition, Partitioning and
   246  // Clustering settings, will be applied if the table is being created but
   247  // will not be synchronised after creation.
   248  //
   249  // To synchronise more of the table metadata, including view definition,
   250  // description and labels, see the MetadataVersioned option.
   251  //
   252  // Existing fields will not be deleted.
   253  func EnsureTable(ctx context.Context, t Table, spec *bigquery.TableMetadata, options ...EnsureTableOption) error {
   254  	var opts ensureTableOpts
   255  	for _, apply := range options {
   256  		apply(&opts)
   257  	}
   258  	return ensureTable(ctx, t, spec, opts)
   259  }
   260  
   261  // ensureTable creates a BigQuery table if it doesn't exist and updates its
   262  // schema if it is stale.
   263  func ensureTable(ctx context.Context, t Table, spec *bigquery.TableMetadata, opts ensureTableOpts) error {
   264  	// If metadata versioning is enabled, confirm the caller has
   265  	// specified a version.
   266  	if opts.metadataVersioned && metadataVersion(spec.Labels) <= 0 {
   267  		return errMetadataVersionLabelMissing
   268  	}
   269  	if spec.ViewQuery == "" && opts.viewRefreshEnabled {
   270  		return errViewRefreshEnabledOnNonView
   271  	}
   272  
   273  	md, err := t.Metadata(ctx)
   274  	apiErr, ok := err.(*googleapi.Error)
   275  	switch {
   276  	case ok && apiErr.Code == http.StatusNotFound:
   277  		// Table doesn't exist. Create it now.
   278  		if err = createBQTable(ctx, t, spec); err != nil {
   279  			return errors.Annotate(err, "create bq table").Err()
   280  		}
   281  		return nil
   282  	case ok && apiErr.Code == http.StatusForbidden:
   283  		// No read table permission.
   284  		return err
   285  	case err != nil:
   286  		return transient.Tag.Apply(err)
   287  	}
   288  
   289  	// Table exists and is accessible.
   290  	// Ensure its specification is up to date.
   291  	if (md.Type == bigquery.ViewTable) != (spec.ViewQuery != "") {
   292  		// View without view query or non-View table with View query.
   293  		return ErrWrongTableKind
   294  	}
   295  
   296  	if err = ensureBQTable(ctx, t, spec, opts); err != nil {
   297  		return errors.Annotate(err, "ensure bq table").Err()
   298  	}
   299  	return nil
   300  }
   301  
   302  func createBQTable(ctx context.Context, t Table, spec *bigquery.TableMetadata) error {
   303  	err := t.Create(ctx, spec)
   304  	apiErr, ok := err.(*googleapi.Error)
   305  	switch {
   306  	case ok && apiErr.Code == http.StatusConflict:
   307  		// Table just got created. This is fine.
   308  		return nil
   309  	case ok && apiErr.Code == http.StatusForbidden:
   310  		// No create table permission.
   311  		return err
   312  	case err != nil:
   313  		return transient.Tag.Apply(err)
   314  	default:
   315  		logging.Infof(ctx, "Created BigQuery table %s", t.FullyQualifiedName())
   316  		return nil
   317  	}
   318  }
   319  
   320  // ensureBQTable updates the BigQuery table t to match specification spec.
   321  func ensureBQTable(ctx context.Context, t Table, spec *bigquery.TableMetadata, opts ensureTableOpts) error {
   322  	err := retry.Retry(ctx, transient.Only(retry.Default), func() error {
   323  		// We should retrieve Metadata in a retry loop because of the ETag check
   324  		// below.
   325  		md, err := t.Metadata(ctx)
   326  		if err != nil {
   327  			return err
   328  		}
   329  
   330  		var update bigquery.TableMetadataToUpdate
   331  		mutated := false
   332  
   333  		if md.Type != bigquery.ViewTable {
   334  			// Only consider Schema updates for tables that are not views.
   335  			combinedSchema, updated := appendMissing(md.Schema, spec.Schema)
   336  			if updated {
   337  				mutated = true
   338  				update.Schema = combinedSchema
   339  			}
   340  		}
   341  
   342  		// The following fields are subject of a possible edit war
   343  		// if there are multiple versions of the table specification
   344  		// deployed simultaneously (e.g. to canary and stable).
   345  		// Only perform them if there is a versioning scheme
   346  		// in place to prevent an edit war.
   347  		pushMetadata := opts.metadataVersioned && metadataVersion(spec.Labels) > metadataVersion(md.Labels)
   348  
   349  		if md.Type == bigquery.ViewTable {
   350  			// Update view query, if necessary.
   351  
   352  			existingQuery := parseViewQuery(md.ViewQuery)
   353  
   354  			now := clock.Now(ctx)
   355  			updateQueryContent := pushMetadata && spec.ViewQuery != existingQuery.query
   356  			isViewStale := opts.viewRefreshEnabled && now.Sub(existingQuery.lastIndirectSchemaUpdate) > opts.viewRefreshInterval
   357  
   358  			if updateQueryContent || isViewStale {
   359  				var newQuery viewQuery
   360  				if updateQueryContent {
   361  					newQuery.query = spec.ViewQuery
   362  				} else {
   363  					// Only update the indirect schema version header
   364  					// without changing the rest of the query. New
   365  					// query contents should only be rolled out with
   366  					// an uprev of the schema version.
   367  					newQuery.query = existingQuery.query
   368  				}
   369  
   370  				if opts.viewRefreshEnabled {
   371  					newQuery.lastIndirectSchemaUpdate = now
   372  				} else {
   373  					newQuery.lastIndirectSchemaUpdate = time.Time{}
   374  				}
   375  
   376  				update.ViewQuery = newQuery.SQL()
   377  				mutated = true
   378  			}
   379  		}
   380  		if pushMetadata && isClusteringDifferent(md.Clustering, spec.Clustering) {
   381  			// Update clustering.
   382  			update.Clustering = spec.Clustering
   383  			mutated = true
   384  		}
   385  		if pushMetadata && md.Description != spec.Description {
   386  			// Update description.
   387  			update.Description = spec.Description
   388  			mutated = true
   389  		}
   390  		setLabels, deleteLabels := diffLabels(md.Labels, spec.Labels)
   391  		if pushMetadata && (len(setLabels) > 0 || len(deleteLabels) > 0) {
   392  			// Update labels.
   393  			for k, v := range setLabels {
   394  				update.SetLabel(k, v)
   395  			}
   396  			for k := range deleteLabels {
   397  				update.DeleteLabel(k)
   398  			}
   399  			mutated = true
   400  		}
   401  
   402  		if !mutated {
   403  			// Nothing to update.
   404  			return nil
   405  		}
   406  
   407  		_, err = t.Update(ctx, update, md.ETag)
   408  		apiErr, ok := err.(*googleapi.Error)
   409  		switch {
   410  		case ok && apiErr.Code == http.StatusConflict:
   411  			// ETag became stale since we requested it. Try again.
   412  			return transient.Tag.Apply(err)
   413  
   414  		case err != nil:
   415  			return err
   416  
   417  		default:
   418  			logging.Infof(ctx, "Updated BigQuery table %s", t.FullyQualifiedName())
   419  			return nil
   420  		}
   421  	}, nil)
   422  
   423  	apiErr, ok := err.(*googleapi.Error)
   424  	switch {
   425  	case ok && apiErr.Code == http.StatusForbidden:
   426  		// No read or modify table permission.
   427  		return err
   428  	case err != nil:
   429  		return transient.Tag.Apply(err)
   430  	}
   431  	return nil
   432  }
   433  
   434  // appendMissing merges BigQuery schemas, adding to schema
   435  // those fields that only exist in newFields and returning
   436  // the result.
   437  // Schema merging happens recursively, e.g. so that missing
   438  // fields are added to struct-valued fields as well.
   439  func appendMissing(schema, newFields bigquery.Schema) (combinedSchema bigquery.Schema, updated bool) {
   440  	// Shallow copy schema to avoid changes propogating backwards
   441  	// to the caller via arguments.
   442  	combinedSchema = copySchema(schema)
   443  
   444  	updated = false
   445  	indexed := make(map[string]*bigquery.FieldSchema, len(combinedSchema))
   446  	for _, c := range combinedSchema {
   447  		indexed[c.Name] = c
   448  	}
   449  
   450  	for _, newField := range newFields {
   451  		if existingField := indexed[newField.Name]; existingField == nil {
   452  			// The field is missing.
   453  			combinedSchema = append(combinedSchema, newField)
   454  			updated = true
   455  		} else {
   456  			var didUpdate bool
   457  			existingField.Schema, didUpdate = appendMissing(existingField.Schema, newField.Schema)
   458  			if didUpdate {
   459  				updated = true
   460  			}
   461  		}
   462  	}
   463  	return combinedSchema, updated
   464  }
   465  
   466  // metadataVersion attempts to extract the metadata version from the
   467  // given table labels, and returns it if it is found.
   468  // If it cannot be found, or is invalid, zero (0) is returned.
   469  func metadataVersion(labels map[string]string) int64 {
   470  	if labels == nil {
   471  		// No labels. Invalid.
   472  		return 0
   473  	}
   474  	versionString := labels[MetadataVersionKey]
   475  	i, err := strconv.ParseInt(versionString, 10, 64)
   476  	if err != nil || i < 0 {
   477  		// No version string or negative version. Invalid.
   478  		return 0
   479  	}
   480  	if versionString != fmt.Sprintf("%v", i) {
   481  		// Integer is not in its canoncial string representation,
   482  		// e.g. "+1" instead of "1". Invalid.
   483  		return 0
   484  	}
   485  	return i
   486  }
   487  
   488  // copySchema creates a shallow copy of the existing schema.
   489  func copySchema(schema bigquery.Schema) bigquery.Schema {
   490  	if schema == nil {
   491  		// Preserve existing 'nil' schemas as nil instead of
   492  		// converting them to zero-length slices.
   493  		return nil
   494  	}
   495  	copy := make(bigquery.Schema, 0, len(schema))
   496  	for _, fieldSchema := range schema {
   497  		fieldCopy := *fieldSchema
   498  		copy = append(copy, &fieldCopy)
   499  	}
   500  	return copy
   501  }
   502  
   503  // diffLabels returns the difference between two set of BigQuery
   504  // labels, in terms of the updates that need to be applied to
   505  // currentLabels to reach newLabels:
   506  // - the set of new/updated labels
   507  // - the set of labels that should be deleted
   508  func diffLabels(currentLabels, newLabels map[string]string) (setLabels map[string]string, deleteLabels map[string]struct{}) {
   509  	setLabels = make(map[string]string)
   510  	deleteLabels = make(map[string]struct{})
   511  
   512  	// Identify new and updated labels in newLabels.
   513  	for k, v := range newLabels {
   514  		existingValue, ok := currentLabels[k]
   515  		if !(ok && existingValue == v) {
   516  			setLabels[k] = v
   517  		}
   518  	}
   519  	// Identify labels that were deleted (in currentLabels but not in newLabels).
   520  	for k := range currentLabels {
   521  		if _, ok := newLabels[k]; !ok {
   522  			deleteLabels[k] = struct{}{}
   523  		}
   524  	}
   525  	return setLabels, deleteLabels
   526  }
   527  
   528  // isClusteringDifferent returns whether clusterings settings a and b
   529  // are semantically different.
   530  func isClusteringDifferent(a, b *bigquery.Clustering) bool {
   531  	aLength := 0
   532  	if a != nil {
   533  		aLength = len(a.Fields)
   534  	}
   535  	bLength := 0
   536  	if b != nil {
   537  		bLength = len(b.Fields)
   538  	}
   539  	if aLength != bLength {
   540  		return true
   541  	}
   542  	for i := 0; i < aLength; i++ {
   543  		if !strings.EqualFold(a.Fields[i], b.Fields[i]) {
   544  			return true
   545  		}
   546  	}
   547  	return false
   548  }
   549  
   550  var indirectSchemaVersionRE = regexp.MustCompile(`^-- Indirect schema version: ([0-9\-:TZ]+)$`)
   551  
   552  // viewQuery is the logical representation of a SQL query
   553  // in a BigQuery view, separating the indirect schema version
   554  // header comment (if any) from the residual query content.
   555  type viewQuery struct {
   556  	// lastIndirectSchemaUpdate is the timestamp in the Indirect schema version
   557  	// header that may appear before the query, if any.
   558  	// If there is no such header, this is the zero time (time.Time{}).
   559  	lastIndirectSchemaUpdate time.Time
   560  	// query is the residual query content.
   561  	query string
   562  }
   563  
   564  // parseViewQuery parses the SQL of a view query, separating
   565  // the indirect schema version header from the rest of the
   566  // query content.
   567  func parseViewQuery(sql string) viewQuery {
   568  	lines := strings.Split(sql, "\n")
   569  
   570  	// Try to find the header in the first line of the SQL.
   571  	matches := indirectSchemaVersionRE.FindStringSubmatch(lines[0])
   572  	if len(matches) == 0 {
   573  		// No indirect schema version header found.
   574  		return viewQuery{
   575  			lastIndirectSchemaUpdate: time.Time{}, // Use zero time.
   576  			query:                    sql,
   577  		}
   578  	}
   579  
   580  	// Indirect schema version header is present.
   581  	timestampString := matches[1]
   582  	residualSQL := strings.Join(lines[1:], "\n")
   583  
   584  	lastUpdateTime, err := time.Parse(time.RFC3339, timestampString)
   585  	if err != nil {
   586  		// Invalid timestamp.
   587  		return viewQuery{
   588  			lastIndirectSchemaUpdate: time.Time{}, // Use zero time.
   589  			query:                    residualSQL,
   590  		}
   591  	}
   592  	return viewQuery{
   593  		lastIndirectSchemaUpdate: lastUpdateTime,
   594  		query:                    residualSQL,
   595  	}
   596  }
   597  
   598  // SQL returns the raw SQL representation of the view query.
   599  func (v viewQuery) SQL() string {
   600  	var b strings.Builder
   601  	if (v.lastIndirectSchemaUpdate != time.Time{}) {
   602  		b.WriteString(fmt.Sprintf("-- Indirect schema version: %s\n", v.lastIndirectSchemaUpdate.Format(time.RFC3339)))
   603  	}
   604  	b.WriteString(v.query)
   605  	return b.String()
   606  }