go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/state/span.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/state/span.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package state
    16  
    17  import (
    18  	"context"
    19  	"encoding/hex"
    20  	"fmt"
    21  	"math"
    22  	"math/big"
    23  	"strings"
    24  	"time"
    25  
    26  	"cloud.google.com/go/spanner"
    27  
    28  	"go.chromium.org/luci/common/errors"
    29  	"go.chromium.org/luci/server/span"
    30  
    31  	"go.chromium.org/luci/analysis/internal/clustering"
    32  	cpb "go.chromium.org/luci/analysis/internal/clustering/proto"
    33  	"go.chromium.org/luci/analysis/internal/clustering/rules"
    34  	"go.chromium.org/luci/analysis/internal/config"
    35  	spanutil "go.chromium.org/luci/analysis/internal/span"
    36  	"go.chromium.org/luci/analysis/pbutil"
    37  )
    38  
// Entry represents the clustering state of a chunk, consisting of:
//   - Metadata about what test results were clustered.
//   - Metadata about how the test results were clustered (the algorithms
//     and failure association rules used).
//   - The clusters each test result is in.
type Entry struct {
	// Project is the LUCI Project the chunk belongs to.
	Project string
	// ChunkID is the identity of the chunk of test results. 32 lowercase hexadecimal
	// characters assigned by the ingestion process.
	ChunkID string
	// PartitionTime is the start of the retention period of the test results in the chunk.
	PartitionTime time.Time
	// ObjectID is the identity of the object in GCS containing the chunk's test results.
	// 32 lowercase hexadecimal characters.
	ObjectID string
	// Clustering describes the latest clustering of test results in
	// the chunk.
	Clustering clustering.ClusterResults
	// LastUpdated is the Spanner commit time the row was last updated. Output only.
	LastUpdated time.Time
}
    61  
// NotFoundErr is the error returned by Read if the row could not be found.
var NotFoundErr = errors.New("clustering state row not found")

// EndOfTable is the highest possible chunk ID that can be stored:
// 32 hexadecimal 'f' characters.
var EndOfTable = strings.Repeat("ff", 16)
    67  
    68  // Create inserts clustering state for a chunk. Must be
    69  // called in the context of a Spanner transaction.
    70  func Create(ctx context.Context, e *Entry) error {
    71  	if err := validateEntry(e); err != nil {
    72  		return err
    73  	}
    74  	clusters, err := encodeClusters(e.Clustering.Algorithms, e.Clustering.Clusters)
    75  	if err != nil {
    76  		return err
    77  	}
    78  	ms := spanutil.InsertMap("ClusteringState", map[string]any{
    79  		"Project":           e.Project,
    80  		"ChunkID":           e.ChunkID,
    81  		"PartitionTime":     e.PartitionTime,
    82  		"ObjectID":          e.ObjectID,
    83  		"AlgorithmsVersion": e.Clustering.AlgorithmsVersion,
    84  		"ConfigVersion":     e.Clustering.ConfigVersion,
    85  		"RulesVersion":      e.Clustering.RulesVersion,
    86  		"Clusters":          clusters,
    87  		"LastUpdated":       spanner.CommitTimestamp,
    88  	})
    89  	span.BufferWrite(ctx, ms)
    90  	return nil
    91  }
    92  
    93  // ChunkKey represents the identify of a chunk.
    94  type ChunkKey struct {
    95  	Project string
    96  	ChunkID string
    97  }
    98  
    99  // String returns a string representation of the key, for use in
   100  // dictionaries.
   101  func (k ChunkKey) String() string {
   102  	return fmt.Sprintf("%q/%q", k.Project, k.ChunkID)
   103  }
   104  
   105  // ReadLastUpdated reads the last updated time of the specified chunks.
   106  // If the chunk does not exist, the zero time value time.Time{} is returned.
   107  // Unless an error is returned, the returned slice will be of the same length
   108  // as chunkIDs. The i-th LastUpdated time returned will correspond
   109  // to the i-th chunk ID requested.
   110  func ReadLastUpdated(ctx context.Context, keys []ChunkKey) ([]time.Time, error) {
   111  	var ks []spanner.Key
   112  	for _, key := range keys {
   113  		ks = append(ks, spanner.Key{key.Project, key.ChunkID})
   114  	}
   115  
   116  	results := make(map[string]time.Time)
   117  	columns := []string{"Project", "ChunkID", "LastUpdated"}
   118  	it := span.Read(ctx, "ClusteringState", spanner.KeySetFromKeys(ks...), columns)
   119  	err := it.Do(func(r *spanner.Row) error {
   120  		var project string
   121  		var chunkID string
   122  		var lastUpdated time.Time
   123  		if err := r.Columns(&project, &chunkID, &lastUpdated); err != nil {
   124  			return errors.Annotate(err, "read clustering state row").Err()
   125  		}
   126  		key := ChunkKey{project, chunkID}
   127  		results[key.String()] = lastUpdated
   128  		return nil
   129  	})
   130  	if err != nil {
   131  		return nil, err
   132  	}
   133  	result := make([]time.Time, len(keys))
   134  	for i, key := range keys {
   135  		// If an entry does not exist in results, this will set the
   136  		// default value for *time.Time, which is nil.
   137  		result[i] = results[key.String()]
   138  	}
   139  	return result, nil
   140  }
   141  
   142  // UpdateClustering updates the clustering results on a chunk.
   143  //
   144  // To avoid clobbering other concurrent updates, the caller should read
   145  // the LastUpdated time of the chunk in the same transaction as it is
   146  // updated (i.e. using ReadLastUpdated) and verify it matches the previous
   147  // entry passed.
   148  //
   149  // The update uses the previous entry to avoid writing cluster data
   150  // if it has not changed, which optimises the performance of minor
   151  // reclusterings.
   152  func UpdateClustering(ctx context.Context, previous *Entry, update *clustering.ClusterResults) error {
   153  	if err := validateClusterResults(update); err != nil {
   154  		return err
   155  	}
   156  
   157  	upd := make(map[string]any)
   158  	upd["Project"] = previous.Project
   159  	upd["ChunkID"] = previous.ChunkID
   160  	upd["LastUpdated"] = spanner.CommitTimestamp
   161  	upd["AlgorithmsVersion"] = update.AlgorithmsVersion
   162  	upd["ConfigVersion"] = update.ConfigVersion
   163  	upd["RulesVersion"] = update.RulesVersion
   164  
   165  	if !clustering.AlgorithmsAndClustersEqual(&previous.Clustering, update) {
   166  		// Clusters is a field that may be many kilobytes in size.
   167  		// For efficiency, only write it to Spanner if it is changed.
   168  		clusters, err := encodeClusters(update.Algorithms, update.Clusters)
   169  		if err != nil {
   170  			return err
   171  		}
   172  		upd["Clusters"] = clusters
   173  	}
   174  
   175  	span.BufferWrite(ctx, spanutil.UpdateMap("ClusteringState", upd))
   176  	return nil
   177  }
   178  
   179  // Read reads clustering state for a chunk. Must be
   180  // called in the context of a Spanner transaction. If no clustering
   181  // state exists, the method returns the error NotFound.
   182  func Read(ctx context.Context, project, chunkID string) (*Entry, error) {
   183  	whereClause := "ChunkID = @chunkID"
   184  	params := make(map[string]any)
   185  	params["chunkID"] = chunkID
   186  
   187  	limit := 1
   188  	results, err := readWhere(ctx, project, whereClause, params, limit)
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  	if len(results) == 0 {
   193  		// Row does not exist.
   194  		return nil, NotFoundErr
   195  	}
   196  	return results[0], nil
   197  }
   198  
// ReadNextOptions specifies options for ReadNextN.
type ReadNextOptions struct {
	// The exclusive lower bound of the range of ChunkIDs to read.
	// To read from the start of the table, leave this blank ("").
	StartChunkID string
	// The inclusive upper bound of the range of ChunkIDs to read.
	// To specify the end of the table, use the constant EndOfTable.
	EndChunkID string
	// The minimum AlgorithmsVersion that re-clustering wants to achieve.
	// If a row has an AlgorithmsVersion less than this value, it will
	// be eligible to be read.
	AlgorithmsVersion int64
	// The minimum ConfigVersion that re-clustering wants to achieve.
	// If a row has a ConfigVersion less than this value, it will
	// be eligible to be read.
	ConfigVersion time.Time
	// The minimum RulesVersion that re-clustering wants to achieve.
	// If a row has a RulesVersion less than this value, it will
	// be eligible to be read.
	RulesVersion time.Time
}
   220  
   221  // ReadNextN reads the n consecutively next clustering state entries
   222  // matching ReadNextOptions.
   223  func ReadNextN(ctx context.Context, project string, opts ReadNextOptions, n int) ([]*Entry, error) {
   224  	params := make(map[string]any)
   225  	whereClause := `
   226  		ChunkId > @startChunkID AND ChunkId <= @endChunkID
   227  		AND (AlgorithmsVersion < @algorithmsVersion
   228  			OR ConfigVersion < @configVersion
   229  			OR RulesVersion < @rulesVersion)
   230  	`
   231  	params["startChunkID"] = opts.StartChunkID
   232  	params["endChunkID"] = opts.EndChunkID
   233  	params["algorithmsVersion"] = opts.AlgorithmsVersion
   234  	params["configVersion"] = opts.ConfigVersion
   235  	params["rulesVersion"] = opts.RulesVersion
   236  
   237  	return readWhere(ctx, project, whereClause, params, n)
   238  }
   239  
   240  func readWhere(ctx context.Context, project, whereClause string, params map[string]any, limit int) ([]*Entry, error) {
   241  	stmt := spanner.NewStatement(`
   242  		SELECT
   243  		  ChunkId, PartitionTime, ObjectId,
   244  		  AlgorithmsVersion,
   245  		  ConfigVersion, RulesVersion,
   246  		  LastUpdated, Clusters
   247  		FROM ClusteringState
   248  		WHERE Project = @project AND (` + whereClause + `)
   249  		ORDER BY ChunkId
   250  		LIMIT @limit
   251  	`)
   252  	for k, v := range params {
   253  		stmt.Params[k] = v
   254  	}
   255  	stmt.Params["project"] = project
   256  	stmt.Params["limit"] = limit
   257  
   258  	it := span.Query(ctx, stmt)
   259  	var b spanutil.Buffer
   260  	results := []*Entry{}
   261  	err := it.Do(func(r *spanner.Row) error {
   262  		clusters := &cpb.ChunkClusters{}
   263  		result := &Entry{Project: project}
   264  
   265  		err := b.FromSpanner(r,
   266  			&result.ChunkID, &result.PartitionTime, &result.ObjectID,
   267  			&result.Clustering.AlgorithmsVersion,
   268  			&result.Clustering.ConfigVersion, &result.Clustering.RulesVersion,
   269  			&result.LastUpdated, clusters)
   270  		if err != nil {
   271  			return errors.Annotate(err, "read clustering state row").Err()
   272  		}
   273  
   274  		result.Clustering.Algorithms, result.Clustering.Clusters, err = decodeClusters(clusters)
   275  		if err != nil {
   276  			return errors.Annotate(err, "decode clusters").Err()
   277  		}
   278  		results = append(results, result)
   279  		return nil
   280  	})
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  	return results, nil
   285  }
   286  
   287  // ReadProjects read all distinct projects with a clustering state entry..
   288  func ReadProjects(ctx context.Context) ([]string, error) {
   289  	stmt := spanner.NewStatement(`
   290  		SELECT Project
   291  		FROM ClusteringState
   292  		GROUP BY Project
   293  	`)
   294  	it := span.Query(ctx, stmt)
   295  	var projects []string
   296  	err := it.Do(func(r *spanner.Row) error {
   297  		var project string
   298  		if err := r.Columns(&project); err != nil {
   299  			return errors.Annotate(err, "read project row").Err()
   300  		}
   301  		projects = append(projects, project)
   302  		return nil
   303  	})
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  	return projects, nil
   308  }
   309  
   310  // EstimateChunks estimates the total number of chunks in the ClusteringState
   311  // table for the given project.
   312  func EstimateChunks(ctx context.Context, project string) (int, error) {
   313  	stmt := spanner.NewStatement(`
   314  	  SELECT ChunkId
   315  	  FROM ClusteringState
   316  	  WHERE Project = @project
   317  	  ORDER BY ChunkId ASC
   318  	  LIMIT 1 OFFSET 100
   319  	`)
   320  	stmt.Params["project"] = project
   321  
   322  	it := span.Query(ctx, stmt)
   323  	var chunkID string
   324  	err := it.Do(func(r *spanner.Row) error {
   325  		if err := r.Columns(&chunkID); err != nil {
   326  			return errors.Annotate(err, "read ChunkID row").Err()
   327  		}
   328  		return nil
   329  	})
   330  	if err != nil {
   331  		return 0, err
   332  	}
   333  	if chunkID == "" {
   334  		// There was no 100th chunk ID. There must be less
   335  		// than 100 chunks in the project.
   336  		return 99, nil
   337  	}
   338  	return estimateChunksFromID(chunkID)
   339  }
   340  
   341  // estimateChunksFromID estimates the number of chunks in a project
   342  // given the ID of the 100th chunk (in ascending keyspace order) in
   343  // that project. The maximum estimate that will be returned is one
   344  // billion. If there is no 100th chunk ID in the project, then
   345  // there are clearly 99 chunks or less in the project.
   346  func estimateChunksFromID(chunkID100 string) (int, error) {
   347  	const MaxEstimate = 1000 * 1000 * 1000
   348  	// This function uses the property that ChunkIDs are approximately
   349  	// uniformly distributed. We use the following estimator of the
   350  	// number of rows:
   351  	//   100 / (fraction of keyspace used up to 100th row)
   352  	// where fraction of keyspace used up to 100th row is:
   353  	//   (ChunkID_100th + 1) / 2^128
   354  	//
   355  	// Where ChunkID_100th is the ChunkID of the 100th row (in keyspace
   356  	// order), as a 128-bit integer (rather than hexadecimal string).
   357  	//
   358  	// Rearranging this estimator, we get:
   359  	//   100 * 2^128 / (ChunkID_100th + 1)
   360  
   361  	// numerator = 100 * 2 ^ 128
   362  	var numerator big.Int
   363  	numerator.Lsh(big.NewInt(100), 128)
   364  
   365  	idBytes, err := hex.DecodeString(chunkID100)
   366  	if err != nil {
   367  		return 0, err
   368  	}
   369  
   370  	// denominator = ChunkID_100th + 1. We add one because
   371  	// the keyspace consumed includes the ID itself.
   372  	var denominator big.Int
   373  	denominator.SetBytes(idBytes)
   374  	denominator.Add(&denominator, big.NewInt(1))
   375  
   376  	// estimate = numerator / denominator.
   377  	var estimate big.Int
   378  	estimate.Div(&numerator, &denominator)
   379  
   380  	result := uint64(math.MaxUint64)
   381  	if estimate.IsUint64() {
   382  		result = estimate.Uint64()
   383  	}
   384  	if result > MaxEstimate {
   385  		result = MaxEstimate
   386  	}
   387  	return int(result), nil
   388  }
   389  
   390  func validateEntry(e *Entry) error {
   391  	if err := pbutil.ValidateProject(e.Project); err != nil {
   392  		return errors.Annotate(err, "project").Err()
   393  	}
   394  	switch {
   395  	case !clustering.ChunkRe.MatchString(e.ChunkID):
   396  		return fmt.Errorf("chunk ID %q is not valid", e.ChunkID)
   397  	case e.PartitionTime.IsZero():
   398  		return errors.New("partition time must be specified")
   399  	case e.ObjectID == "":
   400  		return errors.New("object ID must be specified")
   401  	default:
   402  		if err := validateClusterResults(&e.Clustering); err != nil {
   403  			return err
   404  		}
   405  		return nil
   406  	}
   407  }
   408  
   409  func validateClusterResults(c *clustering.ClusterResults) error {
   410  	switch {
   411  	case c.AlgorithmsVersion <= 0:
   412  		return errors.New("algorithms version must be specified")
   413  	case c.ConfigVersion.Before(config.StartingEpoch):
   414  		return errors.New("config version must be valid")
   415  	case c.RulesVersion.Before(rules.StartingEpoch):
   416  		return errors.New("rules version must be valid")
   417  	default:
   418  		if err := validateAlgorithms(c.Algorithms); err != nil {
   419  			return errors.Annotate(err, "algorithms").Err()
   420  		}
   421  		if err := validateClusters(c.Clusters, c.Algorithms); err != nil {
   422  			return errors.Annotate(err, "clusters").Err()
   423  		}
   424  		return nil
   425  	}
   426  }
   427  
   428  func validateAlgorithms(algorithms map[string]struct{}) error {
   429  	for a := range algorithms {
   430  		if !clustering.AlgorithmRe.MatchString(a) {
   431  			return fmt.Errorf("algorithm %q is not valid", a)
   432  		}
   433  	}
   434  	return nil
   435  }
   436  
   437  func validateClusters(clusters [][]clustering.ClusterID, algorithms map[string]struct{}) error {
   438  	if len(clusters) == 0 {
   439  		// Each chunk must have at least one test result, even
   440  		// if that test result is in no clusters.
   441  		return errors.New("there must be clustered test results in the chunk")
   442  	}
   443  	// Outer slice has on entry per test result.
   444  	for i, tr := range clusters {
   445  		// Inner slice has the list of clusters per test result.
   446  		for j, c := range tr {
   447  			if _, ok := algorithms[c.Algorithm]; !ok {
   448  				return fmt.Errorf("test result %v: cluster %v: algorithm not in algorithms list: %q", i, j, c.Algorithm)
   449  			}
   450  			if err := c.ValidateIDPart(); err != nil {
   451  				return errors.Annotate(err, "test result %v: cluster %v: cluster ID is not valid", i, j).Err()
   452  			}
   453  		}
   454  		if !clustering.ClustersAreSortedNoDuplicates(tr) {
   455  			return fmt.Errorf("test result %v: clusters are not sorted, or there are duplicates: %v", i, tr)
   456  		}
   457  	}
   458  	return nil
   459  }