go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/invocations/graph/reachable.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package graph contains methods to explore reachable invocations.
    16  package graph
    17  
    18  import (
    19  	"google.golang.org/protobuf/proto"
    20  
    21  	"go.chromium.org/luci/common/errors"
    22  	"go.chromium.org/luci/resultdb/internal/invocations"
    23  	internalpb "go.chromium.org/luci/resultdb/internal/proto"
    24  	"go.chromium.org/luci/resultdb/internal/spanutil"
    25  	pb "go.chromium.org/luci/resultdb/proto/v1"
    26  )
    27  
    28  // ReachableInvocation contains summary information about a reachable
    29  // invocation.
    30  type ReachableInvocation struct {
    31  	// HasTestResults stores whether the invocation has any test results.
    32  	HasTestResults bool
    33  	// HasTestResults stores whether the invocation has any test exonerations.
    34  	HasTestExonerations bool
    35  	// The realm of the invocation.
    36  	Realm string
    37  	// The source associated with the invocation, which can be looked up in
    38  	// ReachableInvocations.Sources.
    39  	// If no sources could be resolved, this is EmptySourceHash.
    40  	SourceHash SourceHash
    41  }
    42  
    43  // ReachableInvocations is a set of reachable invocations,
    44  // including summary information about each invocation.
    45  // The set includes the root invocation(s) from which reachables were
    46  // explored.
    47  type ReachableInvocations struct {
    48  	// The set of reachable invocations, including the root
    49  	// invocation from which reachability was explored.
    50  	Invocations map[invocations.ID]ReachableInvocation
    51  	// The distinct code sources in the reachable invocation graph.
    52  	// Stored here rather than on the invocations themselves to
    53  	// simplify deduplicating sources objects as many will be the
    54  	// same between invocations.
    55  	Sources map[SourceHash]*pb.Sources
    56  }
    57  
    58  func NewReachableInvocations() ReachableInvocations {
    59  	return ReachableInvocations{
    60  		Invocations: make(map[invocations.ID]ReachableInvocation),
    61  		Sources:     make(map[SourceHash]*pb.Sources),
    62  	}
    63  }
    64  
    65  // Union adds other reachable invocations.
    66  func (r *ReachableInvocations) Union(other ReachableInvocations) {
    67  	for id, invocation := range other.Invocations {
    68  		r.Invocations[id] = invocation
    69  	}
    70  	for id, sources := range other.Sources {
    71  		r.Sources[id] = sources
    72  	}
    73  }
    74  
    75  // Batches splits s into batches.
    76  // The batches are sorted by RowID(), such that interval (minRowID, maxRowID)
    77  // of each batch does not overlap with any other batch.
    78  //
    79  // The size of batch is hardcoded 50, because that's the maximum parallelism
    80  // we get from Cloud Spanner.
    81  func (r ReachableInvocations) Batches() []ReachableInvocations {
    82  	return r.batches(50)
    83  }
    84  
    85  // IDSet returns the set of invocation IDs included in the list of
    86  // reachable invocations.
    87  func (r ReachableInvocations) IDSet() (invocations.IDSet, error) {
    88  	// Yes, this is an artificial limit.  With 20,000 invocations you are already likely
    89  	// to run into problems if you try to process all of these in one go (e.g. in a
    90  	// Spanner query).  If you want more, use the batched call and handle a batch at a time.
    91  	if len(r.Invocations) > MaxNodes {
    92  		return nil, errors.Reason("more than %d invocations match", MaxNodes).Tag(TooManyTag).Err()
    93  	}
    94  	return r.idSetNoLimit(), nil
    95  }
    96  
    97  // IDSet returns the set of invocation IDs included in the list of
    98  // reachable invocations.  This internal only version has no limit and
    99  // is only for use where the limit will be checked in other ways.
   100  func (r ReachableInvocations) idSetNoLimit() invocations.IDSet {
   101  	result := make(invocations.IDSet, len(r.Invocations))
   102  	for id := range r.Invocations {
   103  		result[id] = struct{}{}
   104  	}
   105  	return result
   106  }
   107  
   108  // WithTestResultsIDSet returns the set of invocation IDs
   109  // that contain test results.
   110  func (r ReachableInvocations) WithTestResultsIDSet() (invocations.IDSet, error) {
   111  	result := make(invocations.IDSet, len(r.Invocations))
   112  	for id, inv := range r.Invocations {
   113  		if inv.HasTestResults {
   114  			result[id] = struct{}{}
   115  		}
   116  	}
   117  	// Yes, this is an artificial limit.  With 20,000 invocations you are already likely
   118  	// to run into problems if you try to process all of these in one go (e.g. in a
   119  	// Spanner query).  If you want more, use the batched call and handle a batch at a time.
   120  	if len(result) > MaxNodes {
   121  		return nil, errors.Reason("more than %d invocations match", MaxNodes).Tag(TooManyTag).Err()
   122  	}
   123  	return result, nil
   124  }
   125  
   126  // WithExonerationsIDSet returns the set of invocation IDs
   127  // that contain test exonerations.
   128  func (r ReachableInvocations) WithExonerationsIDSet() (invocations.IDSet, error) {
   129  	result := make(invocations.IDSet, len(r.Invocations))
   130  	for id, inv := range r.Invocations {
   131  		if inv.HasTestExonerations {
   132  			result[id] = struct{}{}
   133  		}
   134  	}
   135  	// Yes, this is an artificial limit.  With 20,000 invocations you are already likely
   136  	// to run into problems if you try to process all of these in one go (e.g. in a
   137  	// Spanner query).  If you want more, use the batched call and handle a batch at a time.
   138  	if len(result) > MaxNodes {
   139  		return nil, errors.Reason("more than %d invocations match", MaxNodes).Tag(TooManyTag).Err()
   140  	}
   141  	return result, nil
   142  }
   143  
   144  func (r ReachableInvocations) batches(size int) []ReachableInvocations {
   145  	ids := r.idSetNoLimit().SortByRowID()
   146  	batches := make([]ReachableInvocations, 0, 1+len(ids)/size)
   147  	for len(ids) > 0 {
   148  		batchSize := size
   149  		if batchSize > len(ids) {
   150  			batchSize = len(ids)
   151  		}
   152  		batch := NewReachableInvocations()
   153  		for _, id := range ids[:batchSize] {
   154  			inv := r.Invocations[id]
   155  			batch.Invocations[id] = inv
   156  			if inv.SourceHash != EmptySourceHash {
   157  				batch.Sources[inv.SourceHash] = r.Sources[inv.SourceHash]
   158  			}
   159  		}
   160  		batches = append(batches, batch)
   161  		ids = ids[batchSize:]
   162  	}
   163  	return batches
   164  }
   165  
   166  // marshal marshals the ReachableInvocations into a Redis value.
   167  func (r ReachableInvocations) marshal() ([]byte, error) {
   168  	if len(r.Invocations) == 0 {
   169  		return nil, errors.Reason("reachable invocations is invalid; at minimum the root invocation itself should be included").Err()
   170  	}
   171  
   172  	indexBySourceHash := make(map[SourceHash]int)
   173  	distinctSources := make([]*pb.Sources, 0, len(r.Sources))
   174  	for id, source := range r.Sources {
   175  		distinctSources = append(distinctSources, source)
   176  		indexBySourceHash[id] = len(distinctSources) - 1
   177  	}
   178  
   179  	invocations := make([]*internalpb.ReachableInvocations_ReachableInvocation, 0, len(r.Invocations))
   180  	for id, inv := range r.Invocations {
   181  		proto := &internalpb.ReachableInvocations_ReachableInvocation{
   182  			InvocationId:        string(id),
   183  			HasTestResults:      inv.HasTestResults,
   184  			HasTestExonerations: inv.HasTestExonerations,
   185  			Realm:               inv.Realm,
   186  		}
   187  		if inv.SourceHash != EmptySourceHash {
   188  			proto.SourceOffset = int64(indexBySourceHash[inv.SourceHash]) + 1
   189  		}
   190  		invocations = append(invocations, proto)
   191  	}
   192  	message := &internalpb.ReachableInvocations{
   193  		Invocations: invocations,
   194  		Sources:     distinctSources,
   195  	}
   196  	result, err := proto.Marshal(message)
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  	return spanutil.Compress(result), nil
   201  }
   202  
   203  // unmarshalReachableInvocations unmarshals the ReachableInvocations from a Redis value.
   204  func unmarshalReachableInvocations(value []byte) (ReachableInvocations, error) {
   205  	// Assume 2x growth on decompression (will be resized as needed)
   206  	decompressed := make([]byte, 0, len(value)*2)
   207  	decompressed, err := spanutil.Decompress(value, decompressed)
   208  	if err != nil {
   209  		return ReachableInvocations{}, err
   210  	}
   211  
   212  	message := &internalpb.ReachableInvocations{}
   213  	if err := proto.Unmarshal(decompressed, message); err != nil {
   214  		return ReachableInvocations{}, err
   215  	}
   216  
   217  	sourceHashByIndex := make([]SourceHash, len(message.Sources))
   218  	sources := make(map[SourceHash]*pb.Sources)
   219  	for i, source := range message.Sources {
   220  		hash := HashSources(source)
   221  		sources[hash] = source
   222  		sourceHashByIndex[i] = hash
   223  	}
   224  
   225  	invs := make(map[invocations.ID]ReachableInvocation, len(message.Invocations))
   226  	for _, entry := range message.Invocations {
   227  		inv := ReachableInvocation{
   228  			HasTestResults:      entry.HasTestResults,
   229  			HasTestExonerations: entry.HasTestExonerations,
   230  			Realm:               entry.Realm,
   231  			SourceHash:          EmptySourceHash,
   232  		}
   233  		if entry.SourceOffset > 0 {
   234  			inv.SourceHash = sourceHashByIndex[entry.SourceOffset-1]
   235  		}
   236  		invs[invocations.ID(entry.InvocationId)] = inv
   237  	}
   238  	return ReachableInvocations{
   239  		Invocations: invs,
   240  		Sources:     sources,
   241  	}, nil
   242  }