go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/artifacts/query.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package artifacts
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"text/template"
    22  
    23  	"cloud.google.com/go/spanner"
    24  
    25  	"go.chromium.org/luci/resultdb/internal/invocations"
    26  	"go.chromium.org/luci/resultdb/internal/pagination"
    27  	"go.chromium.org/luci/resultdb/internal/spanutil"
    28  	"go.chromium.org/luci/resultdb/internal/testresults"
    29  	"go.chromium.org/luci/resultdb/pbutil"
    30  	pb "go.chromium.org/luci/resultdb/proto/v1"
    31  )
    32  
// followAllEdges is the edge set used by Query.parentIDRegexp when
// Query.FollowEdges is nil: both included invocations and test results are
// followed.
var followAllEdges = &pb.ArtifactPredicate_EdgeTypeSet{
	IncludedInvocations: true,
	TestResults:         true,
}
    37  
// Query specifies artifacts to fetch.
//
// Fields:
//   - InvocationIDs: only artifacts of these invocations are queried (bound to
//     the @invIDs statement parameter).
//   - ParentIDRegexp: explicit regexp for the ParentId column. Mutually
//     exclusive with TestResultPredicate and FollowEdges; combining them
//     panics in parentIDRegexp.
//   - FollowEdges: which artifact levels to include; nil means followAllEdges.
//   - TestResultPredicate: its TestIdRegexp, Variant and Expectancy are used to
//     restrict test-result-level artifacts.
//   - ContentTypeRegexp, ArtifactIDRegexp: regexps matched against the
//     ContentType and ArtifactId columns; "" and ".*" disable the filter.
//   - PageSize: page size limit; see the comment on the field.
//   - PageToken: token passed to invocations.TokenToMap to resume after the
//     artifact it identifies.
//   - WithRBECASHash, WithGcsURI: additionally select the RBECASHash / GcsURI
//     columns.
type Query struct {
	InvocationIDs       invocations.IDSet
	ParentIDRegexp      string
	FollowEdges         *pb.ArtifactPredicate_EdgeTypeSet
	TestResultPredicate *pb.TestResultPredicate
	ContentTypeRegexp   string
	ArtifactIDRegexp    string
	PageSize            int // must be positive for FetchProtos and zero for Run
	PageToken           string
	WithRBECASHash      bool
	WithGcsURI          bool
}
    51  
// Artifact contains pb.Artifact and its RBECAS hash.
//
// RBECASHash is read from the database only when Query.WithRBECASHash is set;
// otherwise it is left empty.
type Artifact struct {
	*pb.Artifact
	RBECASHash string
}
    57  
// tmplQueryArtifacts is a template for the SQL expression that queries
// artifacts. It is rendered by Query.genStmt, which supplies:
//   - .Q: the Query itself (WithRBECASHash, WithGcsURI and PageSize are read);
//   - .Params: the statement parameters; optional WHERE clauses are emitted
//     only when the corresponding parameter is set;
//   - .JoinWithTestResults, .InterestingTestResults and
//     .OnlyUnexpectedTestResults: flags that select whether and how artifacts
//     are joined with the TestResults table.
//
// Rows are ordered by InvocationId, ParentId, ArtifactId, which is the same
// key the page token clause compares against.
var tmplQueryArtifacts = template.Must(template.New("artifactQuery").Parse(`
@{USE_ADDITIONAL_PARALLELISM=TRUE}
WITH VariantsWithUnexpectedResults AS (
	SELECT DISTINCT TestId, VariantHash
	FROM TestResults@{FORCE_INDEX=UnexpectedTestResults, spanner_emulator.disable_query_null_filtered_index_check=true}
	WHERE IsUnexpected AND InvocationId IN UNNEST(@invIDs)
),
VariantsWithUnexpectedResultsOnly AS (
	SELECT TestId, VariantHash
	FROM VariantsWithUnexpectedResults vur
		JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr
			USING (TestId, VariantHash)
	WHERE InvocationId IN UNNEST(@invIDs)
	GROUP BY TestId, VariantHash
	HAVING LOGICAL_AND(IFNULL(IsUnexpected, false))
),
FilteredTestResults AS (
	SELECT InvocationId, FORMAT("tr/%s/%s", TestId, ResultId) as ParentId
	FROM
	{{ if .InterestingTestResults }}
		VariantsWithUnexpectedResults vur
		JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr
			USING (TestId, VariantHash)
	{{ else if .OnlyUnexpectedTestResults }}
		VariantsWithUnexpectedResultsOnly vuro
		JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr
			USING (TestId, VariantHash)
	{{ else }}
		TestResults tr
	{{ end }}
	WHERE InvocationId IN UNNEST(@invIDs)
{{ if .Params.variantHashEquals }}
		AND tr.VariantHash = @variantHashEquals
{{ end }}
{{ if .Params.variantContains }}
		AND (SELECT LOGICAL_AND(kv IN UNNEST(Variant)) FROM UNNEST(@variantContains) kv)
{{ end }}
)
SELECT InvocationId, ParentId, ArtifactId, ContentType, Size,
{{ if .Q.WithRBECASHash }}
	RBECASHash,
{{ end }}
{{ if .Q.WithGcsURI }}
	GcsURI
{{ end }}
FROM Artifacts art
{{ if .JoinWithTestResults }}
LEFT JOIN FilteredTestResults tr USING (InvocationId, ParentId)
{{ end }}
WHERE art.InvocationId IN UNNEST(@invIDs)
{{ if .Params.afterInvocationId }}
	# Skip artifacts after the one specified in the page token.
	AND (
		(art.InvocationId > @afterInvocationId) OR
		(art.InvocationId = @afterInvocationId AND art.ParentId > @afterParentId) OR
		(art.InvocationId = @afterInvocationId AND art.ParentId = @afterParentId AND art.ArtifactId > @afterArtifactId)
	)
{{ end }}
{{ if .Params.ParentIdRegexp }}
	AND REGEXP_CONTAINS(art.ParentId, @ParentIdRegexp)
{{end}}
{{ if .JoinWithTestResults }} AND (art.ParentId = "" OR tr.ParentId IS NOT NULL) {{ end }}
{{ if .Params.contentTypeRegexp }}
		AND REGEXP_CONTAINS(IFNULL(art.ContentType, ""), @contentTypeRegexp)
{{ end }}
{{ if .Params.artifactIdRegexp }}
		AND REGEXP_CONTAINS(IFNULL(art.ArtifactID, ""), @artifactIdRegexp)
{{ end }}
ORDER BY InvocationId, ParentId, ArtifactId
{{ if gt .Q.PageSize 0 }} LIMIT @limit {{ end }}
`))
   131  
   132  // genStmt generates a spanner statement and returns it without executing it.
   133  func (q *Query) genStmt(ctx context.Context) (spanner.Statement, error) {
   134  	if q.PageSize < 0 {
   135  		panic("PageSize < 0")
   136  	}
   137  
   138  	// Prepare query params.
   139  	params := map[string]any{}
   140  	params["invIDs"] = q.InvocationIDs
   141  	params["limit"] = q.PageSize
   142  	addREParamMaybe(params, "contentTypeRegexp", q.ContentTypeRegexp)
   143  	addREParamMaybe(params, "artifactIdRegexp", q.ArtifactIDRegexp)
   144  	addREParamMaybe(params, "ParentIdRegexp", q.parentIDRegexp())
   145  
   146  	if err := invocations.TokenToMap(q.PageToken, params, "afterInvocationId", "afterParentId", "afterArtifactId"); err != nil {
   147  		return spanner.Statement{}, err
   148  	}
   149  
   150  	testresults.PopulateVariantParams(params, q.TestResultPredicate.GetVariant())
   151  
   152  	// Prepeare statement generation input.
   153  	var input struct {
   154  		JoinWithTestResults       bool
   155  		InterestingTestResults    bool
   156  		OnlyUnexpectedTestResults bool
   157  		Q                         *Query
   158  		Params                    map[string]any
   159  	}
   160  	input.Params = params
   161  	// If we need to filter artifacts by attributes of test results, then
   162  	// join with test results table.
   163  	if q.FollowEdges == nil || q.FollowEdges.TestResults {
   164  		input.JoinWithTestResults = q.TestResultPredicate.GetVariant() != nil
   165  		switch q.TestResultPredicate.GetExpectancy() {
   166  		case pb.TestResultPredicate_VARIANTS_WITH_UNEXPECTED_RESULTS:
   167  			input.JoinWithTestResults = true
   168  			input.InterestingTestResults = true
   169  		case pb.TestResultPredicate_VARIANTS_WITH_ONLY_UNEXPECTED_RESULTS:
   170  			input.JoinWithTestResults = true
   171  			input.OnlyUnexpectedTestResults = true
   172  		}
   173  	}
   174  	input.Q = q
   175  
   176  	st, err := spanutil.GenerateStatement(tmplQueryArtifacts, input)
   177  	st.Params = params
   178  	return st, err
   179  }
   180  
   181  func (q *Query) run(ctx context.Context, f func(*Artifact) error) (err error) {
   182  	st, err := q.genStmt(ctx)
   183  	if err != nil {
   184  		return err
   185  	}
   186  	var b spanutil.Buffer
   187  	return spanutil.Query(ctx, st, func(row *spanner.Row) error {
   188  		a := &Artifact{
   189  			Artifact: &pb.Artifact{},
   190  		}
   191  		var invID invocations.ID
   192  		var parentID string
   193  		var contentType spanner.NullString
   194  		var size spanner.NullInt64
   195  		var rbecasHash spanner.NullString
   196  		var gcsURI spanner.NullString
   197  
   198  		ptrs := []any{
   199  			&invID, &parentID, &a.ArtifactId, &contentType, &size,
   200  		}
   201  		if q.WithRBECASHash {
   202  			ptrs = append(ptrs, &rbecasHash)
   203  		}
   204  		if q.WithGcsURI {
   205  			ptrs = append(ptrs, &gcsURI)
   206  		}
   207  		if err := b.FromSpanner(row, ptrs...); err != nil {
   208  			return err
   209  		}
   210  
   211  		// Initialize artifact name.
   212  		switch testID, resultID, err := ParseParentID(parentID); {
   213  		case err != nil:
   214  			return err
   215  		case testID == "":
   216  			a.Name = pbutil.InvocationArtifactName(string(invID), a.ArtifactId)
   217  		default:
   218  			a.Name = pbutil.TestResultArtifactName(string(invID), testID, resultID, a.ArtifactId)
   219  		}
   220  
   221  		a.ContentType = contentType.StringVal
   222  		a.SizeBytes = size.Int64
   223  		a.RBECASHash = rbecasHash.StringVal
   224  		a.GcsUri = gcsURI.StringVal
   225  
   226  		return f(a)
   227  	})
   228  }
   229  
   230  // Run calls f for artifacts matching the query.
   231  //
   232  // Refer to Fetch() for the ordering of returned artifacts.
   233  func (q *Query) Run(ctx context.Context, f func(*Artifact) error) error {
   234  	if q.PageSize != 0 {
   235  		panic("PageSize is specified when Query.Run")
   236  	}
   237  	return q.run(ctx, f)
   238  }
   239  
   240  // FetchProtos returns a page of artifact protos matching q.
   241  //
   242  // Returned artifacts are ordered by level (invocation or test result).
   243  // Test result artifacts are sorted by parent invocation ID, test ID and
   244  // artifact ID.
   245  func (q *Query) FetchProtos(ctx context.Context) (arts []*pb.Artifact, nextPageToken string, err error) {
   246  	if q.PageSize <= 0 {
   247  		panic("PageSize <= 0")
   248  	}
   249  
   250  	err = q.run(ctx, func(a *Artifact) error {
   251  		arts = append(arts, a.Artifact)
   252  		return nil
   253  	})
   254  	if err != nil {
   255  		arts = nil
   256  		return
   257  	}
   258  
   259  	// If we got pageSize results, then we haven't exhausted the collection and
   260  	// need to return the next page token.
   261  	if len(arts) == q.PageSize {
   262  		last := arts[q.PageSize-1]
   263  		invID, testID, resultID, artifactID := MustParseName(last.Name)
   264  		parentID := ParentID(testID, resultID)
   265  		nextPageToken = pagination.Token(string(invID), parentID, artifactID)
   266  	}
   267  	return
   268  }
   269  
   270  // parentIDRegexp returns a regular expression for ParentId column.
   271  // Uses q.FollowEdges and q.TestResultPredicate.TestIdRegexp to compute it.
   272  // The returned regexp is not necessarily surrounded with ^ or $.
   273  func (q *Query) parentIDRegexp() string {
   274  	// If it is explicitly specified, use it.
   275  	if q.ParentIDRegexp != "" {
   276  		if q.TestResultPredicate != nil || q.FollowEdges != nil {
   277  			// Do not ignore our bugs.
   278  			panic("explicit ParentIDRegexp is mutually exclusive with TestResultPredicate and FollowEdges")
   279  		}
   280  		return q.ParentIDRegexp
   281  	}
   282  
   283  	testIDRE := q.TestResultPredicate.GetTestIdRegexp()
   284  	hasTestIDRE := testIDRE != "" && testIDRE != ".*"
   285  
   286  	edges := q.FollowEdges
   287  	if edges == nil {
   288  		edges = followAllEdges
   289  	}
   290  
   291  	if edges.IncludedInvocations && edges.TestResults && !hasTestIDRE {
   292  		// Fast path.
   293  		return ".*"
   294  	}
   295  
   296  	// Collect alternatives and then combine them with "|".
   297  	var alts []string
   298  
   299  	if edges.IncludedInvocations {
   300  		// Invocation-level artifacts have empty parent ID.
   301  		alts = append(alts, "")
   302  	}
   303  
   304  	if edges.TestResults {
   305  		// TestResult-level artifacts have parent ID formatted as
   306  		// "tr/{testID}/{resultID}"
   307  		if hasTestIDRE {
   308  			alts = append(alts, fmt.Sprintf("tr/%s/[^/]+", testIDRE))
   309  		} else {
   310  			alts = append(alts, "tr/.+")
   311  		}
   312  	}
   313  
   314  	// Note: the surrounding parens are important. Without them any expression
   315  	// matches.
   316  	return fmt.Sprintf("(%s)", strings.Join(alts, "|"))
   317  }
   318  
   319  // addREParamMaybe adds a regexp parameter surrounded with ^ and $,
   320  // unless re matches everything.
   321  func addREParamMaybe(params map[string]any, name, re string) {
   322  	if re != "" && re != ".*" {
   323  		params[name] = fmt.Sprintf("^%s$", re)
   324  	}
   325  }