go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/artifacts/query.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package artifacts 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "text/template" 22 23 "cloud.google.com/go/spanner" 24 25 "go.chromium.org/luci/resultdb/internal/invocations" 26 "go.chromium.org/luci/resultdb/internal/pagination" 27 "go.chromium.org/luci/resultdb/internal/spanutil" 28 "go.chromium.org/luci/resultdb/internal/testresults" 29 "go.chromium.org/luci/resultdb/pbutil" 30 pb "go.chromium.org/luci/resultdb/proto/v1" 31 ) 32 33 var followAllEdges = &pb.ArtifactPredicate_EdgeTypeSet{ 34 IncludedInvocations: true, 35 TestResults: true, 36 } 37 38 // Query specifies artifacts to fetch. 39 type Query struct { 40 InvocationIDs invocations.IDSet 41 ParentIDRegexp string 42 FollowEdges *pb.ArtifactPredicate_EdgeTypeSet 43 TestResultPredicate *pb.TestResultPredicate 44 ContentTypeRegexp string 45 ArtifactIDRegexp string 46 PageSize int // must be positive 47 PageToken string 48 WithRBECASHash bool 49 WithGcsURI bool 50 } 51 52 // Artifact contains pb.Artifact and its RBECAS hash. 53 type Artifact struct { 54 *pb.Artifact 55 RBECASHash string 56 } 57 58 // tmplQueryArtifacts is a template for the SQL expression that queries 59 // artifacts. See also ArtifactQuery. 60 var tmplQueryArtifacts = template.Must(template.New("artifactQuery").Parse(` 61 @{USE_ADDITIONAL_PARALLELISM=TRUE} 62 WITH VariantsWithUnexpectedResults AS ( 63 SELECT DISTINCT TestId, VariantHash 64 FROM TestResults@{FORCE_INDEX=UnexpectedTestResults, spanner_emulator.disable_query_null_filtered_index_check=true} 65 WHERE IsUnexpected AND InvocationId IN UNNEST(@invIDs) 66 ), 67 VariantsWithUnexpectedResultsOnly AS ( 68 SELECT TestId, VariantHash 69 FROM VariantsWithUnexpectedResults vur 70 JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr 71 USING (TestId, VariantHash) 72 WHERE InvocationId IN UNNEST(@invIDs) 73 GROUP BY TestId, VariantHash 74 HAVING LOGICAL_AND(IFNULL(IsUnexpected, false)) 75 ), 76 FilteredTestResults AS ( 77 SELECT InvocationId, FORMAT("tr/%s/%s", TestId, ResultId) as ParentId 78 FROM 79 {{ if .InterestingTestResults }} 80 VariantsWithUnexpectedResults vur 81 JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr 82 USING (TestId, VariantHash) 83 {{ else if .OnlyUnexpectedTestResults }} 84 VariantsWithUnexpectedResultsOnly vuro 85 JOIN@{FORCE_JOIN_ORDER=TRUE, JOIN_METHOD=HASH_JOIN} TestResults tr 86 USING (TestId, VariantHash) 87 {{ else }} 88 TestResults tr 89 {{ end }} 90 WHERE InvocationId IN UNNEST(@invIDs) 91 {{ if .Params.variantHashEquals }} 92 AND tr.VariantHash = @variantHashEquals 93 {{ end }} 94 {{ if .Params.variantContains }} 95 AND (SELECT LOGICAL_AND(kv IN UNNEST(Variant)) FROM UNNEST(@variantContains) kv) 96 {{ end }} 97 ) 98 SELECT InvocationId, ParentId, ArtifactId, ContentType, Size, 99 {{ if .Q.WithRBECASHash }} 100 RBECASHash, 101 {{ end }} 102 {{ if .Q.WithGcsURI }} 103 GcsURI 104 {{ end }} 105 FROM Artifacts art 106 {{ if .JoinWithTestResults }} 107 LEFT JOIN FilteredTestResults tr USING (InvocationId, ParentId) 108 {{ end }} 109 WHERE art.InvocationId IN UNNEST(@invIDs) 110 {{ if .Params.afterInvocationId }} 111 # Skip artifacts after the one specified in the page token. 112 AND ( 113 (art.InvocationId > @afterInvocationId) OR 114 (art.InvocationId = @afterInvocationId AND art.ParentId > @afterParentId) OR 115 (art.InvocationId = @afterInvocationId AND art.ParentId = @afterParentId AND art.ArtifactId > @afterArtifactId) 116 ) 117 {{ end }} 118 {{ if .Params.ParentIdRegexp }} 119 AND REGEXP_CONTAINS(art.ParentId, @ParentIdRegexp) 120 {{end}} 121 {{ if .JoinWithTestResults }} AND (art.ParentId = "" OR tr.ParentId IS NOT NULL) {{ end }} 122 {{ if .Params.contentTypeRegexp }} 123 AND REGEXP_CONTAINS(IFNULL(art.ContentType, ""), @contentTypeRegexp) 124 {{ end }} 125 {{ if .Params.artifactIdRegexp }} 126 AND REGEXP_CONTAINS(IFNULL(art.ArtifactID, ""), @artifactIdRegexp) 127 {{ end }} 128 ORDER BY InvocationId, ParentId, ArtifactId 129 {{ if gt .Q.PageSize 0 }} LIMIT @limit {{ end }} 130 `)) 131 132 // genStmt generates a spanner statement and returns it without executing it. 133 func (q *Query) genStmt(ctx context.Context) (spanner.Statement, error) { 134 if q.PageSize < 0 { 135 panic("PageSize < 0") 136 } 137 138 // Prepare query params. 139 params := map[string]any{} 140 params["invIDs"] = q.InvocationIDs 141 params["limit"] = q.PageSize 142 addREParamMaybe(params, "contentTypeRegexp", q.ContentTypeRegexp) 143 addREParamMaybe(params, "artifactIdRegexp", q.ArtifactIDRegexp) 144 addREParamMaybe(params, "ParentIdRegexp", q.parentIDRegexp()) 145 146 if err := invocations.TokenToMap(q.PageToken, params, "afterInvocationId", "afterParentId", "afterArtifactId"); err != nil { 147 return spanner.Statement{}, err 148 } 149 150 testresults.PopulateVariantParams(params, q.TestResultPredicate.GetVariant()) 151 152 // Prepeare statement generation input. 153 var input struct { 154 JoinWithTestResults bool 155 InterestingTestResults bool 156 OnlyUnexpectedTestResults bool 157 Q *Query 158 Params map[string]any 159 } 160 input.Params = params 161 // If we need to filter artifacts by attributes of test results, then 162 // join with test results table. 163 if q.FollowEdges == nil || q.FollowEdges.TestResults { 164 input.JoinWithTestResults = q.TestResultPredicate.GetVariant() != nil 165 switch q.TestResultPredicate.GetExpectancy() { 166 case pb.TestResultPredicate_VARIANTS_WITH_UNEXPECTED_RESULTS: 167 input.JoinWithTestResults = true 168 input.InterestingTestResults = true 169 case pb.TestResultPredicate_VARIANTS_WITH_ONLY_UNEXPECTED_RESULTS: 170 input.JoinWithTestResults = true 171 input.OnlyUnexpectedTestResults = true 172 } 173 } 174 input.Q = q 175 176 st, err := spanutil.GenerateStatement(tmplQueryArtifacts, input) 177 st.Params = params 178 return st, err 179 } 180 181 func (q *Query) run(ctx context.Context, f func(*Artifact) error) (err error) { 182 st, err := q.genStmt(ctx) 183 if err != nil { 184 return err 185 } 186 var b spanutil.Buffer 187 return spanutil.Query(ctx, st, func(row *spanner.Row) error { 188 a := &Artifact{ 189 Artifact: &pb.Artifact{}, 190 } 191 var invID invocations.ID 192 var parentID string 193 var contentType spanner.NullString 194 var size spanner.NullInt64 195 var rbecasHash spanner.NullString 196 var gcsURI spanner.NullString 197 198 ptrs := []any{ 199 &invID, &parentID, &a.ArtifactId, &contentType, &size, 200 } 201 if q.WithRBECASHash { 202 ptrs = append(ptrs, &rbecasHash) 203 } 204 if q.WithGcsURI { 205 ptrs = append(ptrs, &gcsURI) 206 } 207 if err := b.FromSpanner(row, ptrs...); err != nil { 208 return err 209 } 210 211 // Initialize artifact name. 212 switch testID, resultID, err := ParseParentID(parentID); { 213 case err != nil: 214 return err 215 case testID == "": 216 a.Name = pbutil.InvocationArtifactName(string(invID), a.ArtifactId) 217 default: 218 a.Name = pbutil.TestResultArtifactName(string(invID), testID, resultID, a.ArtifactId) 219 } 220 221 a.ContentType = contentType.StringVal 222 a.SizeBytes = size.Int64 223 a.RBECASHash = rbecasHash.StringVal 224 a.GcsUri = gcsURI.StringVal 225 226 return f(a) 227 }) 228 } 229 230 // Run calls f for artifacts matching the query. 231 // 232 // Refer to Fetch() for the ordering of returned artifacts. 233 func (q *Query) Run(ctx context.Context, f func(*Artifact) error) error { 234 if q.PageSize != 0 { 235 panic("PageSize is specified when Query.Run") 236 } 237 return q.run(ctx, f) 238 } 239 240 // FetchProtos returns a page of artifact protos matching q. 241 // 242 // Returned artifacts are ordered by level (invocation or test result). 243 // Test result artifacts are sorted by parent invocation ID, test ID and 244 // artifact ID. 245 func (q *Query) FetchProtos(ctx context.Context) (arts []*pb.Artifact, nextPageToken string, err error) { 246 if q.PageSize <= 0 { 247 panic("PageSize <= 0") 248 } 249 250 err = q.run(ctx, func(a *Artifact) error { 251 arts = append(arts, a.Artifact) 252 return nil 253 }) 254 if err != nil { 255 arts = nil 256 return 257 } 258 259 // If we got pageSize results, then we haven't exhausted the collection and 260 // need to return the next page token. 261 if len(arts) == q.PageSize { 262 last := arts[q.PageSize-1] 263 invID, testID, resultID, artifactID := MustParseName(last.Name) 264 parentID := ParentID(testID, resultID) 265 nextPageToken = pagination.Token(string(invID), parentID, artifactID) 266 } 267 return 268 } 269 270 // parentIDRegexp returns a regular expression for ParentId column. 271 // Uses q.FollowEdges and q.TestResultPredicate.TestIdRegexp to compute it. 272 // The returned regexp is not necessarily surrounded with ^ or $. 273 func (q *Query) parentIDRegexp() string { 274 // If it is explicitly specified, use it. 275 if q.ParentIDRegexp != "" { 276 if q.TestResultPredicate != nil || q.FollowEdges != nil { 277 // Do not ignore our bugs. 278 panic("explicit ParentIDRegexp is mutually exclusive with TestResultPredicate and FollowEdges") 279 } 280 return q.ParentIDRegexp 281 } 282 283 testIDRE := q.TestResultPredicate.GetTestIdRegexp() 284 hasTestIDRE := testIDRE != "" && testIDRE != ".*" 285 286 edges := q.FollowEdges 287 if edges == nil { 288 edges = followAllEdges 289 } 290 291 if edges.IncludedInvocations && edges.TestResults && !hasTestIDRE { 292 // Fast path. 293 return ".*" 294 } 295 296 // Collect alternatives and then combine them with "|". 297 var alts []string 298 299 if edges.IncludedInvocations { 300 // Invocation-level artifacts have empty parent ID. 301 alts = append(alts, "") 302 } 303 304 if edges.TestResults { 305 // TestResult-level artifacts have parent ID formatted as 306 // "tr/{testID}/{resultID}" 307 if hasTestIDRE { 308 alts = append(alts, fmt.Sprintf("tr/%s/[^/]+", testIDRE)) 309 } else { 310 alts = append(alts, "tr/.+") 311 } 312 } 313 314 // Note: the surrounding parens are important. Without them any expression 315 // matches. 316 return fmt.Sprintf("(%s)", strings.Join(alts, "|")) 317 } 318 319 // addREParamMaybe adds a regexp parameter surrounded with ^ and $, 320 // unless re matches everything. 321 func addREParamMaybe(params map[string]any, name, re string) { 322 if re != "" && re != ".*" { 323 params[name] = fmt.Sprintf("^%s$", re) 324 } 325 }