github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/dashboard/app/asset_storage.go (about) 1 // Copyright 2022 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "net/http" 11 "sort" 12 "time" 13 14 "github.com/google/syzkaller/dashboard/dashapi" 15 "github.com/google/syzkaller/pkg/asset" 16 "github.com/google/syzkaller/sys/targets" 17 "golang.org/x/sync/errgroup" 18 "google.golang.org/appengine/v2" 19 db "google.golang.org/appengine/v2/datastore" 20 "google.golang.org/appengine/v2/log" 21 ) 22 23 // TODO: decide if we want to save job-related assets. 24 25 func appendBuildAssets(c context.Context, ns, buildID string, assets []Asset) (*Build, error) { 26 var retBuild *Build 27 tx := func(c context.Context) error { 28 build, err := loadBuild(c, ns, buildID) 29 if err != nil { 30 return err 31 } 32 retBuild = build 33 appendedOk := false 34 var appendErr error 35 for _, newAsset := range assets { 36 appendErr = build.AppendAsset(newAsset) 37 if appendErr == nil { 38 appendedOk = true 39 } 40 } 41 // It took quite a number of resources to upload the files, so we return success 42 // even if we managed to save at least one of the new assets. 43 if !appendedOk { 44 return fmt.Errorf("failed to append all assets, last error %w", appendErr) 45 } 46 if _, err := db.Put(c, buildKey(c, ns, buildID), build); err != nil { 47 return fmt.Errorf("failed to put build: %w", err) 48 } 49 log.Infof(c, "updated build: %#v", build) 50 return nil 51 } 52 if err := db.RunInTransaction(c, tx, &db.TransactionOptions{}); err != nil { 53 return nil, err 54 } 55 return retBuild, nil 56 } 57 58 var ErrAssetDuplicated = errors.New("an asset of this type is already present") 59 60 func (build *Build) AppendAsset(addAsset Asset) error { 61 typeInfo := asset.GetTypeDescription(addAsset.Type) 62 if typeInfo == nil { 63 return fmt.Errorf("unknown asset type") 64 } 65 if !typeInfo.AllowMultiple { 66 for _, obj := range build.Assets { 67 if obj.Type == addAsset.Type { 68 return ErrAssetDuplicated 69 } 70 } 71 } 72 build.Assets = append(build.Assets, addAsset) 73 return nil 74 } 75 76 func queryNeededAssets(c context.Context) (*dashapi.NeededAssetsResp, error) { 77 buildURLs, crashURLs := []string{}, []string{} 78 g, _ := errgroup.WithContext(c) 79 g.Go(func() error { 80 var err error 81 buildURLs, err = neededBuildURLs(c) 82 return err 83 }) 84 g.Go(func() error { 85 var err error 86 crashURLs, err = neededCrashURLs(c) 87 return err 88 }) 89 if err := g.Wait(); err != nil { 90 return nil, err 91 } 92 return &dashapi.NeededAssetsResp{ 93 DownloadURLs: append(buildURLs, crashURLs...), 94 }, nil 95 } 96 97 // nolint: dupl 98 func neededBuildURLs(c context.Context) ([]string, error) { 99 var builds []*Build 100 _, err := db.NewQuery("Build"). 101 Filter("Assets.DownloadURL>", ""). 102 Project("Assets.DownloadURL"). 103 GetAll(c, &builds) 104 if err != nil { 105 return nil, fmt.Errorf("failed to query builds: %w", err) 106 } 107 log.Infof(c, "queried %v builds with assets", len(builds)) 108 ret := []string{} 109 for _, build := range builds { 110 for _, asset := range build.Assets { 111 ret = append(ret, asset.DownloadURL) 112 } 113 } 114 return ret, nil 115 } 116 117 // nolint: dupl 118 func neededCrashURLs(c context.Context) ([]string, error) { 119 var crashes []*Crash 120 _, err := db.NewQuery("Crash"). 121 Filter("Assets.DownloadURL>", ""). 122 Project("Assets.DownloadURL"). 123 GetAll(c, &crashes) 124 if err != nil { 125 return nil, fmt.Errorf("failed to query assets: %w", err) 126 } 127 log.Infof(c, "queried %v crashes with assets", len(crashes)) 128 ret := []string{} 129 for _, crash := range crashes { 130 for _, asset := range crash.Assets { 131 ret = append(ret, asset.DownloadURL) 132 } 133 } 134 return ret, nil 135 } 136 137 func handleDeprecateAssets(w http.ResponseWriter, r *http.Request) { 138 c := appengine.NewContext(r) 139 for ns := range getConfig(c).Namespaces { 140 err := deprecateNamespaceAssets(c, ns) 141 if err != nil { 142 log.Errorf(c, "deprecateNamespaceAssets failed for ns=%v: %v", ns, err) 143 } 144 } 145 err := deprecateCrashAssets(c) 146 if err != nil { 147 log.Errorf(c, "deprecateCrashAssets failed: %v", err) 148 } 149 } 150 151 func deprecateCrashAssets(c context.Context) error { 152 ad := crashAssetDeprecator{c: c} 153 const crashBatchSize = 16 154 return ad.batchProcessCrashes(crashBatchSize) 155 } 156 157 func deprecateNamespaceAssets(c context.Context, ns string) error { 158 ad := buildAssetDeprecator{ 159 ns: ns, 160 c: c, 161 lastBuilds: map[string]*Build{}, 162 } 163 const buildBatchSize = 16 164 err := ad.batchProcessBuilds(buildBatchSize) 165 if err != nil { 166 return fmt.Errorf("build batch processing failed: %w", err) 167 } 168 return nil 169 } 170 171 type buildAssetDeprecator struct { 172 ns string 173 c context.Context 174 bugsQueried bool 175 relevantBugs map[string]bool 176 lastBuilds map[string]*Build 177 } 178 179 const keepAssetsForClosedBugs = time.Hour * 24 * 30 180 181 func (ad *buildAssetDeprecator) lastBuild(manager string) (*Build, error) { 182 build, ok := ad.lastBuilds[manager] 183 if ok { 184 return build, nil 185 } 186 lastBuild, err := lastManagerBuild(ad.c, ad.ns, manager) 187 if err != nil { 188 return nil, err 189 } 190 ad.lastBuilds[manager] = lastBuild 191 return lastBuild, err 192 } 193 194 func (ad *buildAssetDeprecator) queryBugs() error { 195 if ad.bugsQueried { 196 return nil 197 } 198 var openBugKeys []*db.Key 199 var closedBugKeys []*db.Key 200 g, _ := errgroup.WithContext(context.Background()) 201 g.Go(func() error { 202 // Query open bugs. 203 var err error 204 openBugKeys, err = db.NewQuery("Bug"). 205 Filter("Namespace=", ad.ns). 206 Filter("Status=", BugStatusOpen). 207 KeysOnly(). 208 GetAll(ad.c, nil) 209 if err != nil { 210 return fmt.Errorf("failed to fetch open builds: %w", err) 211 } 212 return nil 213 }) 214 g.Go(func() error { 215 // Query recently closed bugs. 216 var err error 217 closedBugKeys, err = db.NewQuery("Bug"). 218 Filter("Namespace=", ad.ns). 219 Filter("Closed>", timeNow(ad.c).Add(-keepAssetsForClosedBugs)). 220 KeysOnly(). 221 GetAll(ad.c, nil) 222 if err != nil { 223 return fmt.Errorf("failed to fetch closed builds: %w", err) 224 } 225 return nil 226 }) 227 err := g.Wait() 228 if err != nil { 229 return fmt.Errorf("failed to query bugs: %w", err) 230 } 231 ad.relevantBugs = map[string]bool{} 232 for _, key := range append(append([]*db.Key{}, openBugKeys...), closedBugKeys...) { 233 ad.relevantBugs[key.String()] = true 234 } 235 return nil 236 } 237 238 func (ad *buildAssetDeprecator) buildArchivePolicy(build *Build, asset *Asset) (bool, error) { 239 // Query builds to see whether there's a newer same-type asset on the same week. 240 var builds []*Build 241 _, err := db.NewQuery("Build"). 242 Filter("Namespace=", ad.ns). 243 Filter("Manager=", build.Manager). 244 Filter("Assets.Type=", asset.Type). 245 Filter("Assets.CreateDate>", asset.CreateDate). 246 Limit(1). 247 Order("Assets.CreateDate"). 248 GetAll(ad.c, &builds) 249 if err != nil { 250 return false, fmt.Errorf("failed to query newer assets: %w", err) 251 } 252 log.Infof(ad.c, "running archive policy for %s, date %s; queried %d builds", 253 asset.DownloadURL, asset.CreateDate, len(builds)) 254 sameWeek := false 255 if len(builds) > 0 { 256 origY, origW := asset.CreateDate.ISOWeek() 257 for _, nextAsset := range builds[0].Assets { 258 if nextAsset.Type != asset.Type { 259 continue 260 } 261 if nextAsset.CreateDate.Before(asset.CreateDate) || 262 nextAsset.CreateDate.Equal(asset.CreateDate) { 263 continue 264 } 265 nextY, nextW := nextAsset.CreateDate.ISOWeek() 266 if origY == nextY && origW == nextW { 267 log.Infof(ad.c, "found a newer asset: %s, date %s", 268 nextAsset.DownloadURL, nextAsset.CreateDate) 269 sameWeek = true 270 break 271 } 272 } 273 } 274 return !sameWeek, nil 275 } 276 277 func (ad *buildAssetDeprecator) buildBugStatusPolicy(build *Build) (bool, error) { 278 if err := ad.queryBugs(); err != nil { 279 return false, fmt.Errorf("failed to query bugs: %w", err) 280 } 281 keys, err := db.NewQuery("Crash"). 282 Filter("BuildID=", build.ID). 283 KeysOnly(). 284 GetAll(ad.c, nil) 285 if err != nil { 286 return false, fmt.Errorf("failed to query crashes: %w", err) 287 } 288 for _, key := range keys { 289 bugKey := key.Parent() 290 if _, ok := ad.relevantBugs[bugKey.String()]; ok { 291 // At least one crash is related to an opened/recently closed bug. 292 return true, nil 293 } 294 } 295 // If there are no crashes, but it's the latest build, they may still appear. 296 lastBuild, err := ad.lastBuild(build.Manager) 297 if err != nil { 298 return false, nil 299 } 300 return build.ID == lastBuild.ID, nil 301 } 302 303 func (ad *buildAssetDeprecator) needThisBuildAsset(build *Build, buildAsset *Asset) (bool, error) { 304 // If the asset is reasonably new, we always keep it. 305 const alwaysKeepPeriod = time.Hour * 24 * 14 306 if buildAsset.CreateDate.After(timeNow(ad.c).Add(-alwaysKeepPeriod)) { 307 return true, nil 308 } 309 if buildAsset.Type == dashapi.HTMLCoverageReport { 310 // We want to keep coverage reports forever, not just 311 // while there are any open bugs. But we don't want to 312 // keep all coverage reports, just a share of them. 313 return ad.buildArchivePolicy(build, buildAsset) 314 } 315 if build.Type == BuildNormal || build.Type == BuildFailed { 316 // A build-related asset, keep it only while there are open bugs with crashes 317 // related to this build. 318 return ad.buildBugStatusPolicy(build) 319 } 320 // TODO: fix this once this is no longer the case. 321 return false, fmt.Errorf("job-related assets are not supported yet") 322 } 323 324 func filterOutAssets(assets []Asset, deleteList []string) []Asset { 325 toDelete := map[string]bool{} 326 for _, url := range deleteList { 327 toDelete[url] = true 328 } 329 newAssets := []Asset{} 330 for _, asset := range assets { 331 if _, ok := toDelete[asset.DownloadURL]; !ok { 332 newAssets = append(newAssets, asset) 333 } 334 } 335 return newAssets 336 } 337 338 func (ad *buildAssetDeprecator) updateBuild(buildID string, urlsToDelete []string) error { 339 tx := func(c context.Context) error { 340 build, err := loadBuild(ad.c, ad.ns, buildID) 341 if build == nil || err != nil { 342 // Assume the DB has been updated in the meanwhile. 343 return nil 344 } 345 build.Assets = filterOutAssets(build.Assets, urlsToDelete) 346 build.AssetsLastCheck = timeNow(ad.c) 347 if _, err := db.Put(ad.c, buildKey(ad.c, ad.ns, buildID), build); err != nil { 348 return fmt.Errorf("failed to save build: %w", err) 349 } 350 return nil 351 } 352 if err := db.RunInTransaction(ad.c, tx, nil); err != nil { 353 return fmt.Errorf("failed to update build: %w", err) 354 } 355 return nil 356 } 357 358 func (ad *buildAssetDeprecator) batchProcessBuilds(count int) error { 359 // We cannot query only the Build with non-empty Assets array and yet sort 360 // by AssetsLastCheck. The datastore returns "The first sort property must 361 // be the same as the property to which the inequality filter is applied. 362 // In your query the first sort property is AssetsLastCheck but the inequality 363 // filter is on Assets.DownloadURL. 364 // So we have to omit Filter("Assets.DownloadURL>", ""). here. 365 var builds []*Build 366 _, err := db.NewQuery("Build"). 367 Filter("Namespace=", ad.ns). 368 Order("AssetsLastCheck"). 369 Limit(count). 370 GetAll(ad.c, &builds) 371 if err != nil { 372 return fmt.Errorf("failed to fetch builds: %w", err) 373 } 374 for _, build := range builds { 375 toDelete := []string{} 376 for _, asset := range build.Assets { 377 needed, err := ad.needThisBuildAsset(build, &asset) 378 if err != nil { 379 return fmt.Errorf("failed to test asset: %w", err) 380 } else if !needed { 381 toDelete = append(toDelete, asset.DownloadURL) 382 } 383 } 384 err := ad.updateBuild(build.ID, toDelete) 385 if err != nil { 386 return err 387 } 388 } 389 return nil 390 } 391 392 type crashAssetDeprecator struct { 393 c context.Context 394 } 395 396 func (ad *crashAssetDeprecator) batchProcessCrashes(count int) error { 397 // Unfortunately we cannot only query the crashes with assets. 398 // See the explanation in batchProcessBuilds(). 399 var crashes []*Crash 400 crashKeys, err := db.NewQuery("Crash"). 401 Order("AssetsLastCheck"). 402 Limit(count). 403 GetAll(ad.c, &crashes) 404 if err != nil { 405 return fmt.Errorf("failed to fetch crashes: %w", err) 406 } 407 for i, crash := range crashes { 408 toDelete := []string{} 409 for _, asset := range crash.Assets { 410 needed, err := ad.needThisCrashAsset(crashKeys[i], &asset) 411 if err != nil { 412 return fmt.Errorf("failed to test crash asset: %w", err) 413 } else if !needed { 414 toDelete = append(toDelete, asset.DownloadURL) 415 } 416 } 417 if i > 0 { 418 // Sleep for one second to prevent the "API error 2 (datastore_v3: 419 // CONCURRENT_TRANSACTION): too much contention on these datastore 420 // entities. please try again." error. 421 time.Sleep(time.Second) 422 } 423 err := ad.updateCrash(crashKeys[i], toDelete) 424 if err != nil { 425 return err 426 } 427 } 428 return nil 429 } 430 431 func (ad *crashAssetDeprecator) needThisCrashAsset(crashKey *db.Key, crashAsset *Asset) (bool, error) { 432 if crashAsset.Type == dashapi.MountInRepro { 433 // We keed mount images from reproducers for as long as the bug is still relevant. 434 // They're not that big to set stricter limits. 435 return ad.bugStatusPolicy(crashKey, crashAsset) 436 } 437 return false, fmt.Errorf("no deprecation policy for %s", crashAsset.Type) 438 } 439 440 func (ad *crashAssetDeprecator) bugStatusPolicy(crashKey *db.Key, crashAsset *Asset) (bool, error) { 441 bugKey := crashKey.Parent() 442 bug := new(Bug) 443 err := db.Get(ad.c, bugKey, bug) 444 if err != nil { 445 return false, fmt.Errorf("failed to query bug: %w", err) 446 } 447 return bug.Status == BugStatusOpen || 448 bug.Closed.After(timeNow(ad.c).Add(-keepAssetsForClosedBugs)), nil 449 } 450 451 func (ad *crashAssetDeprecator) updateCrash(crashKey *db.Key, urlsToDelete []string) error { 452 tx := func(c context.Context) error { 453 crash := new(Crash) 454 err := db.Get(c, crashKey, crash) 455 if err != nil { 456 // Assume the DB has been updated in the meanwhile. 457 return nil 458 } 459 crash.Assets = filterOutAssets(crash.Assets, urlsToDelete) 460 crash.AssetsLastCheck = timeNow(ad.c) 461 if _, err := db.Put(ad.c, crashKey, crash); err != nil { 462 return fmt.Errorf("failed to save crash: %w", err) 463 } 464 return nil 465 } 466 if err := db.RunInTransaction(ad.c, tx, &db.TransactionOptions{Attempts: 10}); err != nil { 467 return fmt.Errorf("failed to update crash: %w", err) 468 } 469 return nil 470 } 471 472 func queryLatestManagerAssets(c context.Context, ns string, assetType dashapi.AssetType, 473 period time.Duration) (map[string]Asset, error) { 474 var builds []*Build 475 startTime := timeNow(c).Add(-period) 476 query := db.NewQuery("Build") 477 if ns != "" { 478 query = query.Filter("Namespace=", ns) 479 } 480 _, err := query.Filter("Assets.Type=", assetType). 481 Filter("Assets.CreateDate>", startTime). 482 Order("Assets.CreateDate"). 483 GetAll(c, &builds) 484 if err != nil { 485 return nil, err 486 } 487 ret := map[string]Asset{} 488 for _, build := range builds { 489 for _, asset := range build.Assets { 490 if asset.Type != assetType { 491 continue 492 } 493 ret[build.Manager] = asset 494 } 495 } 496 return ret, nil 497 } 498 499 func createAssetList(build *Build, crash *Crash, forReport bool) []dashapi.Asset { 500 var crashAssets []Asset 501 if crash != nil { 502 crashAssets = crash.Assets 503 } 504 assetList := []dashapi.Asset{} 505 for _, reportAsset := range append(build.Assets, crashAssets...) { 506 typeDescr := asset.GetTypeDescription(reportAsset.Type) 507 if typeDescr == nil || forReport && typeDescr.NoReporting { 508 continue 509 } 510 assetList = append(assetList, dashapi.Asset{ 511 Title: typeDescr.GetTitle(targets.Get(build.OS, build.Arch)), 512 DownloadURL: reportAsset.DownloadURL, 513 Type: reportAsset.Type, 514 }) 515 } 516 sort.SliceStable(assetList, func(i, j int) bool { 517 return asset.GetTypeDescription(assetList[i].Type).ReportingPrio < 518 asset.GetTypeDescription(assetList[j].Type).ReportingPrio 519 }) 520 handleDupAssetTitles(assetList) 521 return assetList 522 } 523 524 // Convert asset lists like {"Mounted image", "Mounted image"} to {"Mounted image #1", "Mounted image #2"}. 525 func handleDupAssetTitles(assetList []dashapi.Asset) { 526 duplicates := map[string]bool{} 527 for _, asset := range assetList { 528 if _, ok := duplicates[asset.Title]; ok { 529 duplicates[asset.Title] = true 530 } else { 531 duplicates[asset.Title] = false 532 } 533 } 534 counts := map[string]int{} 535 for i, asset := range assetList { 536 if !duplicates[asset.Title] { 537 continue 538 } 539 counts[asset.Title]++ 540 assetList[i].Title = fmt.Sprintf("%s #%d", asset.Title, counts[asset.Title]) 541 } 542 }