github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/dashboard/app/asset_storage.go (about)

     1  // Copyright 2022 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"net/http"
    11  	"sort"
    12  	"time"
    13  
    14  	"github.com/google/syzkaller/dashboard/dashapi"
    15  	"github.com/google/syzkaller/pkg/asset"
    16  	"github.com/google/syzkaller/sys/targets"
    17  	"golang.org/x/sync/errgroup"
    18  	"google.golang.org/appengine/v2"
    19  	db "google.golang.org/appengine/v2/datastore"
    20  	"google.golang.org/appengine/v2/log"
    21  )
    22  
    23  // TODO: decide if we want to save job-related assets.
    24  
    25  func appendBuildAssets(c context.Context, ns, buildID string, assets []Asset) (*Build, error) {
    26  	var retBuild *Build
    27  	tx := func(c context.Context) error {
    28  		build, err := loadBuild(c, ns, buildID)
    29  		if err != nil {
    30  			return err
    31  		}
    32  		retBuild = build
    33  		appendedOk := false
    34  		var appendErr error
    35  		for _, newAsset := range assets {
    36  			appendErr = build.AppendAsset(newAsset)
    37  			if appendErr == nil {
    38  				appendedOk = true
    39  			}
    40  		}
    41  		// It took quite a number of resources to upload the files, so we return success
    42  		// even if we managed to save at least one of the new assets.
    43  		if !appendedOk {
    44  			return fmt.Errorf("failed to append all assets, last error %w", appendErr)
    45  		}
    46  		if _, err := db.Put(c, buildKey(c, ns, buildID), build); err != nil {
    47  			return fmt.Errorf("failed to put build: %w", err)
    48  		}
    49  		log.Infof(c, "updated build: %#v", build)
    50  		return nil
    51  	}
    52  	if err := db.RunInTransaction(c, tx, &db.TransactionOptions{}); err != nil {
    53  		return nil, err
    54  	}
    55  	return retBuild, nil
    56  }
    57  
// ErrAssetDuplicated is returned by Build.AppendAsset when the build already
// holds an asset of a type that does not allow multiple instances.
var ErrAssetDuplicated = errors.New("an asset of this type is already present")
    59  
    60  func (build *Build) AppendAsset(addAsset Asset) error {
    61  	typeInfo := asset.GetTypeDescription(addAsset.Type)
    62  	if typeInfo == nil {
    63  		return fmt.Errorf("unknown asset type")
    64  	}
    65  	if !typeInfo.AllowMultiple {
    66  		for _, obj := range build.Assets {
    67  			if obj.Type == addAsset.Type {
    68  				return ErrAssetDuplicated
    69  			}
    70  		}
    71  	}
    72  	build.Assets = append(build.Assets, addAsset)
    73  	return nil
    74  }
    75  
    76  func queryNeededAssets(c context.Context) (*dashapi.NeededAssetsResp, error) {
    77  	buildURLs, crashURLs := []string{}, []string{}
    78  	g, _ := errgroup.WithContext(c)
    79  	g.Go(func() error {
    80  		var err error
    81  		buildURLs, err = neededBuildURLs(c)
    82  		return err
    83  	})
    84  	g.Go(func() error {
    85  		var err error
    86  		crashURLs, err = neededCrashURLs(c)
    87  		return err
    88  	})
    89  	if err := g.Wait(); err != nil {
    90  		return nil, err
    91  	}
    92  	return &dashapi.NeededAssetsResp{
    93  		DownloadURLs: append(buildURLs, crashURLs...),
    94  	}, nil
    95  }
    96  
    97  // nolint: dupl
    98  func neededBuildURLs(c context.Context) ([]string, error) {
    99  	var builds []*Build
   100  	_, err := db.NewQuery("Build").
   101  		Filter("Assets.DownloadURL>", "").
   102  		Project("Assets.DownloadURL").
   103  		GetAll(c, &builds)
   104  	if err != nil {
   105  		return nil, fmt.Errorf("failed to query builds: %w", err)
   106  	}
   107  	log.Infof(c, "queried %v builds with assets", len(builds))
   108  	ret := []string{}
   109  	for _, build := range builds {
   110  		for _, asset := range build.Assets {
   111  			ret = append(ret, asset.DownloadURL)
   112  		}
   113  	}
   114  	return ret, nil
   115  }
   116  
   117  // nolint: dupl
   118  func neededCrashURLs(c context.Context) ([]string, error) {
   119  	var crashes []*Crash
   120  	_, err := db.NewQuery("Crash").
   121  		Filter("Assets.DownloadURL>", "").
   122  		Project("Assets.DownloadURL").
   123  		GetAll(c, &crashes)
   124  	if err != nil {
   125  		return nil, fmt.Errorf("failed to query assets: %w", err)
   126  	}
   127  	log.Infof(c, "queried %v crashes with assets", len(crashes))
   128  	ret := []string{}
   129  	for _, crash := range crashes {
   130  		for _, asset := range crash.Assets {
   131  			ret = append(ret, asset.DownloadURL)
   132  		}
   133  	}
   134  	return ret, nil
   135  }
   136  
   137  func handleDeprecateAssets(w http.ResponseWriter, r *http.Request) {
   138  	c := appengine.NewContext(r)
   139  	for ns := range getConfig(c).Namespaces {
   140  		err := deprecateNamespaceAssets(c, ns)
   141  		if err != nil {
   142  			log.Errorf(c, "deprecateNamespaceAssets failed for ns=%v: %v", ns, err)
   143  		}
   144  	}
   145  	err := deprecateCrashAssets(c)
   146  	if err != nil {
   147  		log.Errorf(c, "deprecateCrashAssets failed: %v", err)
   148  	}
   149  }
   150  
   151  func deprecateCrashAssets(c context.Context) error {
   152  	ad := crashAssetDeprecator{c: c}
   153  	const crashBatchSize = 16
   154  	return ad.batchProcessCrashes(crashBatchSize)
   155  }
   156  
   157  func deprecateNamespaceAssets(c context.Context, ns string) error {
   158  	ad := buildAssetDeprecator{
   159  		ns:         ns,
   160  		c:          c,
   161  		lastBuilds: map[string]*Build{},
   162  	}
   163  	const buildBatchSize = 16
   164  	err := ad.batchProcessBuilds(buildBatchSize)
   165  	if err != nil {
   166  		return fmt.Errorf("build batch processing failed: %w", err)
   167  	}
   168  	return nil
   169  }
   170  
// buildAssetDeprecator holds the state of one build asset deprecation pass
// over a single namespace.
type buildAssetDeprecator struct {
	// ns is the namespace whose builds are processed.
	ns string
	// c is the context used for datastore calls and for obtaining the current time.
	c context.Context
	// bugsQueried is checked by queryBugs: when true, queryBugs returns early.
	bugsQueried bool
	// relevantBugs is filled by queryBugs with the string form of the keys of
	// open and recently closed bugs of the namespace.
	relevantBugs map[string]bool
	// lastBuilds caches the result of lastManagerBuild per manager name.
	lastBuilds map[string]*Build
}
   178  
// keepAssetsForClosedBugs is how long after a bug was closed (30 days) it is
// still treated as relevant when deciding whether to keep assets.
const keepAssetsForClosedBugs = time.Hour * 24 * 30
   180  
   181  func (ad *buildAssetDeprecator) lastBuild(manager string) (*Build, error) {
   182  	build, ok := ad.lastBuilds[manager]
   183  	if ok {
   184  		return build, nil
   185  	}
   186  	lastBuild, err := lastManagerBuild(ad.c, ad.ns, manager)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  	ad.lastBuilds[manager] = lastBuild
   191  	return lastBuild, err
   192  }
   193  
   194  func (ad *buildAssetDeprecator) queryBugs() error {
   195  	if ad.bugsQueried {
   196  		return nil
   197  	}
   198  	var openBugKeys []*db.Key
   199  	var closedBugKeys []*db.Key
   200  	g, _ := errgroup.WithContext(context.Background())
   201  	g.Go(func() error {
   202  		// Query open bugs.
   203  		var err error
   204  		openBugKeys, err = db.NewQuery("Bug").
   205  			Filter("Namespace=", ad.ns).
   206  			Filter("Status=", BugStatusOpen).
   207  			KeysOnly().
   208  			GetAll(ad.c, nil)
   209  		if err != nil {
   210  			return fmt.Errorf("failed to fetch open builds: %w", err)
   211  		}
   212  		return nil
   213  	})
   214  	g.Go(func() error {
   215  		// Query recently closed bugs.
   216  		var err error
   217  		closedBugKeys, err = db.NewQuery("Bug").
   218  			Filter("Namespace=", ad.ns).
   219  			Filter("Closed>", timeNow(ad.c).Add(-keepAssetsForClosedBugs)).
   220  			KeysOnly().
   221  			GetAll(ad.c, nil)
   222  		if err != nil {
   223  			return fmt.Errorf("failed to fetch closed builds: %w", err)
   224  		}
   225  		return nil
   226  	})
   227  	err := g.Wait()
   228  	if err != nil {
   229  		return fmt.Errorf("failed to query bugs: %w", err)
   230  	}
   231  	ad.relevantBugs = map[string]bool{}
   232  	for _, key := range append(append([]*db.Key{}, openBugKeys...), closedBugKeys...) {
   233  		ad.relevantBugs[key.String()] = true
   234  	}
   235  	return nil
   236  }
   237  
   238  func (ad *buildAssetDeprecator) buildArchivePolicy(build *Build, asset *Asset) (bool, error) {
   239  	// Query builds to see whether there's a newer same-type asset on the same week.
   240  	var builds []*Build
   241  	_, err := db.NewQuery("Build").
   242  		Filter("Namespace=", ad.ns).
   243  		Filter("Manager=", build.Manager).
   244  		Filter("Assets.Type=", asset.Type).
   245  		Filter("Assets.CreateDate>", asset.CreateDate).
   246  		Limit(1).
   247  		Order("Assets.CreateDate").
   248  		GetAll(ad.c, &builds)
   249  	if err != nil {
   250  		return false, fmt.Errorf("failed to query newer assets: %w", err)
   251  	}
   252  	log.Infof(ad.c, "running archive policy for %s, date %s; queried %d builds",
   253  		asset.DownloadURL, asset.CreateDate, len(builds))
   254  	sameWeek := false
   255  	if len(builds) > 0 {
   256  		origY, origW := asset.CreateDate.ISOWeek()
   257  		for _, nextAsset := range builds[0].Assets {
   258  			if nextAsset.Type != asset.Type {
   259  				continue
   260  			}
   261  			if nextAsset.CreateDate.Before(asset.CreateDate) ||
   262  				nextAsset.CreateDate.Equal(asset.CreateDate) {
   263  				continue
   264  			}
   265  			nextY, nextW := nextAsset.CreateDate.ISOWeek()
   266  			if origY == nextY && origW == nextW {
   267  				log.Infof(ad.c, "found a newer asset: %s, date %s",
   268  					nextAsset.DownloadURL, nextAsset.CreateDate)
   269  				sameWeek = true
   270  				break
   271  			}
   272  		}
   273  	}
   274  	return !sameWeek, nil
   275  }
   276  
   277  func (ad *buildAssetDeprecator) buildBugStatusPolicy(build *Build) (bool, error) {
   278  	if err := ad.queryBugs(); err != nil {
   279  		return false, fmt.Errorf("failed to query bugs: %w", err)
   280  	}
   281  	keys, err := db.NewQuery("Crash").
   282  		Filter("BuildID=", build.ID).
   283  		KeysOnly().
   284  		GetAll(ad.c, nil)
   285  	if err != nil {
   286  		return false, fmt.Errorf("failed to query crashes: %w", err)
   287  	}
   288  	for _, key := range keys {
   289  		bugKey := key.Parent()
   290  		if _, ok := ad.relevantBugs[bugKey.String()]; ok {
   291  			// At least one crash is related to an opened/recently closed bug.
   292  			return true, nil
   293  		}
   294  	}
   295  	// If there are no crashes, but it's the latest build, they may still appear.
   296  	lastBuild, err := ad.lastBuild(build.Manager)
   297  	if err != nil {
   298  		return false, nil
   299  	}
   300  	return build.ID == lastBuild.ID, nil
   301  }
   302  
   303  func (ad *buildAssetDeprecator) needThisBuildAsset(build *Build, buildAsset *Asset) (bool, error) {
   304  	// If the asset is reasonably new, we always keep it.
   305  	const alwaysKeepPeriod = time.Hour * 24 * 14
   306  	if buildAsset.CreateDate.After(timeNow(ad.c).Add(-alwaysKeepPeriod)) {
   307  		return true, nil
   308  	}
   309  	if buildAsset.Type == dashapi.HTMLCoverageReport {
   310  		// We want to keep coverage reports forever, not just
   311  		// while there are any open bugs. But we don't want to
   312  		// keep all coverage reports, just a share of them.
   313  		return ad.buildArchivePolicy(build, buildAsset)
   314  	}
   315  	if build.Type == BuildNormal || build.Type == BuildFailed {
   316  		// A build-related asset, keep it only while there are open bugs with crashes
   317  		// related to this build.
   318  		return ad.buildBugStatusPolicy(build)
   319  	}
   320  	// TODO: fix this once this is no longer the case.
   321  	return false, fmt.Errorf("job-related assets are not supported yet")
   322  }
   323  
   324  func filterOutAssets(assets []Asset, deleteList []string) []Asset {
   325  	toDelete := map[string]bool{}
   326  	for _, url := range deleteList {
   327  		toDelete[url] = true
   328  	}
   329  	newAssets := []Asset{}
   330  	for _, asset := range assets {
   331  		if _, ok := toDelete[asset.DownloadURL]; !ok {
   332  			newAssets = append(newAssets, asset)
   333  		}
   334  	}
   335  	return newAssets
   336  }
   337  
   338  func (ad *buildAssetDeprecator) updateBuild(buildID string, urlsToDelete []string) error {
   339  	tx := func(c context.Context) error {
   340  		build, err := loadBuild(ad.c, ad.ns, buildID)
   341  		if build == nil || err != nil {
   342  			// Assume the DB has been updated in the meanwhile.
   343  			return nil
   344  		}
   345  		build.Assets = filterOutAssets(build.Assets, urlsToDelete)
   346  		build.AssetsLastCheck = timeNow(ad.c)
   347  		if _, err := db.Put(ad.c, buildKey(ad.c, ad.ns, buildID), build); err != nil {
   348  			return fmt.Errorf("failed to save build: %w", err)
   349  		}
   350  		return nil
   351  	}
   352  	if err := db.RunInTransaction(ad.c, tx, nil); err != nil {
   353  		return fmt.Errorf("failed to update build: %w", err)
   354  	}
   355  	return nil
   356  }
   357  
   358  func (ad *buildAssetDeprecator) batchProcessBuilds(count int) error {
   359  	// We cannot query only the Build with non-empty Assets array and yet sort
   360  	// by AssetsLastCheck. The datastore returns "The first sort property must
   361  	// be the same as the property to which the inequality filter is applied.
   362  	// In your query the first sort property is AssetsLastCheck but the inequality
   363  	// filter is on Assets.DownloadURL.
   364  	// So we have to omit Filter("Assets.DownloadURL>", ""). here.
   365  	var builds []*Build
   366  	_, err := db.NewQuery("Build").
   367  		Filter("Namespace=", ad.ns).
   368  		Order("AssetsLastCheck").
   369  		Limit(count).
   370  		GetAll(ad.c, &builds)
   371  	if err != nil {
   372  		return fmt.Errorf("failed to fetch builds: %w", err)
   373  	}
   374  	for _, build := range builds {
   375  		toDelete := []string{}
   376  		for _, asset := range build.Assets {
   377  			needed, err := ad.needThisBuildAsset(build, &asset)
   378  			if err != nil {
   379  				return fmt.Errorf("failed to test asset: %w", err)
   380  			} else if !needed {
   381  				toDelete = append(toDelete, asset.DownloadURL)
   382  			}
   383  		}
   384  		err := ad.updateBuild(build.ID, toDelete)
   385  		if err != nil {
   386  			return err
   387  		}
   388  	}
   389  	return nil
   390  }
   391  
// crashAssetDeprecator holds the state of one crash asset deprecation pass.
type crashAssetDeprecator struct {
	// c is the context used for datastore calls and for obtaining the current time.
	c context.Context
}
   395  
   396  func (ad *crashAssetDeprecator) batchProcessCrashes(count int) error {
   397  	// Unfortunately we cannot only query the crashes with assets.
   398  	// See the explanation in batchProcessBuilds().
   399  	var crashes []*Crash
   400  	crashKeys, err := db.NewQuery("Crash").
   401  		Order("AssetsLastCheck").
   402  		Limit(count).
   403  		GetAll(ad.c, &crashes)
   404  	if err != nil {
   405  		return fmt.Errorf("failed to fetch crashes: %w", err)
   406  	}
   407  	for i, crash := range crashes {
   408  		toDelete := []string{}
   409  		for _, asset := range crash.Assets {
   410  			needed, err := ad.needThisCrashAsset(crashKeys[i], &asset)
   411  			if err != nil {
   412  				return fmt.Errorf("failed to test crash asset: %w", err)
   413  			} else if !needed {
   414  				toDelete = append(toDelete, asset.DownloadURL)
   415  			}
   416  		}
   417  		if i > 0 {
   418  			// Sleep for one second to prevent the "API error 2 (datastore_v3:
   419  			// CONCURRENT_TRANSACTION): too much contention on these datastore
   420  			// entities. please try again." error.
   421  			time.Sleep(time.Second)
   422  		}
   423  		err := ad.updateCrash(crashKeys[i], toDelete)
   424  		if err != nil {
   425  			return err
   426  		}
   427  	}
   428  	return nil
   429  }
   430  
   431  func (ad *crashAssetDeprecator) needThisCrashAsset(crashKey *db.Key, crashAsset *Asset) (bool, error) {
   432  	if crashAsset.Type == dashapi.MountInRepro {
   433  		// We keed mount images from reproducers for as long as the bug is still relevant.
   434  		// They're not that big to set stricter limits.
   435  		return ad.bugStatusPolicy(crashKey, crashAsset)
   436  	}
   437  	return false, fmt.Errorf("no deprecation policy for %s", crashAsset.Type)
   438  }
   439  
   440  func (ad *crashAssetDeprecator) bugStatusPolicy(crashKey *db.Key, crashAsset *Asset) (bool, error) {
   441  	bugKey := crashKey.Parent()
   442  	bug := new(Bug)
   443  	err := db.Get(ad.c, bugKey, bug)
   444  	if err != nil {
   445  		return false, fmt.Errorf("failed to query bug: %w", err)
   446  	}
   447  	return bug.Status == BugStatusOpen ||
   448  		bug.Closed.After(timeNow(ad.c).Add(-keepAssetsForClosedBugs)), nil
   449  }
   450  
   451  func (ad *crashAssetDeprecator) updateCrash(crashKey *db.Key, urlsToDelete []string) error {
   452  	tx := func(c context.Context) error {
   453  		crash := new(Crash)
   454  		err := db.Get(c, crashKey, crash)
   455  		if err != nil {
   456  			// Assume the DB has been updated in the meanwhile.
   457  			return nil
   458  		}
   459  		crash.Assets = filterOutAssets(crash.Assets, urlsToDelete)
   460  		crash.AssetsLastCheck = timeNow(ad.c)
   461  		if _, err := db.Put(ad.c, crashKey, crash); err != nil {
   462  			return fmt.Errorf("failed to save crash: %w", err)
   463  		}
   464  		return nil
   465  	}
   466  	if err := db.RunInTransaction(ad.c, tx, &db.TransactionOptions{Attempts: 10}); err != nil {
   467  		return fmt.Errorf("failed to update crash: %w", err)
   468  	}
   469  	return nil
   470  }
   471  
   472  func queryLatestManagerAssets(c context.Context, ns string, assetType dashapi.AssetType,
   473  	period time.Duration) (map[string]Asset, error) {
   474  	var builds []*Build
   475  	startTime := timeNow(c).Add(-period)
   476  	query := db.NewQuery("Build")
   477  	if ns != "" {
   478  		query = query.Filter("Namespace=", ns)
   479  	}
   480  	_, err := query.Filter("Assets.Type=", assetType).
   481  		Filter("Assets.CreateDate>", startTime).
   482  		Order("Assets.CreateDate").
   483  		GetAll(c, &builds)
   484  	if err != nil {
   485  		return nil, err
   486  	}
   487  	ret := map[string]Asset{}
   488  	for _, build := range builds {
   489  		for _, asset := range build.Assets {
   490  			if asset.Type != assetType {
   491  				continue
   492  			}
   493  			ret[build.Manager] = asset
   494  		}
   495  	}
   496  	return ret, nil
   497  }
   498  
   499  func createAssetList(build *Build, crash *Crash, forReport bool) []dashapi.Asset {
   500  	var crashAssets []Asset
   501  	if crash != nil {
   502  		crashAssets = crash.Assets
   503  	}
   504  	assetList := []dashapi.Asset{}
   505  	for _, reportAsset := range append(build.Assets, crashAssets...) {
   506  		typeDescr := asset.GetTypeDescription(reportAsset.Type)
   507  		if typeDescr == nil || forReport && typeDescr.NoReporting {
   508  			continue
   509  		}
   510  		assetList = append(assetList, dashapi.Asset{
   511  			Title:       typeDescr.GetTitle(targets.Get(build.OS, build.Arch)),
   512  			DownloadURL: reportAsset.DownloadURL,
   513  			Type:        reportAsset.Type,
   514  		})
   515  	}
   516  	sort.SliceStable(assetList, func(i, j int) bool {
   517  		return asset.GetTypeDescription(assetList[i].Type).ReportingPrio <
   518  			asset.GetTypeDescription(assetList[j].Type).ReportingPrio
   519  	})
   520  	handleDupAssetTitles(assetList)
   521  	return assetList
   522  }
   523  
   524  // Convert asset lists like {"Mounted image", "Mounted image"} to {"Mounted image #1", "Mounted image #2"}.
   525  func handleDupAssetTitles(assetList []dashapi.Asset) {
   526  	duplicates := map[string]bool{}
   527  	for _, asset := range assetList {
   528  		if _, ok := duplicates[asset.Title]; ok {
   529  			duplicates[asset.Title] = true
   530  		} else {
   531  			duplicates[asset.Title] = false
   532  		}
   533  	}
   534  	counts := map[string]int{}
   535  	for i, asset := range assetList {
   536  		if !duplicates[asset.Title] {
   537  			continue
   538  		}
   539  		counts[asset.Title]++
   540  		assetList[i].Title = fmt.Sprintf("%s #%d", asset.Title, counts[asset.Title])
   541  	}
   542  }