github.com/nya3jp/tast@v0.0.0-20230601000426-85c8e4d83a9b/src/go.chromium.org/tast/core/internal/extdata/extdata.go (about)

     1  // Copyright 2018 The ChromiumOS Authors
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file.
     4  
     5  // Package extdata implements the external data file mechanism.
     6  package extdata
     7  
     8  import (
     9  	"context"
    10  	"crypto/sha256"
    11  	"encoding/hex"
    12  	"encoding/json"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  	"io/ioutil"
    17  	"os"
    18  	"path/filepath"
    19  	"reflect"
    20  	"sort"
    21  	"strings"
    22  	"time"
    23  
    24  	"go.chromium.org/tast/core/internal/devserver"
    25  	"go.chromium.org/tast/core/internal/logging"
    26  	"go.chromium.org/tast/core/internal/protocol"
    27  	"go.chromium.org/tast/core/internal/testing"
    28  )
    29  
// LinkType represents a type of an external data link.
//
// It is the type of the "type" field of external data link files
// (see LinkData.Type).
type LinkType string

const (
	// TypeStatic is for a link to a file on web with fixed URL and content.
	// Its value is the empty string, so a link file that omits the "type"
	// field decodes to a static link.
	TypeStatic LinkType = ""

	// TypeArtifact is for a link to a file in ChromeOS build artifacts
	// corresponding to the DUT image version.
	TypeArtifact LinkType = "artifact"
)
    41  
// LinkData defines the schema of external data link files.
//
// Link files are JSON documents; the struct tags below give the JSON key of
// each field. Which fields must be set or left empty depends on Type and is
// enforced by newLink.
type LinkData struct {
	// Type declares the type of the external data link.
	Type LinkType `json:"type"`

	// StaticURL is the URL of the static external data file on Google Cloud Storage.
	// This field is valid for static external data links only.
	StaticURL string `json:"url"`

	// Size is the size of the external data file in bytes.
	// This field is valid for static external data links only.
	Size int64 `json:"size"`

	// SHA256Sum is the SHA256 hash of the external data file, encoded as a
	// lowercase hexadecimal string (verify compares it verbatim against the
	// output of hex.EncodeToString).
	// This field is valid for static external data links only.
	SHA256Sum string `json:"sha256sum"`

	// Name is the file name of a build artifact.
	// This field is valid for build artifact external data links only.
	Name string `json:"name"`

	// Executable specifies whether the external data file is executable.
	// If this is true, executable permission is given to the downloaded file.
	Executable bool `json:"executable"`
}
    67  
// link holds information of an external data link.
//
// Values are created by newLink, which validates Data and fills ComputedURL.
type link struct {
	// Data holds the original LinkData.
	Data LinkData

	// ComputedURL is the URL of the external data file on Google Cloud Storage.
	// For static links it equals Data.StaticURL; for artifact links it is the
	// build artifacts URL with Data.Name appended.
	ComputedURL string
}
    76  
    77  // newLink creates link from LinkData.
    78  func newLink(d *LinkData, artifactsURL string) (*link, error) {
    79  	switch d.Type {
    80  	case TypeStatic:
    81  		if d.StaticURL == "" {
    82  			return nil, errors.New("url field must not be empty for static external data file")
    83  		}
    84  		if d.Name != "" {
    85  			return nil, errors.New("name field must be empty for static external data file")
    86  		}
    87  		if d.SHA256Sum == "" {
    88  			return nil, errors.New("sha256sum field must not be empty for static external data file")
    89  		}
    90  		return &link{Data: *d, ComputedURL: d.StaticURL}, nil
    91  	case TypeArtifact:
    92  		if d.StaticURL != "" {
    93  			return nil, errors.New("url field must be empty for artifact external data file")
    94  		}
    95  		if d.Name == "" {
    96  			return nil, errors.New("name field must not be empty for artifact external data file")
    97  		}
    98  		if d.SHA256Sum != "" {
    99  			return nil, errors.New("sha256sum field must be empty for artifact external data file")
   100  		}
   101  		if d.Size != 0 {
   102  			return nil, errors.New("size field must be empty for artifact external data file")
   103  		}
   104  		if artifactsURL == "" {
   105  			return nil, errors.New("build artifact URL is unknown (running a developer build?)")
   106  		}
   107  		return &link{Data: *d, ComputedURL: artifactsURL + d.Name}, nil
   108  	default:
   109  		return nil, fmt.Errorf("unknown external data link type %q", d.Type)
   110  	}
   111  }
   112  
// DownloadJob represents a job to download an external data file and make hard links
// at several file paths.
//
// Jobs are built by Manager.PrepareDownloads (one per distinct ComputedURL)
// and consumed by RunDownloads.
type DownloadJob struct {
	// link describes the file to download.
	link *link
	// dests lists the destination paths at which the downloaded file is linked.
	dests []string
}
   119  
// downloadResult represents a result of a DownloadJob.
//
// RunDownloads workers send one value per finished job.
type downloadResult struct {
	job      *DownloadJob  // the job this result belongs to
	duration time.Duration // wall-clock time spent in runDownload
	size     int64         // number of bytes copied, as returned by runDownload
	err      error         // non-nil if the download failed
}
   127  
// Manager manages operations for external data files.
//
// NOTE(review): no synchronization of inuse is visible in this file; confirm
// that callers do not use a Manager from multiple goroutines concurrently.
type Manager struct {
	// dataDir is the base directory containing external data link files.
	dataDir string
	// artifactsURL is the build artifacts URL passed to NewManager; it is
	// used to resolve artifact links.
	artifactsURL string
	all          []string // all the locations external data files can exist.
	// inuse is a mutable field that maps external data files to the number
	// of entities currently using it.
	inuse map[string]int
}
   137  
   138  // NewManager creates a new Manager.
   139  //
   140  // dataDir is the path to the base directory containing external data link files
   141  // (typically "/usr/local/share/tast/data" on DUT). artifactURL is the URL of
   142  // Google Cloud Storage directory, ending with a slash, containing build
   143  // artifacts for the current ChromeOS image.
   144  func NewManager(ctx context.Context, dataDir, artifactsURL string) (*Manager, error) {
   145  	var all []string
   146  	if err := filepath.Walk(dataDir, func(linkPath string, info os.FileInfo, err error) error {
   147  		if err != nil {
   148  			return err
   149  		}
   150  		if !strings.HasSuffix(linkPath, testing.ExternalLinkSuffix) {
   151  			return nil
   152  		}
   153  		destPath := strings.TrimSuffix(linkPath, testing.ExternalLinkSuffix)
   154  		all = append(all, destPath)
   155  		return nil
   156  	}); err != nil && !os.IsNotExist(err) {
   157  		return nil, fmt.Errorf("Failed to walk data directory: %v", err)
   158  	}
   159  	sort.Strings(all)
   160  
   161  	return &Manager{
   162  		dataDir:      dataDir,
   163  		artifactsURL: artifactsURL,
   164  		all:          all,
   165  		inuse:        make(map[string]int),
   166  	}, nil
   167  }
   168  
   169  // Purgeable returns a list of external data file paths not needed by the
   170  // currently running entities. They can be deleted if the disk space is low.
   171  func (m *Manager) Purgeable() []string {
   172  	var res []string
   173  	for _, p := range m.all {
   174  		if m.inuse[p] > 0 {
   175  			continue
   176  		}
   177  		if _, err := os.Stat(p); err == nil {
   178  			res = append(res, p)
   179  		}
   180  	}
   181  	return res
   182  }
   183  
// PrepareDownloads computes a list of external data files that need to be
// downloaded for entities.
//
// PrepareDownloads also removes stale files so they are never used even if we
// fail to download them later. When it encounters errors, *.external-error
// files are saved so that they can be read and reported by bundles later.
//
// PrepareDownloads returns a list of download job specifications that can be
// passed to RunDownloads to perform actual downloads. Links sharing the same
// computed URL are merged into a single job with several destinations, and
// the returned jobs are sorted by URL.
//
// release must be called after entities finish. It decrements the in-use
// counters incremented here; each call decrements them again, so it should be
// called exactly once.
func (m *Manager) PrepareDownloads(ctx context.Context, entities []*protocol.Entity) (jobs []*DownloadJob, release func()) {
	// urlToJob holds one job per distinct computed URL, including URLs whose
	// files turn out to be up to date (those jobs end up with no dests).
	urlToJob := make(map[string]*DownloadJob)
	hasErr := false

	// releaseFunc collects the per-file undo actions run by release.
	var releaseFunc []func()

	// Process tests.
	for _, t := range entities {
		for _, name := range t.GetDependencies().GetDataFiles() {
			destPath := filepath.Join(m.dataDir, testing.RelativeDataDir(t.Package), name)
			linkPath := destPath + testing.ExternalLinkSuffix
			errorPath := destPath + testing.ExternalErrorSuffix

			// reportErr logs the problem, records it in the error file next
			// to the destination, and remembers that some error happened.
			// The result of writing the error file is not checked.
			reportErr := func(format string, args ...interface{}) {
				msg := fmt.Sprintf("failed to prepare downloading %s: %s", name, fmt.Sprintf(format, args...))
				logging.Info(ctx, strings.ToUpper(msg[:1])+msg[1:])
				ioutil.WriteFile(errorPath, []byte(msg), 0666)
				hasErr = true
			}

			// Clear the error message first.
			os.Remove(errorPath)

			_, err := os.Stat(linkPath)
			if os.IsNotExist(err) {
				// Not an external data file.
				continue
			} else if err != nil {
				reportErr("failed to stat %s: %v", linkPath, err)
				continue
			}

			link, err := loadLink(linkPath, m.artifactsURL)
			if err != nil {
				reportErr("failed to load %s: %v", linkPath, err)
				continue
			}

			// This file is not purgeable.
			m.inuse[destPath]++
			releaseFunc = append(releaseFunc, func() {
				m.inuse[destPath]--
			})

			// Decide if we need to update the destination file.
			needed := false
			f, err := os.Open(destPath)
			if err == nil {
				needed = shouldDownload(f, link, destPath)
				f.Close()

				if needed {
					// Remove the stale file early so that they are never used.
					if err := os.Remove(destPath); err != nil {
						reportErr("failed to remove stale file %s: %v", destPath, err)
						continue
					}
				}
			} else if os.IsNotExist(err) {
				// Nothing has been downloaded to destPath yet.
				needed = true
			} else {
				// os.Open failed for a reason other than non-existence
				// (the reported message says "stat").
				reportErr("failed to stat %s: %v", destPath, err)
				continue
			}

			// To check consistency, create an entry in urlToJob even if we are not updating the destination file.
			job := urlToJob[link.ComputedURL]
			if job == nil {
				job = &DownloadJob{link, nil}
				urlToJob[link.ComputedURL] = job
			} else if !reflect.DeepEqual(job.link, link) {
				// Two link files resolve to the same URL but disagree on
				// other fields.
				reportErr("conflicting external data link found at %s: got %+v, want %+v", filepath.Join(testing.RelativeDataDir(t.Package), name), link, job.link)
				continue
			}

			if needed {
				// Use O(n^2) algorithm assuming the number of duplicates is small.
				dup := false
				for _, d := range job.dests {
					if d == destPath {
						dup = true
						break
					}
				}
				if !dup {
					job.dests = append(job.dests, destPath)
				}
			}
		}
	}

	// Only jobs with at least one destination require an actual download.
	for _, j := range urlToJob {
		if len(j.dests) > 0 {
			jobs = append(jobs, j)
		}
	}
	// Map iteration order is random; sort by URL for a deterministic result.
	sort.Slice(jobs, func(i, j int) bool {
		return jobs[i].link.ComputedURL < jobs[j].link.ComputedURL
	})

	logging.Infof(ctx, "Found %d external linked data file(s), need to download %d", len(urlToJob), len(jobs))
	if hasErr {
		logging.Info(ctx, "Encountered some errors on scanning external data link files, but continuing anyway; corresponding tests will fail")
	}
	return jobs, func() {
		for _, f := range releaseFunc {
			f()
		}
	}
}
   305  
   306  // writeExternalURLRecord record url source of external file.
   307  func writeExternalURLRecord(ctx context.Context, job *DownloadJob) {
   308  	if job.link.Data.Type == TypeArtifact {
   309  		for _, dest := range job.dests {
   310  			urlRecordFile := dest + testing.ExternalURLSuffix
   311  			err := os.WriteFile(urlRecordFile, []byte(job.link.ComputedURL), 0666)
   312  
   313  			if err != nil {
   314  				// Non critical error.
   315  				msg := fmt.Sprintf("Failed to write urlRecord %s, content: %s: %v", urlRecordFile, job.link.ComputedURL, err)
   316  				logging.Info(ctx, msg)
   317  			}
   318  		}
   319  	}
   320  }
   321  
   322  // loadLink loads a JSON file of LinkData.
   323  func loadLink(path, artifactsURL string) (*link, error) {
   324  	f, err := os.Open(path)
   325  	if err != nil {
   326  		return nil, err
   327  	}
   328  	defer f.Close()
   329  
   330  	var d LinkData
   331  	if err := json.NewDecoder(f).Decode(&d); err != nil {
   332  		return nil, err
   333  	}
   334  
   335  	l, err := newLink(&d, artifactsURL)
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  	return l, nil
   340  }
   341  
   342  // RunDownloads downloads required external data files in parallel.
   343  //
   344  // dataDir is the path to the base directory containing external data link files
   345  // (typically "/usr/local/share/tast/data" on DUT). jobs are typically obtained
   346  // by calling PrepareDownloads.
   347  //
   348  // This function does not return errors; instead it tries to download files as
   349  // far as possible and logs encountered errors with ctx so that a single
   350  // download error does not cause all tests to fail.
   351  func RunDownloads(ctx context.Context, dataDir string, jobs []*DownloadJob, cl devserver.Client) {
   352  	jobCh := make(chan *DownloadJob, len(jobs))
   353  	for _, job := range jobs {
   354  		jobCh <- job
   355  	}
   356  	close(jobCh)
   357  
   358  	const parallelism = 4
   359  	resCh := make(chan *downloadResult, len(jobs))
   360  	for i := 0; i < parallelism; i++ {
   361  		go func() {
   362  			for job := range jobCh {
   363  				start := time.Now()
   364  				size, err := runDownload(ctx, dataDir, job, cl)
   365  				duration := time.Since(start)
   366  				resCh <- &downloadResult{job, duration, size, err}
   367  			}
   368  		}()
   369  	}
   370  
   371  	hasErr := false
   372  	finished := 0
   373  	for finished < len(jobs) {
   374  		select {
   375  		case res := <-resCh:
   376  			if res.err != nil {
   377  				msg := fmt.Sprintf("failed to download %s: %v", res.job.link.ComputedURL, res.err)
   378  				logging.Info(ctx, strings.ToUpper(msg[:1])+msg[1:])
   379  				for _, dest := range res.job.dests {
   380  					ioutil.WriteFile(dest+testing.ExternalErrorSuffix, []byte(msg), 0666)
   381  				}
   382  				hasErr = true
   383  			} else {
   384  				mbs := float64(res.size) / res.duration.Seconds() / 1024 / 1024
   385  				logging.Infof(ctx, "Finished downloading %s (%d bytes, %v, %.1fMB/s)",
   386  					res.job.link.ComputedURL, res.size, res.duration.Round(time.Millisecond), mbs)
   387  				writeExternalURLRecord(ctx, res.job)
   388  			}
   389  			finished++
   390  		case <-time.After(30 * time.Second):
   391  			// Without this keep-alive message, the tast command may think that the SSH connection was lost.
   392  			// TODO(nya): Remove this keep-alive logic after 20190701.
   393  			logging.Info(ctx, "Still downloading...")
   394  		}
   395  	}
   396  
   397  	if hasErr {
   398  		logging.Info(ctx, "Failed to download some external data files, but continuing anyway; corresponding tests will fail")
   399  	}
   400  }
   401  
   402  // runDownload downloads an external data file.
   403  func runDownload(ctx context.Context, dataDir string, job *DownloadJob, cl devserver.Client) (size int64, retErr error) {
   404  	// Create the temporary file under dataDir to make use of hard links.
   405  	f, err := ioutil.TempFile(dataDir, ".external-download.")
   406  	if err != nil {
   407  		return 0, err
   408  	}
   409  	defer os.Remove(f.Name())
   410  	defer func() {
   411  		if err := f.Close(); err != nil && retErr == nil {
   412  			retErr = err
   413  		}
   414  	}()
   415  
   416  	var mode os.FileMode = 0644
   417  	if job.link.Data.Executable {
   418  		mode = 0755
   419  	}
   420  	if err := f.Chmod(mode); err != nil {
   421  		return 0, err
   422  	}
   423  
   424  	r, err := cl.Open(ctx, job.link.ComputedURL)
   425  	if err != nil {
   426  		return 0, err
   427  	}
   428  	defer r.Close()
   429  
   430  	size, err = io.Copy(f, r)
   431  	if err != nil {
   432  		return size, err
   433  	}
   434  
   435  	if err := verify(f, job.link); err != nil {
   436  		return size, err
   437  	}
   438  
   439  	for _, dest := range job.dests {
   440  		if err := os.Remove(dest); err != nil && !os.IsNotExist(err) {
   441  			return size, err
   442  		}
   443  
   444  		if err := os.Link(f.Name(), dest); err != nil {
   445  			return size, err
   446  		}
   447  	}
   448  	return size, nil
   449  }
   450  
   451  // shouldDownload decides if the file needs to be downloaded
   452  func shouldDownload(f *os.File, link *link, destPath string) bool {
   453  
   454  	if link.Data.Type == TypeArtifact {
   455  		// For Artifact type, we check for staleness of the previous downloaded files.
   456  		// Staleness check is done by comparing current download url to previusly download
   457  		// url
   458  		urlRecordPath := destPath + testing.ExternalURLSuffix
   459  
   460  		bytes, err := os.ReadFile(urlRecordPath)
   461  		// url record does not exists, meaning file previously was never downloaded.
   462  		// Lets download it.
   463  		if err != nil {
   464  			return true
   465  		}
   466  		// url record exists. Lets examine the record.
   467  		urlRecord := string(bytes)
   468  		return link.ComputedURL != urlRecord
   469  	}
   470  	return verify(f, link) != nil
   471  }
   472  
   473  // verify checks the integrity of an external data file.
   474  func verify(f *os.File, link *link) error {
   475  	if link.Data.Type == TypeArtifact {
   476  		// For artifacts, we do not verify files.
   477  		return nil
   478  	}
   479  
   480  	fi, err := f.Stat()
   481  	if err != nil {
   482  		return err
   483  	}
   484  	if fi.Size() != link.Data.Size {
   485  		return fmt.Errorf("file size mismatch; got %d bytes, want %d bytes", fi.Size(), link.Data.Size)
   486  	}
   487  
   488  	if _, err := f.Seek(0, 0); err != nil {
   489  		return err
   490  	}
   491  
   492  	hasher := sha256.New()
   493  	if _, err := io.Copy(hasher, f); err != nil {
   494  		return fmt.Errorf("failed to compute hash: %v", err)
   495  	}
   496  	hash := hex.EncodeToString(hasher.Sum(nil))
   497  	if hash != link.Data.SHA256Sum {
   498  		return fmt.Errorf("hash mismatch; got %s, want %s", hash, link.Data.SHA256Sum)
   499  	}
   500  	return nil
   501  }