github.com/nya3jp/tast@v0.0.0-20230601000426-85c8e4d83a9b/src/go.chromium.org/tast/core/internal/extdata/extdata.go (about) 1 // Copyright 2018 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Package extdata implements the external data file mechanism. 6 package extdata 7 8 import ( 9 "context" 10 "crypto/sha256" 11 "encoding/hex" 12 "encoding/json" 13 "errors" 14 "fmt" 15 "io" 16 "io/ioutil" 17 "os" 18 "path/filepath" 19 "reflect" 20 "sort" 21 "strings" 22 "time" 23 24 "go.chromium.org/tast/core/internal/devserver" 25 "go.chromium.org/tast/core/internal/logging" 26 "go.chromium.org/tast/core/internal/protocol" 27 "go.chromium.org/tast/core/internal/testing" 28 ) 29 30 // LinkType represents a type of an external data link. 31 type LinkType string 32 33 const ( 34 // TypeStatic is for a link to a file on web with fixed URL and content. 35 TypeStatic LinkType = "" 36 37 // TypeArtifact is for a link to a file in ChromeOS build artifacts 38 // corresponding to the DUT image version. 39 TypeArtifact LinkType = "artifact" 40 ) 41 42 // LinkData defines the schema of external data link files. 43 type LinkData struct { 44 // Type declares the type of the external data link. 45 Type LinkType `json:"type"` 46 47 // StaticURL is the URL of the static external data file on Google Cloud Storage. 48 // This field is valid for static external data links only. 49 StaticURL string `json:"url"` 50 51 // Size is the size of the external data file in bytes. 52 // This field is valid for static external data links only. 53 Size int64 `json:"size"` 54 55 // Size is SHA256 hash of the external data file. 56 // This field is valid for static external data links only. 57 SHA256Sum string `json:"sha256sum"` 58 59 // Name is the file name of a build artifact. 60 // This field is valid for build artifact external data links only. 61 Name string `json:"name"` 62 63 // Executable specifies whether the external data file is executable. 64 // If this is true, executable permission is given to the downloaded file. 65 Executable bool `json:"executable"` 66 } 67 68 // link holds information of an external data link. 69 type link struct { 70 // Data holds the original LinkData. 71 Data LinkData 72 73 // ComputedURL is the URL of the external data file on Google Cloud Storage. 74 ComputedURL string 75 } 76 77 // newLink creates link from LinkData. 78 func newLink(d *LinkData, artifactsURL string) (*link, error) { 79 switch d.Type { 80 case TypeStatic: 81 if d.StaticURL == "" { 82 return nil, errors.New("url field must not be empty for static external data file") 83 } 84 if d.Name != "" { 85 return nil, errors.New("name field must be empty for static external data file") 86 } 87 if d.SHA256Sum == "" { 88 return nil, errors.New("sha256sum field must not be empty for static external data file") 89 } 90 return &link{Data: *d, ComputedURL: d.StaticURL}, nil 91 case TypeArtifact: 92 if d.StaticURL != "" { 93 return nil, errors.New("url field must be empty for artifact external data file") 94 } 95 if d.Name == "" { 96 return nil, errors.New("name field must not be empty for artifact external data file") 97 } 98 if d.SHA256Sum != "" { 99 return nil, errors.New("sha256sum field must be empty for artifact external data file") 100 } 101 if d.Size != 0 { 102 return nil, errors.New("size field must be empty for artifact external data file") 103 } 104 if artifactsURL == "" { 105 return nil, errors.New("build artifact URL is unknown (running a developer build?)") 106 } 107 return &link{Data: *d, ComputedURL: artifactsURL + d.Name}, nil 108 default: 109 return nil, fmt.Errorf("unknown external data link type %q", d.Type) 110 } 111 } 112 113 // DownloadJob represents a job to download an external data file and make hard links 114 // at several file paths. 115 type DownloadJob struct { 116 link *link 117 dests []string 118 } 119 120 // downloadResult represents a result of a DownloadJob. 121 type downloadResult struct { 122 job *DownloadJob 123 duration time.Duration 124 size int64 125 err error 126 } 127 128 // Manager manages operations for external data files. 129 type Manager struct { 130 dataDir string 131 artifactsURL string 132 all []string // all the locations external data files can exist. 133 // inuse is a mutable field that maps external data files to the number 134 // of entities currently using it. 135 inuse map[string]int 136 } 137 138 // NewManager creates a new Manager. 139 // 140 // dataDir is the path to the base directory containing external data link files 141 // (typically "/usr/local/share/tast/data" on DUT). artifactURL is the URL of 142 // Google Cloud Storage directory, ending with a slash, containing build 143 // artifacts for the current ChromeOS image. 144 func NewManager(ctx context.Context, dataDir, artifactsURL string) (*Manager, error) { 145 var all []string 146 if err := filepath.Walk(dataDir, func(linkPath string, info os.FileInfo, err error) error { 147 if err != nil { 148 return err 149 } 150 if !strings.HasSuffix(linkPath, testing.ExternalLinkSuffix) { 151 return nil 152 } 153 destPath := strings.TrimSuffix(linkPath, testing.ExternalLinkSuffix) 154 all = append(all, destPath) 155 return nil 156 }); err != nil && !os.IsNotExist(err) { 157 return nil, fmt.Errorf("Failed to walk data directory: %v", err) 158 } 159 sort.Strings(all) 160 161 return &Manager{ 162 dataDir: dataDir, 163 artifactsURL: artifactsURL, 164 all: all, 165 inuse: make(map[string]int), 166 }, nil 167 } 168 169 // Purgeable returns a list of external data file paths not needed by the 170 // currently running entities. They can be deleted if the disk space is low. 171 func (m *Manager) Purgeable() []string { 172 var res []string 173 for _, p := range m.all { 174 if m.inuse[p] > 0 { 175 continue 176 } 177 if _, err := os.Stat(p); err == nil { 178 res = append(res, p) 179 } 180 } 181 return res 182 } 183 184 // PrepareDownloads computes a list of external data files that need to be 185 // downloaded for entities. 186 // 187 // PrepareDownloads also removes stale files so they are never used even if we 188 // fail to download them later. When it encounters errors, *.external-error 189 // files are saved so that they can be read and reported by bundles later. 190 // 191 // PrepareDownloads returns a list of download job specifications that can be 192 // passed to RunDownloads to perform actual downloads. 193 // 194 // release must be called after entities finish. 195 func (m *Manager) PrepareDownloads(ctx context.Context, entities []*protocol.Entity) (jobs []*DownloadJob, release func()) { 196 urlToJob := make(map[string]*DownloadJob) 197 hasErr := false 198 199 var releaseFunc []func() 200 201 // Process tests. 202 for _, t := range entities { 203 for _, name := range t.GetDependencies().GetDataFiles() { 204 destPath := filepath.Join(m.dataDir, testing.RelativeDataDir(t.Package), name) 205 linkPath := destPath + testing.ExternalLinkSuffix 206 errorPath := destPath + testing.ExternalErrorSuffix 207 208 reportErr := func(format string, args ...interface{}) { 209 msg := fmt.Sprintf("failed to prepare downloading %s: %s", name, fmt.Sprintf(format, args...)) 210 logging.Info(ctx, strings.ToUpper(msg[:1])+msg[1:]) 211 ioutil.WriteFile(errorPath, []byte(msg), 0666) 212 hasErr = true 213 } 214 215 // Clear the error message first. 216 os.Remove(errorPath) 217 218 _, err := os.Stat(linkPath) 219 if os.IsNotExist(err) { 220 // Not an external data file. 221 continue 222 } else if err != nil { 223 reportErr("failed to stat %s: %v", linkPath, err) 224 continue 225 } 226 227 link, err := loadLink(linkPath, m.artifactsURL) 228 if err != nil { 229 reportErr("failed to load %s: %v", linkPath, err) 230 continue 231 } 232 233 // This file is not purgeable. 234 m.inuse[destPath]++ 235 releaseFunc = append(releaseFunc, func() { 236 m.inuse[destPath]-- 237 }) 238 239 // Decide if we need to update the destination file. 240 needed := false 241 f, err := os.Open(destPath) 242 if err == nil { 243 needed = shouldDownload(f, link, destPath) 244 f.Close() 245 246 if needed { 247 // Remove the stale file early so that they are never used. 248 if err := os.Remove(destPath); err != nil { 249 reportErr("failed to remove stale file %s: %v", destPath, err) 250 continue 251 } 252 } 253 } else if os.IsNotExist(err) { 254 needed = true 255 } else { 256 reportErr("failed to stat %s: %v", destPath, err) 257 continue 258 } 259 260 // To check consistency, create an entry in urlToJob even if we are not updating the destination file. 261 job := urlToJob[link.ComputedURL] 262 if job == nil { 263 job = &DownloadJob{link, nil} 264 urlToJob[link.ComputedURL] = job 265 } else if !reflect.DeepEqual(job.link, link) { 266 reportErr("conflicting external data link found at %s: got %+v, want %+v", filepath.Join(testing.RelativeDataDir(t.Package), name), link, job.link) 267 continue 268 } 269 270 if needed { 271 // Use O(n^2) algorithm assuming the number of duplicates is small. 272 dup := false 273 for _, d := range job.dests { 274 if d == destPath { 275 dup = true 276 break 277 } 278 } 279 if !dup { 280 job.dests = append(job.dests, destPath) 281 } 282 } 283 } 284 } 285 286 for _, j := range urlToJob { 287 if len(j.dests) > 0 { 288 jobs = append(jobs, j) 289 } 290 } 291 sort.Slice(jobs, func(i, j int) bool { 292 return jobs[i].link.ComputedURL < jobs[j].link.ComputedURL 293 }) 294 295 logging.Infof(ctx, "Found %d external linked data file(s), need to download %d", len(urlToJob), len(jobs)) 296 if hasErr { 297 logging.Info(ctx, "Encountered some errors on scanning external data link files, but continuing anyway; corresponding tests will fail") 298 } 299 return jobs, func() { 300 for _, f := range releaseFunc { 301 f() 302 } 303 } 304 } 305 306 // writeExternalURLRecord record url source of external file. 307 func writeExternalURLRecord(ctx context.Context, job *DownloadJob) { 308 if job.link.Data.Type == TypeArtifact { 309 for _, dest := range job.dests { 310 urlRecordFile := dest + testing.ExternalURLSuffix 311 err := os.WriteFile(urlRecordFile, []byte(job.link.ComputedURL), 0666) 312 313 if err != nil { 314 // Non critical error. 315 msg := fmt.Sprintf("Failed to write urlRecord %s, content: %s: %v", urlRecordFile, job.link.ComputedURL, err) 316 logging.Info(ctx, msg) 317 } 318 } 319 } 320 } 321 322 // loadLink loads a JSON file of LinkData. 323 func loadLink(path, artifactsURL string) (*link, error) { 324 f, err := os.Open(path) 325 if err != nil { 326 return nil, err 327 } 328 defer f.Close() 329 330 var d LinkData 331 if err := json.NewDecoder(f).Decode(&d); err != nil { 332 return nil, err 333 } 334 335 l, err := newLink(&d, artifactsURL) 336 if err != nil { 337 return nil, err 338 } 339 return l, nil 340 } 341 342 // RunDownloads downloads required external data files in parallel. 343 // 344 // dataDir is the path to the base directory containing external data link files 345 // (typically "/usr/local/share/tast/data" on DUT). jobs are typically obtained 346 // by calling PrepareDownloads. 347 // 348 // This function does not return errors; instead it tries to download files as 349 // far as possible and logs encountered errors with ctx so that a single 350 // download error does not cause all tests to fail. 351 func RunDownloads(ctx context.Context, dataDir string, jobs []*DownloadJob, cl devserver.Client) { 352 jobCh := make(chan *DownloadJob, len(jobs)) 353 for _, job := range jobs { 354 jobCh <- job 355 } 356 close(jobCh) 357 358 const parallelism = 4 359 resCh := make(chan *downloadResult, len(jobs)) 360 for i := 0; i < parallelism; i++ { 361 go func() { 362 for job := range jobCh { 363 start := time.Now() 364 size, err := runDownload(ctx, dataDir, job, cl) 365 duration := time.Since(start) 366 resCh <- &downloadResult{job, duration, size, err} 367 } 368 }() 369 } 370 371 hasErr := false 372 finished := 0 373 for finished < len(jobs) { 374 select { 375 case res := <-resCh: 376 if res.err != nil { 377 msg := fmt.Sprintf("failed to download %s: %v", res.job.link.ComputedURL, res.err) 378 logging.Info(ctx, strings.ToUpper(msg[:1])+msg[1:]) 379 for _, dest := range res.job.dests { 380 ioutil.WriteFile(dest+testing.ExternalErrorSuffix, []byte(msg), 0666) 381 } 382 hasErr = true 383 } else { 384 mbs := float64(res.size) / res.duration.Seconds() / 1024 / 1024 385 logging.Infof(ctx, "Finished downloading %s (%d bytes, %v, %.1fMB/s)", 386 res.job.link.ComputedURL, res.size, res.duration.Round(time.Millisecond), mbs) 387 writeExternalURLRecord(ctx, res.job) 388 } 389 finished++ 390 case <-time.After(30 * time.Second): 391 // Without this keep-alive message, the tast command may think that the SSH connection was lost. 392 // TODO(nya): Remove this keep-alive logic after 20190701. 393 logging.Info(ctx, "Still downloading...") 394 } 395 } 396 397 if hasErr { 398 logging.Info(ctx, "Failed to download some external data files, but continuing anyway; corresponding tests will fail") 399 } 400 } 401 402 // runDownload downloads an external data file. 403 func runDownload(ctx context.Context, dataDir string, job *DownloadJob, cl devserver.Client) (size int64, retErr error) { 404 // Create the temporary file under dataDir to make use of hard links. 405 f, err := ioutil.TempFile(dataDir, ".external-download.") 406 if err != nil { 407 return 0, err 408 } 409 defer os.Remove(f.Name()) 410 defer func() { 411 if err := f.Close(); err != nil && retErr == nil { 412 retErr = err 413 } 414 }() 415 416 var mode os.FileMode = 0644 417 if job.link.Data.Executable { 418 mode = 0755 419 } 420 if err := f.Chmod(mode); err != nil { 421 return 0, err 422 } 423 424 r, err := cl.Open(ctx, job.link.ComputedURL) 425 if err != nil { 426 return 0, err 427 } 428 defer r.Close() 429 430 size, err = io.Copy(f, r) 431 if err != nil { 432 return size, err 433 } 434 435 if err := verify(f, job.link); err != nil { 436 return size, err 437 } 438 439 for _, dest := range job.dests { 440 if err := os.Remove(dest); err != nil && !os.IsNotExist(err) { 441 return size, err 442 } 443 444 if err := os.Link(f.Name(), dest); err != nil { 445 return size, err 446 } 447 } 448 return size, nil 449 } 450 451 // shouldDownload decides if the file needs to be downloaded 452 func shouldDownload(f *os.File, link *link, destPath string) bool { 453 454 if link.Data.Type == TypeArtifact { 455 // For Artifact type, we check for staleness of the previous downloaded files. 456 // Staleness check is done by comparing current download url to previusly download 457 // url 458 urlRecordPath := destPath + testing.ExternalURLSuffix 459 460 bytes, err := os.ReadFile(urlRecordPath) 461 // url record does not exists, meaning file previously was never downloaded. 462 // Lets download it. 463 if err != nil { 464 return true 465 } 466 // url record exists. Lets examine the record. 467 urlRecord := string(bytes) 468 return link.ComputedURL != urlRecord 469 } 470 return verify(f, link) != nil 471 } 472 473 // verify checks the integrity of an external data file. 474 func verify(f *os.File, link *link) error { 475 if link.Data.Type == TypeArtifact { 476 // For artifacts, we do not verify files. 477 return nil 478 } 479 480 fi, err := f.Stat() 481 if err != nil { 482 return err 483 } 484 if fi.Size() != link.Data.Size { 485 return fmt.Errorf("file size mismatch; got %d bytes, want %d bytes", fi.Size(), link.Data.Size) 486 } 487 488 if _, err := f.Seek(0, 0); err != nil { 489 return err 490 } 491 492 hasher := sha256.New() 493 if _, err := io.Copy(hasher, f); err != nil { 494 return fmt.Errorf("failed to compute hash: %v", err) 495 } 496 hash := hex.EncodeToString(hasher.Sum(nil)) 497 if hash != link.Data.SHA256Sum { 498 return fmt.Errorf("hash mismatch; got %s, want %s", hash, link.Data.SHA256Sum) 499 } 500 return nil 501 }