github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/archivewatcher/archive.go (about) 1 // Copyright (c) 2021-2024, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package archive 6 7 import ( 8 "archive/tar" 9 "compress/gzip" 10 "context" 11 "crypto/tls" 12 "fmt" 13 "io" 14 "net/http" 15 "net/url" 16 "os" 17 "path/filepath" 18 "strings" 19 "sync" 20 "time" 21 22 "github.com/choria-io/go-choria/aagent/model" 23 "github.com/choria-io/go-choria/aagent/util" 24 "github.com/choria-io/go-choria/aagent/watchers/event" 25 "github.com/choria-io/go-choria/aagent/watchers/watcher" 26 "github.com/choria-io/go-choria/build" 27 iu "github.com/choria-io/go-choria/internal/util" 28 ) 29 30 type State int 31 32 const ( 33 Unknown State = iota 34 Skipped 35 Error 36 VerifiedOK 37 Downloaded 38 VerifyFailed 39 MissingCreates 40 MissingChecksums 41 42 wtype = "archive" 43 version = "v1" 44 ) 45 46 var stateNames = map[State]string{ 47 Unknown: "unknown", 48 Skipped: "skipped", 49 Error: "error", 50 VerifiedOK: "verified", 51 Downloaded: "downloaded", 52 VerifyFailed: "verify_failed", 53 MissingCreates: "no_creates", 54 MissingChecksums: "no_checksums", 55 } 56 57 type Properties struct { 58 // ArchiveChecksum is a sha256 hex string of the archive being downloaded, requires ArchiveChecksumChecksum 59 ArchiveChecksum string `mapstructure:"checksum"` 60 // Creates is a subdirectory that the tarball will create on untar, it has to create a sub directory 61 Creates string 62 // Governor is the optional name of a governor to use for concurrency control 63 Governor string 64 // GovernorTimeout is how long we'll try to access the governor 65 GovernorTimeout time.Duration `mapstructure:"governor_timeout"` 66 // Insecure skips TLS verification on https downloads (not implemented) 67 Insecure bool 68 // Password for accessing the source, required when a username is set 69 Password string 70 // Source is a URL to the file being downloaded, only tar.gz format is supported 71 Source string 72 // TargetDirectory is the directory where the tarball will be extracted 73 TargetDirectory string `mapstructure:"target"` 74 // Timeout is how long HTTP operations are allowed to take 75 Timeout time.Duration 76 // Username is the username to use when downloading, Password is required in addition 77 Username string 78 // ContentChecksums a file in the archive made using sha256 used for verification of files in the archive after extraction and on every interval check 79 ContentChecksums string `mapstructure:"verify"` 80 // ContentChecksumsChecksum is a sha256 hex string of the file specified in ContentChecksums 81 ContentChecksumsChecksum string `mapstructure:"verify_checksum"` 82 } 83 84 type Watcher struct { 85 *watcher.Watcher 86 87 name string 88 machine model.Machine 89 previous State 90 interval time.Duration 91 previousRunTime time.Duration 92 previousSource string 93 properties *Properties 94 95 lastWatch time.Time 96 97 wmu *sync.Mutex 98 mu *sync.Mutex 99 } 100 101 func New(machine model.Machine, name string, states []string, failEvent string, successEvent string, interval string, ai time.Duration, rawprop map[string]any) (any, error) { 102 var err error 103 104 archive := &Watcher{ 105 name: name, 106 machine: machine, 107 properties: &Properties{}, 108 lastWatch: time.Time{}, 109 wmu: &sync.Mutex{}, 110 mu: &sync.Mutex{}, 111 } 112 113 archive.Watcher, err = watcher.NewWatcher(name, wtype, ai, states, machine, failEvent, successEvent) 114 if err != nil { 115 return nil, err 116 } 117 118 err = archive.setProperties(rawprop) 119 if err != nil { 120 return nil, fmt.Errorf("could not set properties: %v", err) 121 } 122 123 if interval != "" { 124 archive.interval, err = iu.ParseDuration(interval) 125 if err != nil { 126 return nil, fmt.Errorf("invalid interval: %v", err) 127 } 128 129 if archive.interval < 10*time.Second { 130 return nil, fmt.Errorf("interval %v is too small", archive.interval) 131 } 132 } 133 134 return archive, nil 135 } 136 137 func (w *Watcher) Run(ctx context.Context, wg *sync.WaitGroup) { 138 defer wg.Done() 139 140 w.Infof("archive watcher for %s starting", w.name) 141 142 if w.interval != 0 { 143 wg.Add(1) 144 go w.intervalWatcher(ctx, wg) 145 } 146 147 w.performWatch(ctx, false) 148 149 for { 150 select { 151 case <-w.Watcher.StateChangeC(): 152 w.performWatch(ctx, true) 153 154 case <-ctx.Done(): 155 w.Infof("Stopping on context interrupt") 156 w.CancelGovernor() 157 return 158 } 159 } 160 } 161 162 func (w *Watcher) verifyCreates() (string, State, error) { 163 creates := filepath.Join(w.properties.TargetDirectory, w.properties.Creates) 164 165 if !iu.FileIsDir(creates) { 166 return creates, MissingCreates, nil 167 } 168 169 if w.properties.ContentChecksums == "" { 170 return creates, VerifiedOK, nil 171 } 172 173 checksums := filepath.Join(creates, w.properties.ContentChecksums) 174 if !iu.FileExist(checksums) { 175 w.Errorf("Checksums file %s does not exist in %s, triggering download: %s", checksums, w.properties.ContentChecksums) 176 return creates, MissingChecksums, nil 177 } 178 179 // TODO: if verify fail on checksum fail of the sha256sums file should I remove the resulting files, 180 // they are probably compromised so should stop being used maybe a flag to control that 181 if w.properties.ContentChecksumsChecksum == "" { 182 return creates, VerifiedOK, nil 183 } 184 185 err := w.verify(creates) 186 if err == nil { 187 w.Infof("Checksums of %s verified successfully using %s", creates, w.properties.ContentChecksums) 188 return creates, VerifiedOK, nil 189 } 190 191 w.Errorf("Checksum verification failed, triggering download: %v", err) 192 193 return creates, VerifyFailed, nil 194 } 195 196 func (w *Watcher) watch(ctx context.Context) (state State, err error) { 197 if !w.ShouldWatch() { 198 return Skipped, nil 199 } 200 201 start := time.Now() 202 defer func() { 203 w.mu.Lock() 204 w.previousRunTime = time.Since(start) 205 w.mu.Unlock() 206 }() 207 208 creates, state, err := w.verifyCreates() 209 if err == nil && state == VerifiedOK { 210 return state, err 211 } 212 213 if w.properties.Governor != "" { 214 fin, err := w.EnterGovernor(ctx, w.properties.Governor, w.properties.GovernorTimeout) 215 if err != nil { 216 w.Errorf("Cannot enter Governor %s: %s", w.properties.Governor, err) 217 return Error, err 218 } 219 defer fin() 220 } 221 222 timeout, cancel := context.WithTimeout(ctx, w.properties.Timeout) 223 defer cancel() 224 225 tf, err := w.downloadSourceToTemp(timeout) 226 if tf != "" { 227 defer os.RemoveAll(filepath.Dir(tf)) 228 } 229 if err != nil { 230 return Error, fmt.Errorf("download failed: %s", err) 231 } 232 if tf == "" { 233 return Error, fmt.Errorf("unknown error downloading to temporary file") 234 } 235 236 td, err := w.extractAndVerifyToTemp(tf) 237 if err != nil { 238 return Error, fmt.Errorf("archive extraction failed: %s", err) 239 } 240 241 if iu.FileExist(creates) { 242 err = os.RemoveAll(creates) 243 if err != nil { 244 return Error, fmt.Errorf("removing current destination failed: %s", err) 245 } 246 } 247 248 if !iu.FileIsDir(w.properties.TargetDirectory) { 249 err = os.MkdirAll(w.properties.TargetDirectory, 0700) 250 if err != nil { 251 return Error, fmt.Errorf("could not create target directory: %s", err) 252 } 253 } 254 255 err = os.Rename(filepath.Join(td, w.properties.Creates), creates) 256 if err != nil { 257 return Error, fmt.Errorf("rename failed: %s", err) 258 } 259 260 return Downloaded, nil 261 } 262 263 // extracts path into a new temporary directory in the same directory as path, returns 264 // the path to the new extracted temp directory 265 func (w *Watcher) extractAndVerifyToTemp(path string) (string, error) { 266 if path == "" { 267 return "", fmt.Errorf("empty archive path") 268 } 269 270 if !iu.FileExist(path) { 271 return "", fmt.Errorf("archive file %s does not exist", path) 272 } 273 274 parent := filepath.Dir(path) 275 if parent == "" { 276 return "", fmt.Errorf("invalid temp path") 277 } 278 279 td, err := os.MkdirTemp(parent, "choria-archive") 280 if err != nil { 281 return td, err 282 } 283 284 f, err := os.Open(path) 285 if err != nil { 286 return td, err 287 } 288 289 err = w.untar(f, td) 290 if err != nil { 291 return td, fmt.Errorf("untar failed: %s", err) 292 } 293 294 if w.properties.ContentChecksumsChecksum != "" { 295 err = w.verify(filepath.Join(td, w.properties.Creates)) 296 if err != nil { 297 w.Errorf("sha256 verify failed: %v", err) 298 return td, err 299 } 300 } 301 302 return td, nil 303 } 304 305 func (w *Watcher) verify(dir string) error { 306 ccc, err := w.ProcessTemplate(w.properties.ContentChecksumsChecksum) 307 if err != nil { 308 return fmt.Errorf("could not parse template on verify_checksum property") 309 } 310 if ccc == "" { 311 return fmt.Errorf("verify_checksum template resulted in an empty string") 312 } 313 314 sumsFile := filepath.Join(dir, w.properties.ContentChecksums) 315 if !iu.FileIsRegular(sumsFile) { 316 return fmt.Errorf("checksums file %s does not exist in the archive (%s)", w.properties.ContentChecksums, sumsFile) 317 } 318 319 ok, sum, err := iu.FileHasSha256Sum(sumsFile, ccc) 320 if err != nil { 321 return fmt.Errorf("failed to checksum file %s: %s", w.properties.ContentChecksums, err) 322 } 323 if !ok { 324 return fmt.Errorf("checksum file %s has an invalid checksum %q != %q", w.properties.ContentChecksums, sum, ccc) 325 } 326 327 ok, err = iu.Sha256VerifyDir(sumsFile, dir, nil, func(file string, ok bool) { 328 if !ok { 329 w.Warnf("Verification checksum failed for %s", file) 330 } 331 }) 332 if err != nil { 333 return err 334 } 335 336 if !ok { 337 return fmt.Errorf("contents did not pass verification") 338 } 339 340 return nil 341 } 342 343 func (w *Watcher) untar(s io.Reader, t string) error { 344 uncompressed, err := gzip.NewReader(s) 345 if err != nil { 346 return fmt.Errorf("unzip failed: %s", err) 347 } 348 349 tarReader := tar.NewReader(uncompressed) 350 for { 351 header, err := tarReader.Next() 352 if err == io.EOF { 353 break 354 } else if err != nil { 355 return err 356 } 357 358 if header.Typeflag != tar.TypeReg && header.Typeflag != tar.TypeDir { 359 return fmt.Errorf("only regular files and directories are supported") 360 } 361 362 if strings.Contains(header.Name, "..") { 363 return fmt.Errorf("invalid tar file detected") 364 } 365 366 path := filepath.Join(t, header.Name) 367 if !strings.HasPrefix(path, t) { 368 return fmt.Errorf("invalid tar file detected") 369 } 370 371 nfo := header.FileInfo() 372 if nfo.IsDir() { 373 err = os.MkdirAll(path, nfo.Mode()) 374 if err != nil { 375 return err 376 } 377 continue 378 } 379 380 file, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, nfo.Mode()) 381 if err != nil { 382 return err 383 } 384 _, err = io.Copy(file, tarReader) 385 file.Close() 386 if err != nil { 387 return err 388 } 389 } 390 391 return nil 392 } 393 394 func (w *Watcher) mkTempDir() (string, error) { 395 // aagent loader will ignore tmp directory 396 parent := filepath.Join(w.properties.TargetDirectory, "tmp") 397 if !iu.FileIsDir(parent) { 398 err := os.MkdirAll(parent, 0700) 399 if err != nil { 400 return "", err 401 } 402 } 403 404 return os.MkdirTemp(parent, "") 405 } 406 407 // creates a temp directory, creates a file in that directory and returns the path to the file 408 // removes the temp directory on any failure, but leaves it on success - the temp file is in there 409 func (w *Watcher) downloadSourceToTemp(ctx context.Context) (string, error) { 410 source, err := w.ProcessTemplate(w.properties.Source) 411 if err != nil { 412 return "", fmt.Errorf("source template processing failed: %s", err) 413 } 414 415 sourceChecksum, err := w.ProcessTemplate(w.properties.ArchiveChecksum) 416 if err != nil { 417 return "", fmt.Errorf("checksum template processing failed: %s", err) 418 } 419 420 if source == "" { 421 return "", fmt.Errorf("source template resulted in an empty string") 422 } 423 if sourceChecksum == "" { 424 return "", fmt.Errorf("checksum template resulted in an empty string") 425 } 426 427 w.previousSource = source 428 429 uri, err := url.Parse(source) 430 if err != nil { 431 return "", fmt.Errorf("invalid url: %s", err) 432 } 433 434 td, err := w.mkTempDir() 435 if err != nil { 436 return "", fmt.Errorf("could not create temp directory: %s", err) 437 } 438 if td == "" { 439 return "", fmt.Errorf("could not create temp directory for unknown reason") 440 } 441 442 tf, err := os.CreateTemp(td, "*-archive.tgz") 443 if err != nil { 444 os.RemoveAll(td) 445 return "", fmt.Errorf("could not create temp file: %s", err) 446 } 447 defer tf.Close() 448 449 w.Infof("Attempting to download %s to %s", uri.String(), tf.Name()) 450 451 err = func() error { 452 client := http.Client{} 453 454 if w.properties.Insecure { 455 client.Transport = &http.Transport{ 456 TLSClientConfig: &tls.Config{ 457 InsecureSkipVerify: true, 458 }, 459 } 460 } 461 462 req, err := http.NewRequestWithContext(ctx, http.MethodGet, uri.String(), nil) 463 if err != nil { 464 return fmt.Errorf("request failed: %s", err) 465 } 466 req.Header.Add("User-Agent", fmt.Sprintf("Choria Archive Watcher %s", build.Version)) 467 468 if w.properties.Username != "" { 469 user, err := w.ProcessTemplate(w.properties.Username) 470 if err != nil { 471 return fmt.Errorf("invalid username template: %v", err) 472 } 473 pass, err := w.ProcessTemplate(w.properties.Password) 474 if err != nil { 475 return fmt.Errorf("invalid password template: %v", err) 476 } 477 478 req.SetBasicAuth(user, pass) 479 } 480 481 resp, err := client.Do(req) 482 if err != nil { 483 return fmt.Errorf("request failed: %s", err) 484 } 485 defer resp.Body.Close() 486 487 _, err = io.Copy(tf, resp.Body) 488 if err != nil { 489 return fmt.Errorf("request failed: %s", err) 490 } 491 492 tf.Close() 493 494 ok, sum, err := iu.FileHasSha256Sum(tf.Name(), sourceChecksum) 495 if err != nil { 496 return fmt.Errorf("archive checksum calculation failed: %s", err) 497 } 498 if !ok { 499 return fmt.Errorf("archive checksum %s != %s missmatch", sum, sourceChecksum) 500 } 501 502 return nil 503 }() 504 if err != nil { 505 os.RemoveAll(td) 506 return "", err 507 } 508 509 if !iu.FileExist(tf.Name()) { 510 return "", fmt.Errorf("downloaded file %s does not exist", tf.Name()) 511 } 512 513 return tf.Name(), nil 514 } 515 516 func (w *Watcher) performWatch(ctx context.Context, force bool) { 517 w.wmu.Lock() 518 defer w.wmu.Unlock() 519 520 if !force && time.Since(w.lastWatch) < w.interval { 521 return 522 } 523 524 err := w.handleCheck(w.watch(ctx)) 525 if err != nil { 526 w.Errorf("could not handle watcher event: %s", err) 527 } 528 } 529 530 func (w *Watcher) intervalWatcher(ctx context.Context, wg *sync.WaitGroup) { 531 defer wg.Done() 532 533 tick := time.NewTicker(w.interval) 534 535 for { 536 select { 537 case <-tick.C: 538 w.performWatch(ctx, false) 539 540 case <-ctx.Done(): 541 tick.Stop() 542 return 543 } 544 } 545 } 546 547 func (w *Watcher) handleCheck(s State, err error) error { 548 w.Debugf("handling state for %s %v", stateNames[s], err) 549 550 w.mu.Lock() 551 w.previous = s 552 w.mu.Unlock() 553 554 switch s { 555 case Error: 556 if err != nil { 557 w.Errorf("Managing archive failed: %s", err) 558 } 559 560 w.NotifyWatcherState(w.CurrentState()) 561 return w.FailureTransition() 562 563 case Downloaded: 564 w.NotifyWatcherState(w.CurrentState()) 565 return w.SuccessTransition() 566 567 case VerifiedOK: 568 w.NotifyWatcherState(w.CurrentState()) 569 return w.SuccessTransition() 570 571 case VerifyFailed: 572 w.NotifyWatcherState(w.CurrentState()) 573 return w.FailureTransition() 574 575 } 576 577 return nil 578 } 579 580 func (w *Watcher) setProperties(props map[string]any) error { 581 if w.properties == nil { 582 w.properties = &Properties{} 583 } 584 585 err := util.ParseMapStructure(props, w.properties) 586 if err != nil { 587 return err 588 } 589 590 return w.validate() 591 } 592 593 func (w *Watcher) validate() error { 594 if w.properties.Source == "" { 595 return fmt.Errorf("source is required") 596 } 597 598 if w.properties.Creates == "" { 599 return fmt.Errorf("creates is required") 600 } 601 602 // TODO need to make sure this is somehow not super dangerous choices like target / creates etc 603 // might make this into a machine downloader not a generic downloader 604 if w.properties.TargetDirectory == "" { 605 return fmt.Errorf("target is required") 606 } 607 608 if w.properties.ArchiveChecksum == "" { 609 return fmt.Errorf("checksum is required") 610 } 611 612 if w.properties.ContentChecksums != "" && w.properties.ContentChecksumsChecksum == "" { 613 return fmt.Errorf("verify_checksum is required if verify is set") 614 } 615 616 if w.properties.Username != "" && w.properties.Password == "" { 617 return fmt.Errorf("password is required when username is given") 618 } 619 620 if w.properties.Governor != "" && w.properties.GovernorTimeout == 0 { 621 w.Infof("Setting Governor timeout to 5 minutes while unset") 622 w.properties.GovernorTimeout = 5 * time.Minute 623 } 624 625 if w.properties.Timeout < 5*time.Second { 626 w.Infof("Setting timeout to minimum 5 seconds") 627 w.properties.Timeout = 5 * time.Second 628 } 629 630 return nil 631 } 632 633 func (w *Watcher) CurrentState() any { 634 w.mu.Lock() 635 defer w.mu.Unlock() 636 637 s := &StateNotification{ 638 Event: event.New(w.name, wtype, version, w.machine), 639 Source: w.properties.Source, 640 Creates: w.properties.Creates, 641 PreviousOutcome: stateNames[w.previous], 642 PreviousRunTime: w.previousRunTime.Nanoseconds(), 643 } 644 645 if w.previousSource != "" { 646 s.Source = w.previousSource 647 } 648 649 return s 650 }