// Source: oras.land/oras-go/v2@v2.5.1-0.20240520045656-aef90e4d04c4/content/file/file.go

/*
Copyright The ORAS Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package file provides implementation of a content store based on file system.
package file

import (
	"compress/gzip"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/opencontainers/go-digest"
	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
	"oras.land/oras-go/v2/content"
	"oras.land/oras-go/v2/errdef"
	"oras.land/oras-go/v2/internal/cas"
	"oras.land/oras-go/v2/internal/graph"
	"oras.land/oras-go/v2/internal/ioutil"
	"oras.land/oras-go/v2/internal/resolver"
)

// bufPool is a pool of byte buffers that can be reused for copying content
// between files.
//
// The pool hands out *[]byte values; callers dereference the pointer to get
// the 1 MiB scratch slice and must return the same pointer with Put.
var bufPool = sync.Pool{
	New: func() interface{} {
		// the buffer size should be larger than or equal to 128 KiB
		// for performance considerations.
		// we choose 1 MiB here so there will be less disk I/O.
		buffer := make([]byte, 1<<20) // buffer size = 1 MiB
		return &buffer
	},
}

const (
	// AnnotationDigest is the annotation key for the digest of the uncompressed content.
	AnnotationDigest = "io.deis.oras.content.digest"
	// AnnotationUnpack is the annotation key for indication of unpacking.
	// Content is unpacked on push only when the annotation value is "true".
	AnnotationUnpack = "io.deis.oras.content.unpack"
	// defaultBlobMediaType specifies the default blob media type.
	defaultBlobMediaType = ocispec.MediaTypeImageLayer
	// defaultBlobDirMediaType specifies the default blob directory media type.
	defaultBlobDirMediaType = ocispec.MediaTypeImageLayerGzip
	// defaultFallbackPushSizeLimit specifies the default size limit for pushing no-name contents.
	defaultFallbackPushSizeLimit = 1 << 22 // 4 MiB
)

// Store represents a file system based store, which implements `oras.Target`.
//
// In the file store, the contents described by names are location-addressed
// by file paths. Meanwhile, the file paths are mapped to a virtual CAS
// where all metadata are stored in the memory.
//
// The contents that are not described by names are stored in a fallback storage,
// which is a limited memory CAS by default.
// As all the metadata are stored in the memory, the file store
// cannot be restored from the file system.
//
// After use, the file store needs to be closed by calling the [Store.Close] function.
// The file store cannot be used after being closed.
type Store struct {
	// TarReproducible controls if the tarballs generated
	// for the added directories are reproducible.
	// When specified, some metadata such as change time
	// will be removed from the files in the tarballs. Default value: false.
	TarReproducible bool
	// AllowPathTraversalOnWrite controls if path traversal is allowed
	// when writing files. When specified, writing files
	// outside the working directory will be allowed. Default value: false.
	AllowPathTraversalOnWrite bool
	// DisableOverwrite controls if push operations can overwrite existing files.
	// When specified, saving files to existing paths will be disabled.
	// Default value: false.
	DisableOverwrite bool
	// ForceCAS controls if files with same content but different names are
	// deduped after push operations. When a DAG is copied between CAS
	// targets, nodes are deduped by content. By default, file store restores
	// deduped successor files after a node is copied. This may result in two
	// files with identical content. If this is not the desired behavior,
	// ForceCAS can be specified to enforce CAS style dedup.
	// Default value: false.
	ForceCAS bool
	// IgnoreNoName controls if push operations should ignore descriptors
	// without a name. When specified, corresponding content will be discarded.
	// Otherwise, content will be saved to a fallback storage.
	// A typical scenario is pulling an arbitrary artifact masqueraded as OCI
	// image to file store. This option can be specified to discard unnamed
	// manifest and config file, while leaving only named layer files.
	// Default value: false.
	IgnoreNoName bool
	// SkipUnpack controls if push operations should skip unpacking files. This
	// value overrides the [AnnotationUnpack].
	// Default value: false.
	SkipUnpack bool

	workingDir   string   // the working directory of the file store
	closed       int32    // if the store is closed - 0: false, 1: true.
	digestToPath sync.Map // map[digest.Digest]string; content digest -> path of the file holding it
	nameToStatus sync.Map // map[string]*nameStatus; one entry per content name
	tmpFiles     sync.Map // map[string]bool; paths of temp files to remove on Close

	// fallbackStorage holds the contents that are pushed without a name.
	fallbackStorage content.Storage
	// resolver maps reference strings to descriptors for Tag and Resolve.
	resolver content.TagResolver
	// graph indexes pushed nodes and answers Predecessors queries.
	graph *graph.Memory
}

// nameStatus contains a flag indicating if a name exists,
// and a RWMutex protecting it.
type nameStatus struct {
	sync.RWMutex
	// exists is set once content has been stored under the name.
	exists bool
}

// New creates a file store, using a default limited memory CAS
// as the fallback storage for contents without names.
// When pushing content without names, the size of content being pushed
// cannot exceed the default size limit: 4 MiB.
134 func New(workingDir string) (*Store, error) { 135 return NewWithFallbackLimit(workingDir, defaultFallbackPushSizeLimit) 136 } 137 138 // NewWithFallbackLimit creates a file store, using a default 139 // limited memory CAS as the fallback storage for contents without names. 140 // When pushing content without names, the size of content being pushed 141 // cannot exceed the size limit specified by the `limit` parameter. 142 func NewWithFallbackLimit(workingDir string, limit int64) (*Store, error) { 143 m := cas.NewMemory() 144 ls := content.LimitStorage(m, limit) 145 return NewWithFallbackStorage(workingDir, ls) 146 } 147 148 // NewWithFallbackStorage creates a file store, 149 // using the provided fallback storage for contents without names. 150 func NewWithFallbackStorage(workingDir string, fallbackStorage content.Storage) (*Store, error) { 151 workingDirAbs, err := filepath.Abs(workingDir) 152 if err != nil { 153 return nil, fmt.Errorf("failed to resolve absolute path for %s: %w", workingDir, err) 154 } 155 156 return &Store{ 157 workingDir: workingDirAbs, 158 fallbackStorage: fallbackStorage, 159 resolver: resolver.NewMemory(), 160 graph: graph.NewMemory(), 161 }, nil 162 } 163 164 // Close closes the file store and cleans up all the temporary files used by it. 165 // The store cannot be used after being closed. 166 // This function is not go-routine safe. 167 func (s *Store) Close() error { 168 if s.isClosedSet() { 169 return nil 170 } 171 s.setClosed() 172 173 var errs []string 174 s.tmpFiles.Range(func(name, _ interface{}) bool { 175 if err := os.Remove(name.(string)); err != nil { 176 errs = append(errs, err.Error()) 177 } 178 return true 179 }) 180 181 if len(errs) > 0 { 182 return errors.New(strings.Join(errs, "; ")) 183 } 184 return nil 185 } 186 187 // Fetch fetches the content identified by the descriptor. 
188 func (s *Store) Fetch(ctx context.Context, target ocispec.Descriptor) (io.ReadCloser, error) { 189 if s.isClosedSet() { 190 return nil, ErrStoreClosed 191 } 192 193 // if the target has name, check if the name exists. 194 name := target.Annotations[ocispec.AnnotationTitle] 195 if name != "" && !s.nameExists(name) { 196 return nil, fmt.Errorf("%s: %s: %w", name, target.MediaType, errdef.ErrNotFound) 197 } 198 199 // check if the content exists in the store 200 val, exists := s.digestToPath.Load(target.Digest) 201 if exists { 202 path := val.(string) 203 204 fp, err := os.Open(path) 205 if err != nil { 206 if os.IsNotExist(err) { 207 return nil, fmt.Errorf("%s: %s: %w", target.Digest, target.MediaType, errdef.ErrNotFound) 208 } 209 return nil, err 210 } 211 212 return fp, nil 213 } 214 215 // if the content does not exist in the store, 216 // then fall back to the fallback storage. 217 return s.fallbackStorage.Fetch(ctx, target) 218 } 219 220 // Push pushes the content, matching the expected descriptor. 221 // If name is not specified in the descriptor, the content will be pushed to 222 // the fallback storage by default, or will be discarded when 223 // Store.IgnoreNoName is true. 224 func (s *Store) Push(ctx context.Context, expected ocispec.Descriptor, content io.Reader) error { 225 if s.isClosedSet() { 226 return ErrStoreClosed 227 } 228 229 if err := s.push(ctx, expected, content); err != nil { 230 if errors.Is(err, errSkipUnnamed) { 231 return nil 232 } 233 return err 234 } 235 236 if !s.ForceCAS { 237 if err := s.restoreDuplicates(ctx, expected); err != nil { 238 return fmt.Errorf("failed to restore duplicated file: %w", err) 239 } 240 } 241 242 return s.graph.Index(ctx, s, expected) 243 } 244 245 // push pushes the content, matching the expected descriptor. 246 // If name is not specified in the descriptor, the content will be pushed to 247 // the fallback storage by default, or will be discarded when 248 // Store.IgnoreNoName is true. 
249 func (s *Store) push(ctx context.Context, expected ocispec.Descriptor, content io.Reader) error { 250 name := expected.Annotations[ocispec.AnnotationTitle] 251 if name == "" { 252 if s.IgnoreNoName { 253 return errSkipUnnamed 254 } 255 return s.fallbackStorage.Push(ctx, expected, content) 256 } 257 258 // check the status of the name 259 status := s.status(name) 260 status.Lock() 261 defer status.Unlock() 262 263 if status.exists { 264 return fmt.Errorf("%s: %w", name, ErrDuplicateName) 265 } 266 267 target, err := s.resolveWritePath(name) 268 if err != nil { 269 return fmt.Errorf("failed to resolve path for writing: %w", err) 270 } 271 272 if needUnpack := expected.Annotations[AnnotationUnpack]; needUnpack == "true" && !s.SkipUnpack { 273 err = s.pushDir(name, target, expected, content) 274 } else { 275 err = s.pushFile(target, expected, content) 276 } 277 if err != nil { 278 return err 279 } 280 281 // update the name status as existed 282 status.exists = true 283 return nil 284 } 285 286 // restoreDuplicates restores successor files with same content but different 287 // names. 288 // See Store.ForceCAS for more info. 
289 func (s *Store) restoreDuplicates(ctx context.Context, desc ocispec.Descriptor) error { 290 successors, err := content.Successors(ctx, s, desc) 291 if err != nil { 292 return err 293 } 294 for _, successor := range successors { 295 name := successor.Annotations[ocispec.AnnotationTitle] 296 if name == "" || s.nameExists(name) { 297 continue 298 } 299 if err := func() error { 300 desc := ocispec.Descriptor{ 301 MediaType: successor.MediaType, 302 Digest: successor.Digest, 303 Size: successor.Size, 304 } 305 rc, err := s.Fetch(ctx, desc) 306 if err != nil { 307 return fmt.Errorf("%q: %s: %w", name, desc.MediaType, err) 308 } 309 defer rc.Close() 310 if err := s.push(ctx, successor, rc); err != nil { 311 return fmt.Errorf("%q: %s: %w", name, desc.MediaType, err) 312 } 313 return nil 314 }(); err != nil { 315 switch { 316 case errors.Is(err, errdef.ErrNotFound): 317 // allow pushing manifests before blobs 318 case errors.Is(err, ErrDuplicateName): 319 // in case multiple goroutines are pushing or restoring the same 320 // named content, the error is ignored 321 default: 322 return err 323 } 324 } 325 } 326 return nil 327 } 328 329 // Exists returns true if the described content exists. 330 func (s *Store) Exists(ctx context.Context, target ocispec.Descriptor) (bool, error) { 331 if s.isClosedSet() { 332 return false, ErrStoreClosed 333 } 334 335 // if the target has name, check if the name exists. 336 name := target.Annotations[ocispec.AnnotationTitle] 337 if name != "" && !s.nameExists(name) { 338 return false, nil 339 } 340 341 // check if the content exists in the store 342 _, exists := s.digestToPath.Load(target.Digest) 343 if exists { 344 return true, nil 345 } 346 347 // if the content does not exist in the store, 348 // then fall back to the fallback storage. 349 return s.fallbackStorage.Exists(ctx, target) 350 } 351 352 // Resolve resolves a reference to a descriptor. 
353 func (s *Store) Resolve(ctx context.Context, ref string) (ocispec.Descriptor, error) { 354 if s.isClosedSet() { 355 return ocispec.Descriptor{}, ErrStoreClosed 356 } 357 358 if ref == "" { 359 return ocispec.Descriptor{}, errdef.ErrMissingReference 360 } 361 362 return s.resolver.Resolve(ctx, ref) 363 } 364 365 // Tag tags a descriptor with a reference string. 366 func (s *Store) Tag(ctx context.Context, desc ocispec.Descriptor, ref string) error { 367 if s.isClosedSet() { 368 return ErrStoreClosed 369 } 370 371 if ref == "" { 372 return errdef.ErrMissingReference 373 } 374 375 exists, err := s.Exists(ctx, desc) 376 if err != nil { 377 return err 378 } 379 if !exists { 380 return fmt.Errorf("%s: %s: %w", desc.Digest, desc.MediaType, errdef.ErrNotFound) 381 } 382 383 return s.resolver.Tag(ctx, desc, ref) 384 } 385 386 // Predecessors returns the nodes directly pointing to the current node. 387 // Predecessors returns nil without error if the node does not exists in the 388 // store. 389 func (s *Store) Predecessors(ctx context.Context, node ocispec.Descriptor) ([]ocispec.Descriptor, error) { 390 if s.isClosedSet() { 391 return nil, ErrStoreClosed 392 } 393 394 return s.graph.Predecessors(ctx, node) 395 } 396 397 // Add adds a file into the file store. 
398 func (s *Store) Add(_ context.Context, name, mediaType, path string) (ocispec.Descriptor, error) { 399 if s.isClosedSet() { 400 return ocispec.Descriptor{}, ErrStoreClosed 401 } 402 403 if name == "" { 404 return ocispec.Descriptor{}, ErrMissingName 405 } 406 407 // check the status of the name 408 status := s.status(name) 409 status.Lock() 410 defer status.Unlock() 411 412 if status.exists { 413 return ocispec.Descriptor{}, fmt.Errorf("%s: %w", name, ErrDuplicateName) 414 } 415 416 if path == "" { 417 path = name 418 } 419 path = s.absPath(path) 420 421 fi, err := os.Stat(path) 422 if err != nil { 423 return ocispec.Descriptor{}, fmt.Errorf("failed to stat %s: %w", path, err) 424 } 425 426 // generate descriptor 427 var desc ocispec.Descriptor 428 if fi.IsDir() { 429 desc, err = s.descriptorFromDir(name, mediaType, path) 430 } else { 431 desc, err = s.descriptorFromFile(fi, mediaType, path) 432 } 433 if err != nil { 434 return ocispec.Descriptor{}, fmt.Errorf("failed to generate descriptor from %s: %w", path, err) 435 } 436 437 if desc.Annotations == nil { 438 desc.Annotations = make(map[string]string) 439 } 440 desc.Annotations[ocispec.AnnotationTitle] = name 441 442 // update the name status as existed 443 status.exists = true 444 return desc, nil 445 } 446 447 // saveFile saves content matching the descriptor to the given file. 448 func (s *Store) saveFile(fp *os.File, expected ocispec.Descriptor, content io.Reader) (err error) { 449 defer func() { 450 closeErr := fp.Close() 451 if err == nil { 452 err = closeErr 453 } 454 }() 455 path := fp.Name() 456 457 buf := bufPool.Get().(*[]byte) 458 defer bufPool.Put(buf) 459 if err := ioutil.CopyBuffer(fp, content, *buf, expected); err != nil { 460 return fmt.Errorf("failed to copy content to %s: %w", path, err) 461 } 462 463 s.digestToPath.Store(expected.Digest, path) 464 return nil 465 } 466 467 // pushFile saves content matching the descriptor to the target path. 
468 func (s *Store) pushFile(target string, expected ocispec.Descriptor, content io.Reader) error { 469 if err := ensureDir(filepath.Dir(target)); err != nil { 470 return fmt.Errorf("failed to ensure directories of the target path: %w", err) 471 } 472 473 fp, err := os.Create(target) 474 if err != nil { 475 return fmt.Errorf("failed to create file %s: %w", target, err) 476 } 477 478 return s.saveFile(fp, expected, content) 479 } 480 481 // pushDir saves content matching the descriptor to the target directory. 482 func (s *Store) pushDir(name, target string, expected ocispec.Descriptor, content io.Reader) (err error) { 483 if err := ensureDir(target); err != nil { 484 return fmt.Errorf("failed to ensure directories of the target path: %w", err) 485 } 486 487 gz, err := s.tempFile() 488 if err != nil { 489 return err 490 } 491 492 gzPath := gz.Name() 493 // the digest of the gz is verified while saving 494 if err := s.saveFile(gz, expected, content); err != nil { 495 return fmt.Errorf("failed to save gzip to %s: %w", gzPath, err) 496 } 497 498 checksum := expected.Annotations[AnnotationDigest] 499 buf := bufPool.Get().(*[]byte) 500 defer bufPool.Put(buf) 501 if err := extractTarGzip(target, name, gzPath, checksum, *buf); err != nil { 502 return fmt.Errorf("failed to extract tar to %s: %w", target, err) 503 } 504 return nil 505 } 506 507 // descriptorFromDir generates descriptor from the given directory. 
508 func (s *Store) descriptorFromDir(name, mediaType, dir string) (desc ocispec.Descriptor, err error) { 509 // make a temp file to store the gzip 510 gz, err := s.tempFile() 511 if err != nil { 512 return ocispec.Descriptor{}, err 513 } 514 defer func() { 515 closeErr := gz.Close() 516 if err == nil { 517 err = closeErr 518 } 519 }() 520 521 // compress the directory 522 gzDigester := digest.Canonical.Digester() 523 gzw := gzip.NewWriter(io.MultiWriter(gz, gzDigester.Hash())) 524 defer func() { 525 closeErr := gzw.Close() 526 if err == nil { 527 err = closeErr 528 } 529 }() 530 531 tarDigester := digest.Canonical.Digester() 532 tw := io.MultiWriter(gzw, tarDigester.Hash()) 533 buf := bufPool.Get().(*[]byte) 534 defer bufPool.Put(buf) 535 if err := tarDirectory(dir, name, tw, s.TarReproducible, *buf); err != nil { 536 return ocispec.Descriptor{}, fmt.Errorf("failed to tar %s: %w", dir, err) 537 } 538 539 // flush all 540 if err := gzw.Close(); err != nil { 541 return ocispec.Descriptor{}, err 542 } 543 if err := gz.Sync(); err != nil { 544 return ocispec.Descriptor{}, err 545 } 546 547 fi, err := gz.Stat() 548 if err != nil { 549 return ocispec.Descriptor{}, err 550 } 551 552 // map gzip digest to gzip path 553 gzDigest := gzDigester.Digest() 554 s.digestToPath.Store(gzDigest, gz.Name()) 555 556 // generate descriptor 557 if mediaType == "" { 558 mediaType = defaultBlobDirMediaType 559 } 560 561 return ocispec.Descriptor{ 562 MediaType: mediaType, 563 Digest: gzDigest, // digest for the compressed content 564 Size: fi.Size(), 565 Annotations: map[string]string{ 566 AnnotationDigest: tarDigester.Digest().String(), // digest fot the uncompressed content 567 AnnotationUnpack: "true", // the content needs to be unpacked 568 }, 569 }, nil 570 } 571 572 // descriptorFromFile generates descriptor from the given file. 
573 func (s *Store) descriptorFromFile(fi os.FileInfo, mediaType, path string) (desc ocispec.Descriptor, err error) { 574 fp, err := os.Open(path) 575 if err != nil { 576 return ocispec.Descriptor{}, err 577 } 578 defer func() { 579 closeErr := fp.Close() 580 if err == nil { 581 err = closeErr 582 } 583 }() 584 585 dgst, err := digest.FromReader(fp) 586 if err != nil { 587 return ocispec.Descriptor{}, err 588 } 589 // map digest to file path 590 s.digestToPath.Store(dgst, path) 591 592 // generate descriptor 593 if mediaType == "" { 594 mediaType = defaultBlobMediaType 595 } 596 597 return ocispec.Descriptor{ 598 MediaType: mediaType, 599 Digest: dgst, 600 Size: fi.Size(), 601 }, nil 602 } 603 604 // resolveWritePath resolves the path to write for the given name. 605 func (s *Store) resolveWritePath(name string) (string, error) { 606 path := s.absPath(name) 607 if !s.AllowPathTraversalOnWrite { 608 base, err := filepath.Abs(s.workingDir) 609 if err != nil { 610 return "", err 611 } 612 target, err := filepath.Abs(path) 613 if err != nil { 614 return "", err 615 } 616 rel, err := filepath.Rel(base, target) 617 if err != nil { 618 return "", ErrPathTraversalDisallowed 619 } 620 rel = filepath.ToSlash(rel) 621 if strings.HasPrefix(rel, "../") || rel == ".." { 622 return "", ErrPathTraversalDisallowed 623 } 624 } 625 if s.DisableOverwrite { 626 if _, err := os.Stat(path); err == nil { 627 return "", ErrOverwriteDisallowed 628 } else if !os.IsNotExist(err) { 629 return "", err 630 } 631 } 632 return path, nil 633 } 634 635 // status returns the nameStatus for the given name. 636 func (s *Store) status(name string) *nameStatus { 637 v, _ := s.nameToStatus.LoadOrStore(name, &nameStatus{sync.RWMutex{}, false}) 638 status := v.(*nameStatus) 639 return status 640 } 641 642 // nameExists returns if the given name exists in the file store. 
643 func (s *Store) nameExists(name string) bool { 644 status := s.status(name) 645 status.RLock() 646 defer status.RUnlock() 647 648 return status.exists 649 } 650 651 // tempFile creates a temp file with the file name format "oras_file_randomString", 652 // and returns the pointer to the temp file. 653 func (s *Store) tempFile() (*os.File, error) { 654 tmp, err := os.CreateTemp("", "oras_file_*") 655 if err != nil { 656 return nil, err 657 } 658 659 s.tmpFiles.Store(tmp.Name(), true) 660 return tmp, nil 661 } 662 663 // absPath returns the absolute path of the path. 664 func (s *Store) absPath(path string) string { 665 if filepath.IsAbs(path) { 666 return path 667 } 668 return filepath.Join(s.workingDir, path) 669 } 670 671 // isClosedSet returns true if the `closed` flag is set, otherwise returns false. 672 func (s *Store) isClosedSet() bool { 673 return atomic.LoadInt32(&s.closed) == 1 674 } 675 676 // setClosed sets the `closed` flag. 677 func (s *Store) setClosed() { 678 atomic.StoreInt32(&s.closed, 1) 679 } 680 681 // ensureDir ensures the directories of the path exists. 682 func ensureDir(path string) error { 683 return os.MkdirAll(path, 0777) 684 }