github.com/opcr-io/oras-go/v2@v2.0.0-20231122155130-eb4260d8a0ae/content/file/file.go (about) 1 /* 2 Copyright The ORAS Authors. 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 http://www.apache.org/licenses/LICENSE-2.0 7 Unless required by applicable law or agreed to in writing, software 8 distributed under the License is distributed on an "AS IS" BASIS, 9 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 See the License for the specific language governing permissions and 11 limitations under the License. 12 */ 13 14 // Package file provides implementation of a content store based on file system. 15 package file 16 17 import ( 18 "compress/gzip" 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "os" 24 "path/filepath" 25 "strings" 26 "sync" 27 "sync/atomic" 28 29 "github.com/opcr-io/oras-go/v2/content" 30 "github.com/opcr-io/oras-go/v2/errdef" 31 "github.com/opcr-io/oras-go/v2/internal/cas" 32 "github.com/opcr-io/oras-go/v2/internal/graph" 33 "github.com/opcr-io/oras-go/v2/internal/ioutil" 34 "github.com/opcr-io/oras-go/v2/internal/resolver" 35 "github.com/opencontainers/go-digest" 36 ocispec "github.com/opencontainers/image-spec/specs-go/v1" 37 ) 38 39 // bufPool is a pool of byte buffers that can be reused for copying content 40 // between files. 41 var bufPool = sync.Pool{ 42 New: func() interface{} { 43 // the buffer size should be larger than or equal to 128 KiB 44 // for performance considerations. 45 // we choose 1 MiB here so there will be less disk I/O. 46 buffer := make([]byte, 1<<20) // buffer size = 1 MiB 47 return &buffer 48 }, 49 } 50 51 const ( 52 // AnnotationDigest is the annotation key for the digest of the uncompressed content. 53 AnnotationDigest = "io.deis.oras.content.digest" 54 // AnnotationUnpack is the annotation key for indication of unpacking. 55 AnnotationUnpack = "io.deis.oras.content.unpack" 56 // defaultBlobMediaType specifies the default blob media type. 57 defaultBlobMediaType = ocispec.MediaTypeImageLayer 58 // defaultBlobDirMediaType specifies the default blob directory media type. 59 defaultBlobDirMediaType = ocispec.MediaTypeImageLayerGzip 60 // defaultFallbackPushSizeLimit specifies the default size limit for pushing no-name contents. 61 defaultFallbackPushSizeLimit = 1 << 22 // 4 MiB 62 ) 63 64 // Store represents a file system based store, which implements `oras.Target`. 65 // 66 // In the file store, the contents described by names are location-addressed 67 // by file paths. Meanwhile, the file paths are mapped to a virtual CAS 68 // where all metadata are stored in the memory. 69 // 70 // The contents that are not described by names are stored in a fallback storage, 71 // which is a limited memory CAS by default. 72 // As all the metadata are stored in the memory, the file store 73 // cannot be restored from the file system. 74 // 75 // After use, the file store needs to be closed by calling the [Store.Close] function. 76 // The file store cannot be used after being closed. 77 type Store struct { 78 // TarReproducible controls if the tarballs generated 79 // for the added directories are reproducible. 80 // When specified, some metadata such as change time 81 // will be removed from the files in the tarballs. Default value: false. 82 TarReproducible bool 83 // AllowPathTraversalOnWrite controls if path traversal is allowed 84 // when writing files. When specified, writing files 85 // outside the working directory will be allowed. Default value: false. 86 AllowPathTraversalOnWrite bool 87 // DisableOverwrite controls if push operations can overwrite existing files. 88 // When specified, saving files to existing paths will be disabled. 89 // Default value: false. 90 DisableOverwrite bool 91 // ForceCAS controls if files with same content but different names are 92 // deduped after push operations. When a DAG is copied between CAS 93 // targets, nodes are deduped by content. By default, file store restores 94 // deduped successor files after a node is copied. This may result in two 95 // files with identical content. If this is not the desired behavior, 96 // ForceCAS can be specified to enforce CAS style dedup. 97 // Default value: false. 98 ForceCAS bool 99 // IgnoreNoName controls if push operations should ignore descriptors 100 // without a name. When specified, corresponding content will be discarded. 101 // Otherwise, content will be saved to a fallback storage. 102 // A typical scenario is pulling an arbitrary artifact masqueraded as OCI 103 // image to file store. This option can be specified to discard unnamed 104 // manifest and config file, while leaving only named layer files. 105 // Default value: false. 106 IgnoreNoName bool 107 108 workingDir string // the working directory of the file store 109 closed int32 // if the store is closed - 0: false, 1: true. 110 digestToPath sync.Map // map[digest.Digest]string 111 nameToStatus sync.Map // map[string]*nameStatus 112 tmpFiles sync.Map // map[string]bool 113 114 fallbackStorage content.Storage 115 resolver content.TagResolver 116 graph *graph.Memory 117 } 118 119 // nameStatus contains a flag indicating if a name exists, 120 // and a RWMutex protecting it. 121 type nameStatus struct { 122 sync.RWMutex 123 exists bool 124 } 125 126 // New creates a file store, using a default limited memory CAS 127 // as the fallback storage for contents without names. 128 // When pushing content without names, the size of content being pushed 129 // cannot exceed the default size limit: 4 MiB. 130 func New(workingDir string) (*Store, error) { 131 return NewWithFallbackLimit(workingDir, defaultFallbackPushSizeLimit) 132 } 133 134 // NewWithFallbackLimit creates a file store, using a default 135 // limited memory CAS as the fallback storage for contents without names. 136 // When pushing content without names, the size of content being pushed 137 // cannot exceed the size limit specified by the `limit` parameter. 138 func NewWithFallbackLimit(workingDir string, limit int64) (*Store, error) { 139 m := cas.NewMemory() 140 ls := content.LimitStorage(m, limit) 141 return NewWithFallbackStorage(workingDir, ls) 142 } 143 144 // NewWithFallbackStorage creates a file store, 145 // using the provided fallback storage for contents without names. 146 func NewWithFallbackStorage(workingDir string, fallbackStorage content.Storage) (*Store, error) { 147 workingDirAbs, err := filepath.Abs(workingDir) 148 if err != nil { 149 return nil, fmt.Errorf("failed to resolve absolute path for %s: %w", workingDir, err) 150 } 151 152 return &Store{ 153 workingDir: workingDirAbs, 154 fallbackStorage: fallbackStorage, 155 resolver: resolver.NewMemory(), 156 graph: graph.NewMemory(), 157 }, nil 158 } 159 160 // Close closes the file store and cleans up all the temporary files used by it. 161 // The store cannot be used after being closed. 162 // This function is not go-routine safe. 163 func (s *Store) Close() error { 164 if s.isClosedSet() { 165 return nil 166 } 167 s.setClosed() 168 169 var errs []string 170 s.tmpFiles.Range(func(name, _ interface{}) bool { 171 if err := os.Remove(name.(string)); err != nil { 172 errs = append(errs, err.Error()) 173 } 174 return true 175 }) 176 177 if len(errs) > 0 { 178 return errors.New(strings.Join(errs, "; ")) 179 } 180 return nil 181 } 182 183 // Fetch fetches the content identified by the descriptor. 184 func (s *Store) Fetch(ctx context.Context, target ocispec.Descriptor) (io.ReadCloser, error) { 185 if s.isClosedSet() { 186 return nil, ErrStoreClosed 187 } 188 189 // if the target has name, check if the name exists. 190 name := target.Annotations[ocispec.AnnotationTitle] 191 if name != "" && !s.nameExists(name) { 192 return nil, fmt.Errorf("%s: %s: %w", name, target.MediaType, errdef.ErrNotFound) 193 } 194 195 // check if the content exists in the store 196 val, exists := s.digestToPath.Load(target.Digest) 197 if exists { 198 path := val.(string) 199 200 fp, err := os.Open(path) 201 if err != nil { 202 if os.IsNotExist(err) { 203 return nil, fmt.Errorf("%s: %s: %w", target.Digest, target.MediaType, errdef.ErrNotFound) 204 } 205 return nil, err 206 } 207 208 return fp, nil 209 } 210 211 // if the content does not exist in the store, 212 // then fall back to the fallback storage. 213 return s.fallbackStorage.Fetch(ctx, target) 214 } 215 216 // Push pushes the content, matching the expected descriptor. 217 // If name is not specified in the descriptor, the content will be pushed to 218 // the fallback storage by default, or will be discarded when 219 // Store.IgnoreNoName is true. 220 func (s *Store) Push(ctx context.Context, expected ocispec.Descriptor, content io.Reader) error { 221 if s.isClosedSet() { 222 return ErrStoreClosed 223 } 224 225 if err := s.push(ctx, expected, content); err != nil { 226 if errors.Is(err, errSkipUnnamed) { 227 return nil 228 } 229 return err 230 } 231 232 if !s.ForceCAS { 233 if err := s.restoreDuplicates(ctx, expected); err != nil { 234 return fmt.Errorf("failed to restore duplicated file: %w", err) 235 } 236 } 237 238 return s.graph.Index(ctx, s, expected) 239 } 240 241 // push pushes the content, matching the expected descriptor. 242 // If name is not specified in the descriptor, the content will be pushed to 243 // the fallback storage by default, or will be discarded when 244 // Store.IgnoreNoName is true. 245 func (s *Store) push(ctx context.Context, expected ocispec.Descriptor, content io.Reader) error { 246 name := expected.Annotations[ocispec.AnnotationTitle] 247 if name == "" { 248 if s.IgnoreNoName { 249 return errSkipUnnamed 250 } 251 return s.fallbackStorage.Push(ctx, expected, content) 252 } 253 254 // check the status of the name 255 status := s.status(name) 256 status.Lock() 257 defer status.Unlock() 258 259 if status.exists { 260 return fmt.Errorf("%s: %w", name, ErrDuplicateName) 261 } 262 263 target, err := s.resolveWritePath(name) 264 if err != nil { 265 return fmt.Errorf("failed to resolve path for writing: %w", err) 266 } 267 268 if needUnpack := expected.Annotations[AnnotationUnpack]; needUnpack == "true" { 269 err = s.pushDir(name, target, expected, content) 270 } else { 271 err = s.pushFile(target, expected, content) 272 } 273 if err != nil { 274 return err 275 } 276 277 // update the name status as existed 278 status.exists = true 279 return nil 280 } 281 282 // restoreDuplicates restores successor files with same content but different names. 283 // See Store.ForceCAS for more info. 284 func (s *Store) restoreDuplicates(ctx context.Context, desc ocispec.Descriptor) error { 285 successors, err := content.Successors(ctx, s, desc) 286 if err != nil { 287 return err 288 } 289 for _, successor := range successors { 290 name := successor.Annotations[ocispec.AnnotationTitle] 291 if name == "" || s.nameExists(name) { 292 continue 293 } 294 if err := func() error { 295 desc := ocispec.Descriptor{ 296 MediaType: successor.MediaType, 297 Digest: successor.Digest, 298 Size: successor.Size, 299 } 300 rc, err := s.Fetch(ctx, desc) 301 if err != nil { 302 return fmt.Errorf("%q: %s: %w", name, desc.MediaType, err) 303 } 304 defer rc.Close() 305 if err := s.push(ctx, successor, rc); err != nil { 306 return fmt.Errorf("%q: %s: %w", name, desc.MediaType, err) 307 } 308 return nil 309 }(); err != nil && !errors.Is(err, errdef.ErrNotFound) { 310 return err 311 } 312 } 313 return nil 314 } 315 316 // Exists returns true if the described content exists. 317 func (s *Store) Exists(ctx context.Context, target ocispec.Descriptor) (bool, error) { 318 if s.isClosedSet() { 319 return false, ErrStoreClosed 320 } 321 322 // if the target has name, check if the name exists. 323 name := target.Annotations[ocispec.AnnotationTitle] 324 if name != "" && !s.nameExists(name) { 325 return false, nil 326 } 327 328 // check if the content exists in the store 329 _, exists := s.digestToPath.Load(target.Digest) 330 if exists { 331 return true, nil 332 } 333 334 // if the content does not exist in the store, 335 // then fall back to the fallback storage. 336 return s.fallbackStorage.Exists(ctx, target) 337 } 338 339 // Resolve resolves a reference to a descriptor. 340 func (s *Store) Resolve(ctx context.Context, ref string) (ocispec.Descriptor, error) { 341 if s.isClosedSet() { 342 return ocispec.Descriptor{}, ErrStoreClosed 343 } 344 345 if ref == "" { 346 return ocispec.Descriptor{}, errdef.ErrMissingReference 347 } 348 349 return s.resolver.Resolve(ctx, ref) 350 } 351 352 // Tag tags a descriptor with a reference string. 353 func (s *Store) Tag(ctx context.Context, desc ocispec.Descriptor, ref string) error { 354 if s.isClosedSet() { 355 return ErrStoreClosed 356 } 357 358 if ref == "" { 359 return errdef.ErrMissingReference 360 } 361 362 exists, err := s.Exists(ctx, desc) 363 if err != nil { 364 return err 365 } 366 if !exists { 367 return fmt.Errorf("%s: %s: %w", desc.Digest, desc.MediaType, errdef.ErrNotFound) 368 } 369 370 return s.resolver.Tag(ctx, desc, ref) 371 } 372 373 // Predecessors returns the nodes directly pointing to the current node. 374 // Predecessors returns nil without error if the node does not exists in the 375 // store. 376 func (s *Store) Predecessors(ctx context.Context, node ocispec.Descriptor) ([]ocispec.Descriptor, error) { 377 if s.isClosedSet() { 378 return nil, ErrStoreClosed 379 } 380 381 return s.graph.Predecessors(ctx, node) 382 } 383 384 // Add adds a file into the file store. 385 func (s *Store) Add(_ context.Context, name, mediaType, path string) (ocispec.Descriptor, error) { 386 if s.isClosedSet() { 387 return ocispec.Descriptor{}, ErrStoreClosed 388 } 389 390 if name == "" { 391 return ocispec.Descriptor{}, ErrMissingName 392 } 393 394 // check the status of the name 395 status := s.status(name) 396 status.Lock() 397 defer status.Unlock() 398 399 if status.exists { 400 return ocispec.Descriptor{}, fmt.Errorf("%s: %w", name, ErrDuplicateName) 401 } 402 403 if path == "" { 404 path = name 405 } 406 path = s.absPath(path) 407 408 fi, err := os.Stat(path) 409 if err != nil { 410 return ocispec.Descriptor{}, fmt.Errorf("failed to stat %s: %w", path, err) 411 } 412 413 // generate descriptor 414 var desc ocispec.Descriptor 415 if fi.IsDir() { 416 desc, err = s.descriptorFromDir(name, mediaType, path) 417 } else { 418 desc, err = s.descriptorFromFile(fi, mediaType, path) 419 } 420 if err != nil { 421 return ocispec.Descriptor{}, fmt.Errorf("failed to generate descriptor from %s: %w", path, err) 422 } 423 424 if desc.Annotations == nil { 425 desc.Annotations = make(map[string]string) 426 } 427 desc.Annotations[ocispec.AnnotationTitle] = name 428 429 // update the name status as existed 430 status.exists = true 431 return desc, nil 432 } 433 434 // saveFile saves content matching the descriptor to the given file. 435 func (s *Store) saveFile(fp *os.File, expected ocispec.Descriptor, content io.Reader) (err error) { 436 defer func() { 437 closeErr := fp.Close() 438 if err == nil { 439 err = closeErr 440 } 441 }() 442 path := fp.Name() 443 444 buf := bufPool.Get().(*[]byte) 445 defer bufPool.Put(buf) 446 if err := ioutil.CopyBuffer(fp, content, *buf, expected); err != nil { 447 return fmt.Errorf("failed to copy content to %s: %w", path, err) 448 } 449 450 s.digestToPath.Store(expected.Digest, path) 451 return nil 452 } 453 454 // pushFile saves content matching the descriptor to the target path. 455 func (s *Store) pushFile(target string, expected ocispec.Descriptor, content io.Reader) error { 456 if err := ensureDir(filepath.Dir(target)); err != nil { 457 return fmt.Errorf("failed to ensure directories of the target path: %w", err) 458 } 459 460 fp, err := os.Create(target) 461 if err != nil { 462 return fmt.Errorf("failed to create file %s: %w", target, err) 463 } 464 465 return s.saveFile(fp, expected, content) 466 } 467 468 // pushDir saves content matching the descriptor to the target directory. 469 func (s *Store) pushDir(name, target string, expected ocispec.Descriptor, content io.Reader) (err error) { 470 if err := ensureDir(target); err != nil { 471 return fmt.Errorf("failed to ensure directories of the target path: %w", err) 472 } 473 474 gz, err := s.tempFile() 475 if err != nil { 476 return err 477 } 478 479 gzPath := gz.Name() 480 // the digest of the gz is verified while saving 481 if err := s.saveFile(gz, expected, content); err != nil { 482 return fmt.Errorf("failed to save gzip to %s: %w", gzPath, err) 483 } 484 485 checksum := expected.Annotations[AnnotationDigest] 486 buf := bufPool.Get().(*[]byte) 487 defer bufPool.Put(buf) 488 if err := extractTarGzip(target, name, gzPath, checksum, *buf); err != nil { 489 return fmt.Errorf("failed to extract tar to %s: %w", target, err) 490 } 491 return nil 492 } 493 494 // descriptorFromDir generates descriptor from the given directory. 495 func (s *Store) descriptorFromDir(name, mediaType, dir string) (desc ocispec.Descriptor, err error) { 496 // make a temp file to store the gzip 497 gz, err := s.tempFile() 498 if err != nil { 499 return ocispec.Descriptor{}, err 500 } 501 defer func() { 502 closeErr := gz.Close() 503 if err == nil { 504 err = closeErr 505 } 506 }() 507 508 // compress the directory 509 gzDigester := digest.Canonical.Digester() 510 gzw := gzip.NewWriter(io.MultiWriter(gz, gzDigester.Hash())) 511 defer func() { 512 closeErr := gzw.Close() 513 if err == nil { 514 err = closeErr 515 } 516 }() 517 518 tarDigester := digest.Canonical.Digester() 519 tw := io.MultiWriter(gzw, tarDigester.Hash()) 520 buf := bufPool.Get().(*[]byte) 521 defer bufPool.Put(buf) 522 if err := tarDirectory(dir, name, tw, s.TarReproducible, *buf); err != nil { 523 return ocispec.Descriptor{}, fmt.Errorf("failed to tar %s: %w", dir, err) 524 } 525 526 // flush all 527 if err := gzw.Close(); err != nil { 528 return ocispec.Descriptor{}, err 529 } 530 if err := gz.Sync(); err != nil { 531 return ocispec.Descriptor{}, err 532 } 533 534 fi, err := gz.Stat() 535 if err != nil { 536 return ocispec.Descriptor{}, err 537 } 538 539 // map gzip digest to gzip path 540 gzDigest := gzDigester.Digest() 541 s.digestToPath.Store(gzDigest, gz.Name()) 542 543 // generate descriptor 544 if mediaType == "" { 545 mediaType = defaultBlobDirMediaType 546 } 547 548 return ocispec.Descriptor{ 549 MediaType: mediaType, 550 Digest: gzDigest, // digest for the compressed content 551 Size: fi.Size(), 552 Annotations: map[string]string{ 553 AnnotationDigest: tarDigester.Digest().String(), // digest fot the uncompressed content 554 AnnotationUnpack: "true", // the content needs to be unpacked 555 }, 556 }, nil 557 } 558 559 // descriptorFromFile generates descriptor from the given file. 560 func (s *Store) descriptorFromFile(fi os.FileInfo, mediaType, path string) (desc ocispec.Descriptor, err error) { 561 fp, err := os.Open(path) 562 if err != nil { 563 return ocispec.Descriptor{}, err 564 } 565 defer func() { 566 closeErr := fp.Close() 567 if err == nil { 568 err = closeErr 569 } 570 }() 571 572 dgst, err := digest.FromReader(fp) 573 if err != nil { 574 return ocispec.Descriptor{}, err 575 } 576 // map digest to file path 577 s.digestToPath.Store(dgst, path) 578 579 // generate descriptor 580 if mediaType == "" { 581 mediaType = defaultBlobMediaType 582 } 583 584 return ocispec.Descriptor{ 585 MediaType: mediaType, 586 Digest: dgst, 587 Size: fi.Size(), 588 }, nil 589 } 590 591 // resolveWritePath resolves the path to write for the given name. 592 func (s *Store) resolveWritePath(name string) (string, error) { 593 path := s.absPath(name) 594 if !s.AllowPathTraversalOnWrite { 595 base, err := filepath.Abs(s.workingDir) 596 if err != nil { 597 return "", err 598 } 599 target, err := filepath.Abs(path) 600 if err != nil { 601 return "", err 602 } 603 rel, err := filepath.Rel(base, target) 604 if err != nil { 605 return "", ErrPathTraversalDisallowed 606 } 607 rel = filepath.ToSlash(rel) 608 if strings.HasPrefix(rel, "../") || rel == ".." { 609 return "", ErrPathTraversalDisallowed 610 } 611 } 612 if s.DisableOverwrite { 613 if _, err := os.Stat(path); err == nil { 614 return "", ErrOverwriteDisallowed 615 } else if !os.IsNotExist(err) { 616 return "", err 617 } 618 } 619 return path, nil 620 } 621 622 // status returns the nameStatus for the given name. 623 func (s *Store) status(name string) *nameStatus { 624 v, _ := s.nameToStatus.LoadOrStore(name, &nameStatus{sync.RWMutex{}, false}) 625 status := v.(*nameStatus) 626 return status 627 } 628 629 // nameExists returns if the given name exists in the file store. 630 func (s *Store) nameExists(name string) bool { 631 status := s.status(name) 632 status.RLock() 633 defer status.RUnlock() 634 635 return status.exists 636 } 637 638 // tempFile creates a temp file with the file name format "oras_file_randomString", 639 // and returns the pointer to the temp file. 640 func (s *Store) tempFile() (*os.File, error) { 641 tmp, err := os.CreateTemp("", "oras_file_*") 642 if err != nil { 643 return nil, err 644 } 645 646 s.tmpFiles.Store(tmp.Name(), true) 647 return tmp, nil 648 } 649 650 // absPath returns the absolute path of the path. 651 func (s *Store) absPath(path string) string { 652 if filepath.IsAbs(path) { 653 return path 654 } 655 return filepath.Join(s.workingDir, path) 656 } 657 658 // isClosedSet returns true if the `closed` flag is set, otherwise returns false. 659 func (s *Store) isClosedSet() bool { 660 return atomic.LoadInt32(&s.closed) == 1 661 } 662 663 // setClosed sets the `closed` flag. 664 func (s *Store) setClosed() { 665 atomic.StoreInt32(&s.closed, 1) 666 } 667 668 // ensureDir ensures the directories of the path exists. 669 func ensureDir(path string) error { 670 return os.MkdirAll(path, 0777) 671 }