// Source: github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/embeddedfs/archive/archive.go

// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package archive provides an extractor for extracting software inventories from archives.
//
// Files whose path ends in ".tar" or ".tar.gz" are unpacked into a temporary
// directory and reported as embedded filesystems.
package archive

import (
	"compress/gzip"
	"context"
	"errors"
	"fmt"
	"strings"
	"sync"

	cpb "github.com/google/osv-scalibr/binary/proto/config_go_proto"
	"github.com/google/osv-scalibr/extractor/filesystem"
	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
	scalibrfs "github.com/google/osv-scalibr/fs"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/plugin"
)

const (
	// Name is the unique identifier for the archive extractor.
	Name = "embeddedfs/archive"
)

// Extractor implements the filesystem.Extractor interface for archive extraction.
//
// Construct it with New so that the size limit is populated from the plugin
// configuration.
type Extractor struct {
	// maxFileSizeBytes is the maximum size of an archive file that can be traversed.
	// If this limit is greater than zero and a file is encountered that is larger
	// than this limit, the file is ignored. A value of zero or less disables the
	// size check.
	maxFileSizeBytes int64
}

// New returns a new archive extractor.
48 func New(cfg *cpb.PluginConfig) filesystem.Extractor { 49 maxSize := cfg.MaxFileSizeBytes 50 specific := plugin.FindConfig(cfg, func(c *cpb.PluginSpecificConfig) *cpb.ArchiveConfig { return c.GetArchive() }) 51 if specific.GetMaxFileSizeBytes() > 0 { 52 maxSize = specific.GetMaxFileSizeBytes() 53 } 54 return &Extractor{maxFileSizeBytes: maxSize} 55 } 56 57 // Name returns the name of the extractor. 58 func (e *Extractor) Name() string { 59 return Name 60 } 61 62 // Version returns the version of the extractor. 63 func (e *Extractor) Version() int { 64 return 0 65 } 66 67 // Requirements returns the requirements for the extractor. 68 func (e *Extractor) Requirements() *plugin.Capabilities { 69 return &plugin.Capabilities{} 70 } 71 72 // FileRequired checks if the file is a supported archive. 73 func (e *Extractor) FileRequired(api filesystem.FileAPI) bool { 74 path := api.Path() 75 if !strings.HasSuffix(path, ".tar") && !strings.HasSuffix(path, ".tar.gz") { 76 return false 77 } 78 79 fileinfo, err := api.Stat() 80 if err != nil { 81 return false 82 } 83 84 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 85 return false 86 } 87 88 return true 89 } 90 91 // Extract returns an Inventory with embedded filesystems for the given archive file. 
92 func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 93 if input.Reader == nil { 94 return inventory.Inventory{}, errors.New("input.Reader is nil") 95 } 96 97 var tempDir string 98 var err error 99 if strings.HasSuffix(input.Path, ".tar") { 100 tempDir, err = common.TARToTempDir(input.Reader) 101 if err != nil { 102 return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err) 103 } 104 } else if strings.HasSuffix(input.Path, ".tar.gz") { 105 reader, err := gzip.NewReader(input.Reader) 106 if err != nil { 107 return inventory.Inventory{}, fmt.Errorf("gzip.NewReader(%q): %w", input.Path, err) 108 } 109 tempDir, err = common.TARToTempDir(reader) 110 if err != nil { 111 return inventory.Inventory{}, fmt.Errorf("common.TARToTempDir(%q): %w", input.Path, err) 112 } 113 } else { 114 return inventory.Inventory{}, fmt.Errorf("%q not a supported archive format", input.Path) 115 } 116 117 var refCount int32 = 1 118 var refMu sync.Mutex 119 getEmbeddedFS := func(ctx context.Context) (scalibrfs.FS, error) { 120 return &common.EmbeddedDirFS{ 121 FS: scalibrfs.DirFS(tempDir), 122 File: nil, 123 TmpPaths: []string{tempDir}, 124 RefCount: &refCount, 125 RefMu: &refMu, 126 }, nil 127 } 128 return inventory.Inventory{ 129 EmbeddedFSs: []*inventory.EmbeddedFS{ 130 { 131 Path: input.Path, 132 GetEmbeddedFS: getEmbeddedFS, 133 }}, 134 }, nil 135 }