github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/condameta/condameta.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package condameta extracts Conda package metadata from conda-meta JSON files. 16 package condameta 17 18 import ( 19 "context" 20 "encoding/json" 21 "errors" 22 "fmt" 23 "io" 24 "path/filepath" 25 "strings" 26 27 "github.com/google/osv-scalibr/extractor" 28 "github.com/google/osv-scalibr/extractor/filesystem" 29 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 30 "github.com/google/osv-scalibr/inventory" 31 "github.com/google/osv-scalibr/plugin" 32 "github.com/google/osv-scalibr/purl" 33 "github.com/google/osv-scalibr/stats" 34 ) 35 36 const ( 37 // Name is the unique name of this extractor. 38 Name = "python/condameta" 39 ) 40 41 // Config is the configuration for the Extractor. 42 type Config struct { 43 Stats stats.Collector 44 MaxFileSizeBytes int64 45 } 46 47 // DefaultConfig returns the default configuration for the extractor. 48 func DefaultConfig() Config { 49 return Config{ 50 Stats: nil, 51 MaxFileSizeBytes: 10 * units.MiB, 52 } 53 } 54 55 // Config returns the configuration of the extractor. 56 func (e Extractor) Config() Config { 57 return Config{ 58 Stats: e.stats, 59 MaxFileSizeBytes: e.maxFileSizeBytes, 60 } 61 } 62 63 // Extractor extracts packages from Conda package metadata. 
64 type Extractor struct { 65 stats stats.Collector 66 maxFileSizeBytes int64 67 } 68 69 // New returns a Conda package metadata extractor. 70 func New(cfg Config) *Extractor { 71 return &Extractor{ 72 stats: cfg.Stats, 73 maxFileSizeBytes: cfg.MaxFileSizeBytes, 74 } 75 } 76 77 // NewDefault returns an extractor with the default config settings. 78 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 79 80 // Name of the extractor. 81 func (e Extractor) Name() string { return Name } 82 83 // Version of the extractor. 84 func (e Extractor) Version() int { return 0 } 85 86 // Requirements of the extractor. 87 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 88 89 // FileRequired checks if the file is a valid Conda metadata JSON file. 90 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 91 path := api.Path() 92 93 // Normalize the path to use forward slashes, making it platform-independent 94 path = filepath.ToSlash(path) 95 96 // Verify the path contains the `envs/` directory 97 if !(strings.HasPrefix(path, "envs/") || strings.Contains(path, "/envs/")) { 98 return false 99 } 100 101 // Verify extension 102 if !strings.HasSuffix(path, ".json") { 103 return false 104 } 105 106 // Ensure the last directory is `conda-meta`. 107 if !strings.HasSuffix(filepath.Dir(path), "conda-meta") { 108 return false 109 } 110 111 // Check file size if a maximum limit is set. 
112 fileinfo, err := api.Stat() 113 if err != nil { 114 return false 115 } 116 117 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 118 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 119 return false 120 } 121 122 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 123 return true 124 } 125 126 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 127 if e.stats == nil { 128 return 129 } 130 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 131 Path: path, 132 Result: result, 133 FileSizeBytes: fileSizeBytes, 134 }) 135 } 136 137 // Extract parses and extracts dependency data from Conda metadata files. 138 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 139 pkg, err := e.extractFromInput(input) 140 if e.stats != nil { 141 var fileSizeBytes int64 142 if input.Info != nil { 143 fileSizeBytes = input.Info.Size() 144 } 145 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 146 Path: input.Path, 147 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 148 FileSizeBytes: fileSizeBytes, 149 }) 150 } 151 return inventory.Inventory{Packages: pkg}, err 152 } 153 154 func (e Extractor) extractFromInput(input *filesystem.ScanInput) ([]*extractor.Package, error) { 155 // Parse the metadata and get a package 156 pkg, err := parse(input.Reader) 157 if err != nil { 158 return nil, err 159 } 160 161 // Return an empty slice if the package name or version is empty 162 if pkg.Name == "" || pkg.Version == "" { 163 return nil, errors.New("package name or version is empty") 164 } 165 166 return []*extractor.Package{&extractor.Package{ 167 Name: pkg.Name, 168 Version: pkg.Version, 169 PURLType: purl.TypePyPi, 170 Locations: []string{ 171 input.Path, 172 }, 173 }}, nil 174 } 175 176 // parse reads a Conda metadata JSON file and extracts a package. 
func parse(r io.Reader) (*condaPackage, error) {
	decoder := json.NewDecoder(r)
	meta := new(condaPackage)
	// Any decoding failure is wrapped so callers can still unwrap the cause.
	if err := decoder.Decode(meta); err != nil {
		return nil, fmt.Errorf("failed to parse Conda metadata: %w", err)
	}
	return meta, nil
}

// condaPackage holds the fields decoded from a Conda metadata JSON document.
// JSON keys other than the ones tagged below are ignored by the decoder.
type condaPackage struct {
	// Name is populated from the top-level "name" key.
	Name string `json:"name"`
	// Version is populated from the top-level "version" key.
	Version string `json:"version"`
}