github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/setup/setup.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package setup extracts packages from setup.py. 16 package setup 17 18 import ( 19 "bufio" 20 "context" 21 "fmt" 22 "path/filepath" 23 "regexp" 24 "strings" 25 26 "github.com/google/osv-scalibr/extractor" 27 "github.com/google/osv-scalibr/extractor/filesystem" 28 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/plugin" 31 "github.com/google/osv-scalibr/purl" 32 "github.com/google/osv-scalibr/stats" 33 ) 34 35 const ( 36 // Name is the unique name of this extractor. 37 Name = "python/setup" 38 39 // defaultMaxFileSizeBytes is the maximum file size an extractor will unmarshal. 40 // If Extract gets a bigger file, it will return an error. 41 defaultMaxFileSizeBytes = 10 * units.MiB 42 ) 43 44 // Config is the configuration for the Extractor. 45 type Config struct { 46 // Stats is a stats collector for reporting metrics. 47 Stats stats.Collector 48 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 49 // `FileRequired` gets a bigger file, it will return false, 50 MaxFileSizeBytes int64 51 } 52 53 // DefaultConfig returns the default configuration for the setup.py extractor. 54 func DefaultConfig() Config { 55 return Config{ 56 Stats: nil, 57 MaxFileSizeBytes: defaultMaxFileSizeBytes, 58 } 59 } 60 61 // Extractor extracts python packages from setup.py. 62 type Extractor struct { 63 stats stats.Collector 64 maxFileSizeBytes int64 65 } 66 67 // New returns a setup.py extractor. 68 func New(cfg Config) *Extractor { 69 return &Extractor{ 70 stats: cfg.Stats, 71 maxFileSizeBytes: cfg.MaxFileSizeBytes, 72 } 73 } 74 75 // NewDefault returns an extractor with the default config settings. 76 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 77 78 // Config returns the configuration of the extractor. 79 func (e Extractor) Config() Config { 80 return Config{ 81 Stats: e.stats, 82 MaxFileSizeBytes: e.maxFileSizeBytes, 83 } 84 } 85 86 // Name of the extractor. 87 func (e Extractor) Name() string { return Name } 88 89 // Version of the extractor. 90 func (e Extractor) Version() int { return 0 } 91 92 // Requirements of the extractor. 93 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 94 95 // FileRequired returns true if the specified file matches python setup.py file pattern. 96 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 97 path := api.Path() 98 99 if filepath.Base(path) != "setup.py" { 100 return false 101 } 102 103 fileinfo, err := api.Stat() 104 if err != nil { 105 return false 106 } 107 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 108 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 109 return false 110 } 111 112 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 113 return true 114 } 115 116 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 117 if e.stats == nil { 118 return 119 } 120 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 121 Path: path, 122 Result: result, 123 FileSizeBytes: fileSizeBytes, 124 }) 125 } 126 127 // Extract extracts packages from setup.py files passed through the scan input. 128 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 129 pkgs, err := e.extractFromInput(ctx, input) 130 131 if e.stats != nil { 132 var fileSizeBytes int64 133 if input.Info != nil { 134 fileSizeBytes = input.Info.Size() 135 } 136 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 137 Path: input.Path, 138 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 139 FileSizeBytes: fileSizeBytes, 140 }) 141 } 142 return inventory.Inventory{Packages: pkgs}, err 143 } 144 145 var packageVersionRe = regexp.MustCompile(`['"]\W?(\w+)\W?(==|>=|<=)\W?([\w.]*)`) 146 147 func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) { 148 s := bufio.NewScanner(input.Reader) 149 packages := []*extractor.Package{} 150 151 for s.Scan() { 152 // Return if canceled or exceeding deadline. 153 if err := ctx.Err(); err != nil { 154 return packages, fmt.Errorf("%s halted due to context error: %w", e.Name(), err) 155 } 156 157 line := s.Text() 158 line = strings.TrimSpace(line) 159 160 // Skip commented lines 161 if strings.HasPrefix(line, "#") { 162 continue 163 } 164 165 matches := packageVersionRe.FindAllStringSubmatch(line, -1) 166 167 for _, match := range matches { 168 if len(match) != 4 { 169 continue 170 } 171 if containsTemplate(match[0]) { 172 continue 173 } 174 175 pkgName := strings.TrimSpace(match[1]) 176 comp := match[2] 177 pkgVersion := strings.TrimSpace(match[3]) 178 179 p := &extractor.Package{ 180 Name: pkgName, 181 Version: pkgVersion, 182 PURLType: purl.TypePyPi, 183 Locations: []string{input.Path}, 184 Metadata: &Metadata{VersionComparator: comp}, 185 } 186 187 packages = append(packages, p) 188 } 189 190 if s.Err() != nil { 191 return packages, fmt.Errorf("error while scanning setup.py file: %w", s.Err()) 192 } 193 } 194 195 return packages, nil 196 } 197 198 func containsTemplate(s string) bool { 199 return strings.Contains(s, `%s`) || strings.ContainsAny(s, "%{}") 200 }