github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/setup/setup.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package setup extracts packages from setup.py.
    16  package setup
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"fmt"
    22  	"path/filepath"
    23  	"regexp"
    24  	"strings"
    25  
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem"
    28  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/plugin"
    31  	"github.com/google/osv-scalibr/purl"
    32  	"github.com/google/osv-scalibr/stats"
    33  )
    34  
    35  const (
    36  	// Name is the unique name of this extractor.
    37  	Name = "python/setup"
    38  
    39  	// defaultMaxFileSizeBytes is the file size limit that DefaultConfig puts into
    40  	// Config.MaxFileSizeBytes; FileRequired returns false for files above the limit.
    41  	defaultMaxFileSizeBytes = 10 * units.MiB
    42  )
    43  
    44  // Config is the configuration for the Extractor.
    45  type Config struct {
    46  	// Stats is a stats collector for reporting metrics; when nil, nothing is reported.
    47  	Stats stats.Collector
    48  	// MaxFileSizeBytes is the maximum file size this extractor will accept. If
    49  	// `FileRequired` gets a bigger file, it returns false. Values <= 0 disable the check.
    50  	MaxFileSizeBytes int64
    51  }
    52  
    53  // DefaultConfig returns the default configuration for the setup.py extractor.
    54  func DefaultConfig() Config {
    55  	return Config{
    56  		Stats:            nil,
    57  		MaxFileSizeBytes: defaultMaxFileSizeBytes,
    58  	}
    59  }
    60  
    61  // Extractor extracts python packages from setup.py.
    62  type Extractor struct {
    63  	stats            stats.Collector // metrics collector; reporting is skipped when nil
    64  	maxFileSizeBytes int64           // size limit enforced by FileRequired; <= 0 disables it
    65  }
    66  
    67  // New returns a setup.py extractor.
    68  func New(cfg Config) *Extractor {
    69  	return &Extractor{
    70  		stats:            cfg.Stats,
    71  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    72  	}
    73  }
    74  
    75  // NewDefault returns an extractor with the default config settings.
    76  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    77  
    78  // Config returns the configuration of the extractor.
    79  func (e Extractor) Config() Config {
    80  	return Config{
    81  		Stats:            e.stats,
    82  		MaxFileSizeBytes: e.maxFileSizeBytes,
    83  	}
    84  }
    85  
    86  // Name of the extractor.
    87  func (e Extractor) Name() string { return Name }
    88  
    89  // Version of the extractor.
    90  func (e Extractor) Version() int { return 0 }
    91  
    92  // Requirements of the extractor.
    93  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    94  
    95  // FileRequired returns true if the specified file matches python setup.py file pattern.
    96  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    97  	path := api.Path()
    98  
    99  	if filepath.Base(path) != "setup.py" {
   100  		return false
   101  	}
   102  
   103  	fileinfo, err := api.Stat()
   104  	if err != nil {
   105  		return false
   106  	}
   107  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   108  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   109  		return false
   110  	}
   111  
   112  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   113  	return true
   114  }
   115  
   116  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   117  	if e.stats == nil {
   118  		return
   119  	}
   120  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   121  		Path:          path,
   122  		Result:        result,
   123  		FileSizeBytes: fileSizeBytes,
   124  	})
   125  }
   126  
   127  // Extract extracts packages from setup.py files passed through the scan input.
   128  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   129  	pkgs, err := e.extractFromInput(ctx, input)
   130  
   131  	if e.stats != nil {
   132  		var fileSizeBytes int64
   133  		if input.Info != nil {
   134  			fileSizeBytes = input.Info.Size()
   135  		}
   136  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   137  			Path:          input.Path,
   138  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   139  			FileSizeBytes: fileSizeBytes,
   140  		})
   141  	}
   142  	return inventory.Inventory{Packages: pkgs}, err
   143  }
   144  
   145  var packageVersionRe = regexp.MustCompile(`['"]\W?(\w+)\W?(==|>=|<=)\W?([\w.]*)`)
   146  
   147  func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) {
   148  	s := bufio.NewScanner(input.Reader)
   149  	packages := []*extractor.Package{}
   150  
   151  	for s.Scan() {
   152  		// Return if canceled or exceeding deadline.
   153  		if err := ctx.Err(); err != nil {
   154  			return packages, fmt.Errorf("%s halted due to context error: %w", e.Name(), err)
   155  		}
   156  
   157  		line := s.Text()
   158  		line = strings.TrimSpace(line)
   159  
   160  		// Skip commented lines
   161  		if strings.HasPrefix(line, "#") {
   162  			continue
   163  		}
   164  
   165  		matches := packageVersionRe.FindAllStringSubmatch(line, -1)
   166  
   167  		for _, match := range matches {
   168  			if len(match) != 4 {
   169  				continue
   170  			}
   171  			if containsTemplate(match[0]) {
   172  				continue
   173  			}
   174  
   175  			pkgName := strings.TrimSpace(match[1])
   176  			comp := match[2]
   177  			pkgVersion := strings.TrimSpace(match[3])
   178  
   179  			p := &extractor.Package{
   180  				Name:      pkgName,
   181  				Version:   pkgVersion,
   182  				PURLType:  purl.TypePyPi,
   183  				Locations: []string{input.Path},
   184  				Metadata:  &Metadata{VersionComparator: comp},
   185  			}
   186  
   187  			packages = append(packages, p)
   188  		}
   189  
   190  		if s.Err() != nil {
   191  			return packages, fmt.Errorf("error while scanning setup.py file: %w", s.Err())
   192  		}
   193  	}
   194  
   195  	return packages, nil
   196  }
   197  
   198  func containsTemplate(s string) bool {
   199  	return strings.Contains(s, `%s`) || strings.ContainsAny(s, "%{}")
   200  }