github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/condameta/condameta.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package condameta extracts Conda package metadata from conda-meta JSON files.
    16  package condameta
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"path/filepath"
    25  	"strings"
    26  
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    30  	"github.com/google/osv-scalibr/inventory"
    31  	"github.com/google/osv-scalibr/plugin"
    32  	"github.com/google/osv-scalibr/purl"
    33  	"github.com/google/osv-scalibr/stats"
    34  )
    35  
    36  const (
    37  	// Name is the unique name of this extractor.
    38  	Name = "python/condameta"
    39  )
    40  
// Config is the configuration for the Extractor.
//
// Stats is the collector that FileRequired and Extract report their results
// to; when it is nil, no stats are reported. MaxFileSizeBytes is the size above
// which FileRequired rejects a file; a value of zero or less disables the size
// check.
type Config struct {
	Stats            stats.Collector
	MaxFileSizeBytes int64
}
    46  
    47  // DefaultConfig returns the default configuration for the extractor.
    48  func DefaultConfig() Config {
    49  	return Config{
    50  		Stats:            nil,
    51  		MaxFileSizeBytes: 10 * units.MiB,
    52  	}
    53  }
    54  
    55  // Config returns the configuration of the extractor.
    56  func (e Extractor) Config() Config {
    57  	return Config{
    58  		Stats:            e.stats,
    59  		MaxFileSizeBytes: e.maxFileSizeBytes,
    60  	}
    61  }
    62  
// Extractor extracts packages from Conda package metadata.
//
// stats is the collector that results are reported to and may be nil, in which
// case nothing is reported. maxFileSizeBytes is the size limit applied by
// FileRequired; a value of zero or less means no limit.
type Extractor struct {
	stats            stats.Collector
	maxFileSizeBytes int64
}
    68  
    69  // New returns a Conda package metadata extractor.
    70  func New(cfg Config) *Extractor {
    71  	return &Extractor{
    72  		stats:            cfg.Stats,
    73  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    74  	}
    75  }
    76  
    77  // NewDefault returns an extractor with the default config settings.
    78  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    79  
    80  // Name of the extractor.
    81  func (e Extractor) Name() string { return Name }
    82  
    83  // Version of the extractor.
    84  func (e Extractor) Version() int { return 0 }
    85  
    86  // Requirements of the extractor.
    87  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    88  
    89  // FileRequired checks if the file is a valid Conda metadata JSON file.
    90  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    91  	path := api.Path()
    92  
    93  	// Normalize the path to use forward slashes, making it platform-independent
    94  	path = filepath.ToSlash(path)
    95  
    96  	// Verify the path contains the `envs/` directory
    97  	if !(strings.HasPrefix(path, "envs/") || strings.Contains(path, "/envs/")) {
    98  		return false
    99  	}
   100  
   101  	// Verify extension
   102  	if !strings.HasSuffix(path, ".json") {
   103  		return false
   104  	}
   105  
   106  	// Ensure the last directory is `conda-meta`.
   107  	if !strings.HasSuffix(filepath.Dir(path), "conda-meta") {
   108  		return false
   109  	}
   110  
   111  	// Check file size if a maximum limit is set.
   112  	fileinfo, err := api.Stat()
   113  	if err != nil {
   114  		return false
   115  	}
   116  
   117  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   118  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   119  		return false
   120  	}
   121  
   122  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   123  	return true
   124  }
   125  
   126  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   127  	if e.stats == nil {
   128  		return
   129  	}
   130  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   131  		Path:          path,
   132  		Result:        result,
   133  		FileSizeBytes: fileSizeBytes,
   134  	})
   135  }
   136  
   137  // Extract parses and extracts dependency data from Conda metadata files.
   138  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   139  	pkg, err := e.extractFromInput(input)
   140  	if e.stats != nil {
   141  		var fileSizeBytes int64
   142  		if input.Info != nil {
   143  			fileSizeBytes = input.Info.Size()
   144  		}
   145  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   146  			Path:          input.Path,
   147  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   148  			FileSizeBytes: fileSizeBytes,
   149  		})
   150  	}
   151  	return inventory.Inventory{Packages: pkg}, err
   152  }
   153  
   154  func (e Extractor) extractFromInput(input *filesystem.ScanInput) ([]*extractor.Package, error) {
   155  	// Parse the metadata and get a package
   156  	pkg, err := parse(input.Reader)
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  
   161  	// Return an empty slice if the package name or version is empty
   162  	if pkg.Name == "" || pkg.Version == "" {
   163  		return nil, errors.New("package name or version is empty")
   164  	}
   165  
   166  	return []*extractor.Package{&extractor.Package{
   167  		Name:     pkg.Name,
   168  		Version:  pkg.Version,
   169  		PURLType: purl.TypePyPi,
   170  		Locations: []string{
   171  			input.Path,
   172  		},
   173  	}}, nil
   174  }
   175  
   176  // parse reads a Conda metadata JSON file and extracts a package.
   177  func parse(r io.Reader) (*condaPackage, error) {
   178  	var pkg condaPackage
   179  	if err := json.NewDecoder(r).Decode(&pkg); err != nil {
   180  		return nil, fmt.Errorf("failed to parse Conda metadata: %w", err)
   181  	}
   182  	return &pkg, nil
   183  }
   184  
   185  type condaPackage struct {
   186  	Name    string `json:"name"`
   187  	Version string `json:"version"`
   188  }