github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/uvlock/uvlock.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package uvlock extracts uv.lock files.
    16  package uvlock
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"path/filepath"
    22  	"sort"
    23  	"strings"
    24  
    25  	"github.com/BurntSushi/toml"
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem"
    28  	"github.com/google/osv-scalibr/extractor/filesystem/osv"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/plugin"
    31  	"github.com/google/osv-scalibr/purl"
    32  )
    33  
    34  const (
    35  	// Name is the unique name of this extractor.
    36  	Name = "python/uvlock"
    37  )
    38  
// uvLockPackageSource is the decoded "source" table of a package entry in a
// uv.lock file. Only the keys the extractor reads are mapped.
type uvLockPackageSource struct {
	// Virtual is the "virtual" key; the value "." marks the root package,
	// which Extract skips.
	Virtual string `toml:"virtual"`
	// Git is the "git" key; Extract treats everything after the first "#"
	// as the commit.
	Git     string `toml:"git"`
}
    43  
// uvLockPackage is a single decoded "package" entry of a uv.lock file.
type uvLockPackage struct {
	// Name is the package's "name" key.
	Name    string              `toml:"name"`
	// Version is the package's "version" key.
	Version string              `toml:"version"`
	// Source is the package's "source" table.
	Source  uvLockPackageSource `toml:"source"`

	// uv stores "groups" as a table under "package" after all the packages, which due
	// to how TOML works means it ends up being a property on the last package, even
	// though in this context it's a global property rather than being per-package
	Groups map[string][]uvOptionalDependency `toml:"optional-dependencies"`
}
    54  
// uvOptionalDependency is one entry of an "optional-dependencies" group; only
// the dependency's name is decoded.
type uvOptionalDependency struct {
	// Name is compared against package names to decide group membership.
	Name string `toml:"name"`
}
// uvLockFile is the top-level shape of a decoded uv.lock file.
type uvLockFile struct {
	// Version is the top-level "version" key. It is decoded but not read by
	// the code in this file (presumably the lockfile format version - confirm).
	Version  int             `toml:"version"`
	// Packages holds every "package" entry, in file order.
	Packages []uvLockPackage `toml:"package"`
}
    62  
// Extractor extracts python packages from uv.lock files.
// It has no fields, so it carries no configuration or state.
type Extractor struct{}
    65  
    66  // New returns a new instance of the extractor.
    67  func New() filesystem.Extractor { return &Extractor{} }
    68  
    69  // Name of the extractor
    70  func (e Extractor) Name() string { return Name }
    71  
    72  // Version of the extractor
    73  func (e Extractor) Version() int { return 0 }
    74  
    75  // Requirements of the extractor
    76  func (e Extractor) Requirements() *plugin.Capabilities {
    77  	return &plugin.Capabilities{}
    78  }
    79  
    80  // FileRequired returns true if the specified file matches uv lockfile patterns
    81  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    82  	return filepath.Base(api.Path()) == "uv.lock"
    83  }
    84  
    85  // Extract extracts packages from uv.lock files passed through the scan input.
    86  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    87  	var parsedLockfile *uvLockFile
    88  
    89  	_, err := toml.NewDecoder(input.Reader).Decode(&parsedLockfile)
    90  
    91  	if err != nil {
    92  		return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err)
    93  	}
    94  
    95  	packages := make([]*extractor.Package, 0, len(parsedLockfile.Packages))
    96  
    97  	var groups map[string][]uvOptionalDependency
    98  
    99  	// uv stores "groups" as a table under "package" after all the packages, which due
   100  	// to how TOML works means it ends up being a property on the last package, even
   101  	// through in this context it's a global property rather than being per-package
   102  	if len(parsedLockfile.Packages) > 0 {
   103  		groups = parsedLockfile.Packages[len(parsedLockfile.Packages)-1].Groups
   104  	}
   105  
   106  	for _, lockPackage := range parsedLockfile.Packages {
   107  		// skip including the root "package", since its name and version are most likely arbitrary
   108  		if lockPackage.Source.Virtual == "." {
   109  			continue
   110  		}
   111  
   112  		_, commit, _ := strings.Cut(lockPackage.Source.Git, "#")
   113  
   114  		pkgDetails := &extractor.Package{
   115  			Name:      lockPackage.Name,
   116  			Version:   lockPackage.Version,
   117  			PURLType:  purl.TypePyPi,
   118  			Locations: []string{input.Path},
   119  		}
   120  
   121  		if commit != "" {
   122  			pkgDetails.SourceCode = &extractor.SourceCodeIdentifier{
   123  				Commit: commit,
   124  			}
   125  		}
   126  
   127  		depGroupVals := []string{}
   128  
   129  		for group, deps := range groups {
   130  			for _, dep := range deps {
   131  				if dep.Name == lockPackage.Name {
   132  					depGroupVals = append(depGroupVals, group)
   133  				}
   134  			}
   135  		}
   136  
   137  		sort.Strings(depGroupVals)
   138  
   139  		pkgDetails.Metadata = osv.DepGroupMetadata{
   140  			DepGroupVals: depGroupVals,
   141  		}
   142  		packages = append(packages, pkgDetails)
   143  	}
   144  
   145  	return inventory.Inventory{Packages: packages}, nil
   146  }
   147  
// Compile-time check that Extractor (by value) implements filesystem.Extractor.
var _ filesystem.Extractor = Extractor{}