github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/golang/gomod/gomod.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package gomod extracts go.mod files.
    16  package gomod
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"go/version"
    22  	"io"
    23  	"maps"
    24  	"path/filepath"
    25  	"slices"
    26  	"strings"
    27  
    28  	"github.com/google/osv-scalibr/extractor"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  	"github.com/google/osv-scalibr/inventory"
    31  	"github.com/google/osv-scalibr/log"
    32  	"github.com/google/osv-scalibr/plugin"
    33  	"github.com/google/osv-scalibr/purl"
    34  	"golang.org/x/mod/modfile"
    35  )
    36  
    37  const (
    38  	// Name is the unique name of this extractor.
    39  	Name = "go/gomod"
    40  )
    41  
// Config is the configuration for the Extractor.
type Config struct {
	// ExcludeIndirect, when true, makes the extractor skip the requirements
	// that the go.mod file marks as indirect.
	ExcludeIndirect bool
}
    46  
// Extractor extracts Go packages from a go.mod file. In addition to the
// required modules it reports the Go standard library ("stdlib") as a package,
// versioned from the go.mod's go/toolchain directives.
//
// The output is not sorted and will not be in a consistent order.
type Extractor struct {
	// config holds the options this extractor was created with.
	config Config
}
    54  
    55  // DefaultConfig returns a default configuration for the extractor.
    56  func DefaultConfig() Config {
    57  	return Config{}
    58  }
    59  
    60  // New returns a new instance of the extractor with the default configuration.
    61  func New() filesystem.Extractor { return NewWithConfig(DefaultConfig()) }
    62  
    63  // NewWithConfig returns a new instance of the extractor with the given configuration.
    64  func NewWithConfig(cfg Config) filesystem.Extractor { return &Extractor{config: cfg} }
    65  
    66  // Name of the extractor.
    67  func (e Extractor) Name() string { return Name }
    68  
    69  // Version of the extractor.
    70  func (e Extractor) Version() int { return 0 }
    71  
    72  // Requirements of the extractor.
    73  func (e Extractor) Requirements() *plugin.Capabilities {
    74  	return &plugin.Capabilities{}
    75  }
    76  
    77  // FileRequired returns true if the specified file matches go.mod files.
    78  //
    79  // go.sum is not considered since the 'go.mod' file
    80  // is necessary to determine the Go version before opening it.
    81  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    82  	return filepath.Base(api.Path()) == "go.mod"
    83  }
    84  
// goVersion is an alias for string used to label Go version values. Within this
// file such values are stored without the "go" prefix (e.g. "1.21.0").
type goVersion = string
    86  
// pkgKey identifies a package by its name and version. It is used as the map
// key when collecting, merging and deduplicating packages.
type pkgKey struct {
	// name is the module path, or "stdlib" for the Go standard library.
	name string
	// version is the package version; module versions are stored without
	// their leading "v".
	version string
}
    91  
    92  // Extract extracts packages from a go.mod file passed through the scan input.
    93  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    94  	pkgs, goVersion, err := e.extractGoMod(input)
    95  	if err != nil {
    96  		return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err)
    97  	}
    98  
    99  	// At go 1.17 and above, the go command adds an indirect requirement for each module that provides any
   100  	// package imported (even indirectly) by a package or test in the main module or passed as an argument to go get.
   101  	if goVersion == "" || version.Compare("go"+goVersion, "go1.17") >= 0 {
   102  		return inventory.Inventory{Packages: slices.Collect(maps.Values(pkgs))}, nil
   103  	}
   104  
   105  	// For versions below 1.17 extract indirect dependencies from the go.sum file
   106  	sumPkgs, err := extractFromSum(input)
   107  	if err != nil {
   108  		log.Debugf("could not extract from %s's sum file: %v", input.Path, err)
   109  		return inventory.Inventory{Packages: slices.Collect(maps.Values(pkgs))}, nil
   110  	}
   111  
   112  	// merge go.sum packages with go.mod ones
   113  	for k, sumPkg := range sumPkgs {
   114  		if pkg, ok := pkgs[k]; ok {
   115  			// if the dependency is already present then add `go.sum` to its Locations slice
   116  			pkg.Locations = append(pkg.Locations, sumPkg.Locations...)
   117  		} else {
   118  			// otherwise add a new dependency to the package
   119  			pkgs[k] = sumPkg
   120  		}
   121  	}
   122  
   123  	return inventory.Inventory{Packages: slices.Collect(maps.Values(pkgs))}, nil
   124  }
   125  
   126  func (e Extractor) extractGoMod(input *filesystem.ScanInput) (map[pkgKey]*extractor.Package, goVersion, error) {
   127  	b, err := io.ReadAll(input.Reader)
   128  	if err != nil {
   129  		return nil, "", err
   130  	}
   131  
   132  	parsedLockfile, err := modfile.Parse(input.Path, b, nil)
   133  	if err != nil {
   134  		return nil, "", err
   135  	}
   136  
   137  	// Store the packages in a map since they might be overwritten by later entries.
   138  	packages := map[pkgKey]*extractor.Package{}
   139  
   140  	for _, require := range parsedLockfile.Require {
   141  		// Skip indirect dependencies based on the configuration.
   142  		if e.config.ExcludeIndirect && require.Indirect {
   143  			continue
   144  		}
   145  
   146  		name := require.Mod.Path
   147  		version := strings.TrimPrefix(require.Mod.Version, "v")
   148  		packages[pkgKey{name: name, version: version}] = &extractor.Package{
   149  			Name:      name,
   150  			Version:   version,
   151  			PURLType:  purl.TypeGolang,
   152  			Locations: []string{input.Path},
   153  		}
   154  	}
   155  
   156  	// Apply go.mod replace directives to the identified packages by updating their
   157  	// names+versions as instructed by the directive.
   158  	for _, replace := range parsedLockfile.Replace {
   159  		var replacements []pkgKey
   160  
   161  		if replace.Old.Version == "" {
   162  			// If the version to replace is omitted, all versions of the module are replaced.
   163  			for k, pkg := range packages {
   164  				if pkg.Name == replace.Old.Path {
   165  					replacements = append(replacements, k)
   166  				}
   167  			}
   168  		} else {
   169  			// If the version to replace is specified only that specific version of the
   170  			// module is replaced.
   171  			s := pkgKey{name: replace.Old.Path, version: strings.TrimPrefix(replace.Old.Version, "v")}
   172  
   173  			// A `replace` directive has no effect if the name or version to replace is not present.
   174  			if _, ok := packages[s]; ok {
   175  				replacements = []pkgKey{s}
   176  			}
   177  		}
   178  
   179  		for _, replacement := range replacements {
   180  			packages[replacement] = &extractor.Package{
   181  				Name:      replace.New.Path,
   182  				Version:   strings.TrimPrefix(replace.New.Version, "v"),
   183  				PURLType:  purl.TypeGolang,
   184  				Locations: []string{input.Path},
   185  			}
   186  		}
   187  	}
   188  
   189  	goVersion := ""
   190  	if parsedLockfile.Go != nil && parsedLockfile.Go.Version != "" {
   191  		goVersion = parsedLockfile.Go.Version
   192  	}
   193  
   194  	// Give the toolchain version priority, if present
   195  	if parsedLockfile.Toolchain != nil && parsedLockfile.Toolchain.Name != "" {
   196  		version, _, _ := strings.Cut(parsedLockfile.Toolchain.Name, "-")
   197  		goVersion = strings.TrimPrefix(version, "go")
   198  	}
   199  
   200  	// Add the Go stdlib as an explicit dependency.
   201  	if goVersion != "" {
   202  		packages[pkgKey{name: "stdlib"}] = &extractor.Package{
   203  			Name:      "stdlib",
   204  			Version:   goVersion,
   205  			PURLType:  purl.TypeGolang,
   206  			Locations: []string{input.Path},
   207  		}
   208  	}
   209  
   210  	// An additional deduplication pass is required.
   211  	// This is necessary because the values in the map may have changed after the replacement
   212  	dedupedPs := map[pkgKey]*extractor.Package{}
   213  	for _, p := range packages {
   214  		s := pkgKey{name: p.Name, version: p.Version}
   215  		dedupedPs[s] = p
   216  	}
   217  	return dedupedPs, goVersion, nil
   218  }
   219  
// Compile-time assertion that Extractor satisfies filesystem.Extractor.
var _ filesystem.Extractor = Extractor{}