github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/dockercomposeimage/dockercomposeimage.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dockercomposeimage extracts image URLs from Docker Compose files.
    16  package dockercomposeimage
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"maps"
    24  	"os"
    25  	"path/filepath"
    26  	"sort"
    27  	"strings"
    28  
    29  	"github.com/compose-spec/compose-go/v2/dotenv"
    30  	"github.com/compose-spec/compose-go/v2/interpolation"
    31  	"github.com/compose-spec/compose-go/v2/loader"
    32  	"github.com/compose-spec/compose-go/v2/template"
    33  	"github.com/compose-spec/compose-go/v2/tree"
    34  	"github.com/compose-spec/compose-go/v2/types"
    35  	"github.com/google/osv-scalibr/extractor"
    36  	"github.com/google/osv-scalibr/extractor/filesystem"
    37  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    38  	"github.com/google/osv-scalibr/inventory"
    39  	"github.com/google/osv-scalibr/log"
    40  	"github.com/google/osv-scalibr/plugin"
    41  	"github.com/google/osv-scalibr/purl"
    42  	"github.com/google/osv-scalibr/stats"
    43  	"gopkg.in/yaml.v3"
    44  )
    45  
const (
	// Name is the unique name of this extractor. It is the value returned by
	// Extractor.Name.
	Name = "containers/dockercomposeimage"

	// DefaultMaxFileSizeBytes is the default maximum file size the extractor will
	// attempt to extract. If a file is encountered that is larger than this
	// limit, the file is ignored by `FileRequired`. DefaultConfig uses this
	// value for Config.MaxFileSizeBytes.
	DefaultMaxFileSizeBytes = 1 * units.MiB
)
    55  
// Config is the configuration for the Extractor. It is consumed by New, which
// copies both fields into the returned Extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If
	// `FileRequired` gets a bigger file, it will return false. A value of zero
	// or less disables the size check.
	MaxFileSizeBytes int64
}
    64  
    65  // DefaultConfig returns the default configuration for the extractor.
    66  func DefaultConfig() Config {
    67  	return Config{
    68  		MaxFileSizeBytes: DefaultMaxFileSizeBytes,
    69  	}
    70  }
    71  
// Extractor extracts image URLs from Docker Compose files. Construct it with
// New or NewDefault.
type Extractor struct {
	// stats is the metrics collector taken from Config.Stats.
	stats stats.Collector
	// maxFileSizeBytes is the size limit applied by FileRequired; values of
	// zero or less disable the check.
	maxFileSizeBytes int64
}
    77  
    78  // New returns a Docker Compose image extractor.
    79  //
    80  // For most use cases, initialize with:
    81  // ```
    82  // e := New(DefaultConfig())
    83  // ```
    84  func New(cfg Config) *Extractor {
    85  	return &Extractor{
    86  		stats:            cfg.Stats,
    87  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
    88  	}
    89  }
    90  
    91  // NewDefault returns an extractor with the default config settings.
    92  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    93  
    94  // Name of the extractor.
    95  func (e Extractor) Name() string { return Name }
    96  
    97  // Version of the extractor.
    98  func (e Extractor) Version() int { return 0 }
    99  
   100  // Requirements of the extractor.
   101  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
   102  
   103  // FileRequired returns true if the specified file could be a Docker Compose file.
   104  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   105  	path := api.Path()
   106  	// Skip directories and oversized files
   107  	fi, err := os.Stat(path)
   108  	if err != nil || fi.IsDir() {
   109  		return false
   110  	}
   111  	if e.maxFileSizeBytes > 0 && fi.Size() > e.maxFileSizeBytes {
   112  		return false
   113  	}
   114  	filename := filepath.Base(path)
   115  	if filepath.Ext(filename) != ".yml" && filepath.Ext(filename) != ".yaml" {
   116  		return false
   117  	}
   118  	return strings.HasPrefix(filename, "compose") ||
   119  		strings.HasPrefix(filename, "docker-compose")
   120  }
   121  
   122  // Extract extracts image URLs from a Docker Compose file.
   123  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   124  	if input.Info == nil {
   125  		return inventory.Inventory{}, errors.New("input.Info is nil")
   126  	}
   127  
   128  	data, err := io.ReadAll(input.Reader)
   129  	if err != nil {
   130  		return inventory.Inventory{}, err
   131  	}
   132  
   133  	// Check for a top-level "services" field.
   134  	var content map[string]any
   135  	if err := yaml.Unmarshal(data, &content); err != nil {
   136  		// Not a valid yaml file, not an error.
   137  		return inventory.Inventory{}, err
   138  	}
   139  	if _, ok := content["services"]; !ok {
   140  		// Not a compose file, not an error.
   141  		return inventory.Inventory{}, nil
   142  	}
   143  
   144  	images, err := uniqueImagesFromReader(ctx, input)
   145  	if err != nil {
   146  		log.Warnf("Parsing docker-compose file %q failed: %v", input.Path, err)
   147  		return inventory.Inventory{}, nil
   148  	}
   149  	var pkgs []*extractor.Package
   150  	for _, image := range images {
   151  		name, version := parseName(image)
   152  		pkgs = append(pkgs, &extractor.Package{
   153  			Locations: []string{input.Path},
   154  			Name:      name,
   155  			Version:   version,
   156  			PURLType:  purl.TypeDocker,
   157  		})
   158  	}
   159  
   160  	return inventory.Inventory{Packages: pkgs}, nil
   161  }
   162  
   163  // uniqueImagesFromReader extracts unique image names from a Docker Compose file.
   164  // It handles environment variable interpolation and returns a sorted list of unique images.
   165  func uniqueImagesFromReader(ctx context.Context, input *filesystem.ScanInput) ([]string, error) {
   166  	absPath, err := input.GetRealPath()
   167  	if err != nil {
   168  		return nil, fmt.Errorf("GetRealPath(%v): %w", input, err)
   169  	}
   170  	if input.Root == "" {
   171  		// The file got copied to a temporary dir, remove it at the end.
   172  		defer func() {
   173  			dir := filepath.Dir(absPath)
   174  			if err := os.RemoveAll(dir); err != nil {
   175  				log.Errorf("os.RemoveAll(%q): %v", dir, err)
   176  			}
   177  		}()
   178  	}
   179  
   180  	// Load environment variables from a sibling .env file if it exists
   181  	workingDir := filepath.Dir(input.Path)
   182  	envPath := filepath.ToSlash(filepath.Join(workingDir, ".env"))
   183  	environment := types.Mapping{}
   184  	if f, err := input.FS.Open(envPath); err == nil {
   185  		defer f.Close()
   186  		if envVars, err := dotenv.Parse(f); err != nil {
   187  			log.Warnf("dotenv.Parse(%q): %v", envPath, err)
   188  		} else {
   189  			maps.Copy(environment, envVars)
   190  		}
   191  	} else if !errors.Is(err, os.ErrNotExist) {
   192  		log.Warnf("input.FS.Open(%q): %v", envPath, err)
   193  	}
   194  	configFiles := []types.ConfigFile{
   195  		{Filename: absPath},
   196  	}
   197  	details := types.ConfigDetails{
   198  		WorkingDir:  workingDir,
   199  		ConfigFiles: configFiles,
   200  		Environment: environment,
   201  	}
   202  	customOpts := loader.Options{
   203  		Interpolate: &interpolation.Options{
   204  			Substitute:      substitute,
   205  			LookupValue:     details.LookupEnv,
   206  			TypeCastMapping: make(map[tree.Path]interpolation.Cast),
   207  		},
   208  		ResolvePaths: true,
   209  	}
   210  	project, err := loader.LoadWithContext(
   211  		ctx,
   212  		details,
   213  		func(opts *loader.Options) {
   214  			*opts = customOpts
   215  		})
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  
   220  	uniq := map[string]struct{}{}
   221  	// We Skip services with an empty image version.
   222  	// An empty image version is not a valid image reference.
   223  	// This happened because some environment variables are not resolved
   224  	for _, s := range project.Services {
   225  		if s.Image != "" && !strings.Contains(s.Image, "<IMPERFECT_ENV_VAR_RESOLVING>") {
   226  			uniq[s.Image] = struct{}{}
   227  		}
   228  	}
   229  
   230  	out := make([]string, 0, len(uniq))
   231  	for img := range uniq {
   232  		out = append(out, img)
   233  	}
   234  	sort.Strings(out)
   235  	return out, nil
   236  }
   237  
   238  // parseName extracts the name and version from an image reference.
   239  // It handles both digest format (name@digest) and tag format (name:tag).
   240  // If no version is specified, it returns "latest" as the default version.
   241  func parseName(name string) (string, string) {
   242  	if strings.Contains(name, "@") {
   243  		parts := strings.SplitN(name, "@", 2)
   244  		return parts[0], parts[1]
   245  	}
   246  
   247  	if strings.Contains(name, ":") {
   248  		parts := strings.SplitN(name, ":", 2)
   249  		return parts[0], parts[1]
   250  	}
   251  
   252  	return name, "latest"
   253  }
   254  
   255  // substitute replaces environment variables in template strings with their values.
   256  // For missing variables, it inserts a placeholder "<IMPERFECT_ENV_VAR_RESOLVING>" to indicate
   257  // that the substitution was incomplete, allowing processing to continue.
   258  func substitute(inTemplate string, mapping template.Mapping) (string, error) {
   259  	options := []template.Option{
   260  		template.WithPattern(template.DefaultPattern),
   261  		template.WithReplacementFunction(
   262  			func(substring string, mapping template.Mapping, cfg *template.Config) (string, error) {
   263  				value, _, err := template.DefaultReplacementAppliedFunc(substring, mapping, cfg)
   264  				if err != nil {
   265  					return "", err
   266  				}
   267  				if value == "" {
   268  					// Use placeholder for unresolved variables
   269  					value = "<IMPERFECT_ENV_VAR_RESOLVING>"
   270  				}
   271  				return value, nil
   272  			}),
   273  	}
   274  
   275  	return template.SubstituteWithOptions(inTemplate, mapping, options...)
   276  }