github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/containers/dockercomposeimage/dockercomposeimage.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package dockercomposeimage extracts image URLs from Docker Compose files. 16 package dockercomposeimage 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "maps" 24 "os" 25 "path/filepath" 26 "sort" 27 "strings" 28 29 "github.com/compose-spec/compose-go/v2/dotenv" 30 "github.com/compose-spec/compose-go/v2/interpolation" 31 "github.com/compose-spec/compose-go/v2/loader" 32 "github.com/compose-spec/compose-go/v2/template" 33 "github.com/compose-spec/compose-go/v2/tree" 34 "github.com/compose-spec/compose-go/v2/types" 35 "github.com/google/osv-scalibr/extractor" 36 "github.com/google/osv-scalibr/extractor/filesystem" 37 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 38 "github.com/google/osv-scalibr/inventory" 39 "github.com/google/osv-scalibr/log" 40 "github.com/google/osv-scalibr/plugin" 41 "github.com/google/osv-scalibr/purl" 42 "github.com/google/osv-scalibr/stats" 43 "gopkg.in/yaml.v3" 44 ) 45 46 const ( 47 // Name is the unique name of this extractor. 48 Name = "containers/dockercomposeimage" 49 50 // DefaultMaxFileSizeBytes is the default maximum file size the extractor will 51 // attempt to extract. If a file is encountered that is larger than this 52 // limit, the file is ignored by `FileRequired`. 53 DefaultMaxFileSizeBytes = 1 * units.MiB 54 ) 55 56 // Config is the configuration for the Extractor. 57 type Config struct { 58 // Stats is a stats collector for reporting metrics. 59 Stats stats.Collector 60 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 61 // `FileRequired` gets a bigger file, it will return false. 62 MaxFileSizeBytes int64 63 } 64 65 // DefaultConfig returns the default configuration for the extractor. 66 func DefaultConfig() Config { 67 return Config{ 68 MaxFileSizeBytes: DefaultMaxFileSizeBytes, 69 } 70 } 71 72 // Extractor extracts image URLs from Docker Compose files. 73 type Extractor struct { 74 stats stats.Collector 75 maxFileSizeBytes int64 76 } 77 78 // New returns a Docker Compose image extractor. 79 // 80 // For most use cases, initialize with: 81 // ``` 82 // e := New(DefaultConfig()) 83 // ``` 84 func New(cfg Config) *Extractor { 85 return &Extractor{ 86 stats: cfg.Stats, 87 maxFileSizeBytes: cfg.MaxFileSizeBytes, 88 } 89 } 90 91 // NewDefault returns an extractor with the default config settings. 92 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 93 94 // Name of the extractor. 95 func (e Extractor) Name() string { return Name } 96 97 // Version of the extractor. 98 func (e Extractor) Version() int { return 0 } 99 100 // Requirements of the extractor. 101 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 102 103 // FileRequired returns true if the specified file could be a Docker Compose file. 104 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 105 path := api.Path() 106 // Skip directories and oversized files 107 fi, err := os.Stat(path) 108 if err != nil || fi.IsDir() { 109 return false 110 } 111 if e.maxFileSizeBytes > 0 && fi.Size() > e.maxFileSizeBytes { 112 return false 113 } 114 filename := filepath.Base(path) 115 if filepath.Ext(filename) != ".yml" && filepath.Ext(filename) != ".yaml" { 116 return false 117 } 118 return strings.HasPrefix(filename, "compose") || 119 strings.HasPrefix(filename, "docker-compose") 120 } 121 122 // Extract extracts image URLs from a Docker Compose file. 123 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 124 if input.Info == nil { 125 return inventory.Inventory{}, errors.New("input.Info is nil") 126 } 127 128 data, err := io.ReadAll(input.Reader) 129 if err != nil { 130 return inventory.Inventory{}, err 131 } 132 133 // Check for a top-level "services" field. 134 var content map[string]any 135 if err := yaml.Unmarshal(data, &content); err != nil { 136 // Not a valid yaml file, not an error. 137 return inventory.Inventory{}, err 138 } 139 if _, ok := content["services"]; !ok { 140 // Not a compose file, not an error. 141 return inventory.Inventory{}, nil 142 } 143 144 images, err := uniqueImagesFromReader(ctx, input) 145 if err != nil { 146 log.Warnf("Parsing docker-compose file %q failed: %v", input.Path, err) 147 return inventory.Inventory{}, nil 148 } 149 var pkgs []*extractor.Package 150 for _, image := range images { 151 name, version := parseName(image) 152 pkgs = append(pkgs, &extractor.Package{ 153 Locations: []string{input.Path}, 154 Name: name, 155 Version: version, 156 PURLType: purl.TypeDocker, 157 }) 158 } 159 160 return inventory.Inventory{Packages: pkgs}, nil 161 } 162 163 // uniqueImagesFromReader extracts unique image names from a Docker Compose file. 164 // It handles environment variable interpolation and returns a sorted list of unique images. 165 func uniqueImagesFromReader(ctx context.Context, input *filesystem.ScanInput) ([]string, error) { 166 absPath, err := input.GetRealPath() 167 if err != nil { 168 return nil, fmt.Errorf("GetRealPath(%v): %w", input, err) 169 } 170 if input.Root == "" { 171 // The file got copied to a temporary dir, remove it at the end. 172 defer func() { 173 dir := filepath.Dir(absPath) 174 if err := os.RemoveAll(dir); err != nil { 175 log.Errorf("os.RemoveAll(%q): %v", dir, err) 176 } 177 }() 178 } 179 180 // Load environment variables from a sibling .env file if it exists 181 workingDir := filepath.Dir(input.Path) 182 envPath := filepath.ToSlash(filepath.Join(workingDir, ".env")) 183 environment := types.Mapping{} 184 if f, err := input.FS.Open(envPath); err == nil { 185 defer f.Close() 186 if envVars, err := dotenv.Parse(f); err != nil { 187 log.Warnf("dotenv.Parse(%q): %v", envPath, err) 188 } else { 189 maps.Copy(environment, envVars) 190 } 191 } else if !errors.Is(err, os.ErrNotExist) { 192 log.Warnf("input.FS.Open(%q): %v", envPath, err) 193 } 194 configFiles := []types.ConfigFile{ 195 {Filename: absPath}, 196 } 197 details := types.ConfigDetails{ 198 WorkingDir: workingDir, 199 ConfigFiles: configFiles, 200 Environment: environment, 201 } 202 customOpts := loader.Options{ 203 Interpolate: &interpolation.Options{ 204 Substitute: substitute, 205 LookupValue: details.LookupEnv, 206 TypeCastMapping: make(map[tree.Path]interpolation.Cast), 207 }, 208 ResolvePaths: true, 209 } 210 project, err := loader.LoadWithContext( 211 ctx, 212 details, 213 func(opts *loader.Options) { 214 *opts = customOpts 215 }) 216 if err != nil { 217 return nil, err 218 } 219 220 uniq := map[string]struct{}{} 221 // We Skip services with an empty image version. 222 // An empty image version is not a valid image reference. 223 // This happened because some environment variables are not resolved 224 for _, s := range project.Services { 225 if s.Image != "" && !strings.Contains(s.Image, "<IMPERFECT_ENV_VAR_RESOLVING>") { 226 uniq[s.Image] = struct{}{} 227 } 228 } 229 230 out := make([]string, 0, len(uniq)) 231 for img := range uniq { 232 out = append(out, img) 233 } 234 sort.Strings(out) 235 return out, nil 236 } 237 238 // parseName extracts the name and version from an image reference. 239 // It handles both digest format (name@digest) and tag format (name:tag). 240 // If no version is specified, it returns "latest" as the default version. 241 func parseName(name string) (string, string) { 242 if strings.Contains(name, "@") { 243 parts := strings.SplitN(name, "@", 2) 244 return parts[0], parts[1] 245 } 246 247 if strings.Contains(name, ":") { 248 parts := strings.SplitN(name, ":", 2) 249 return parts[0], parts[1] 250 } 251 252 return name, "latest" 253 } 254 255 // substitute replaces environment variables in template strings with their values. 256 // For missing variables, it inserts a placeholder "<IMPERFECT_ENV_VAR_RESOLVING>" to indicate 257 // that the substitution was incomplete, allowing processing to continue. 258 func substitute(inTemplate string, mapping template.Mapping) (string, error) { 259 options := []template.Option{ 260 template.WithPattern(template.DefaultPattern), 261 template.WithReplacementFunction( 262 func(substring string, mapping template.Mapping, cfg *template.Config) (string, error) { 263 value, _, err := template.DefaultReplacementAppliedFunc(substring, mapping, cfg) 264 if err != nil { 265 return "", err 266 } 267 if value == "" { 268 // Use placeholder for unresolved variables 269 value = "<IMPERFECT_ENV_VAR_RESOLVING>" 270 } 271 return value, nil 272 }), 273 } 274 275 return template.SubstituteWithOptions(inTemplate, mapping, options...) 276 }