// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package baseimage enriches inventory layer details with potential base images from deps.dev.
package baseimage

import (
	"context"
	"errors"
	"fmt"
	"log"
	"slices"

	"github.com/google/osv-scalibr/clients/depsdev/v1alpha1/grpcclient"
	"github.com/google/osv-scalibr/enricher"
	"github.com/google/osv-scalibr/extractor"
	"github.com/google/osv-scalibr/inventory"
	"github.com/google/osv-scalibr/plugin"
	"github.com/opencontainers/go-digest"
	"github.com/opencontainers/image-spec/identity"
	"go.uber.org/multierr"
	"golang.org/x/sync/errgroup"
)

const (
	// Name is the name of the base image enricher.
	Name = "baseimage"
	// Version is the version of the base image enricher.
	Version = 0
	// digestSHA256EmptyTar is the canonical sha256 digest of empty tar file -
	// (1024 NULL bytes). It is substituted for layers with an empty DiffID so
	// chain IDs can be computed over every layer, empty or not.
	digestSHA256EmptyTar = digest.Digest("sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef")

	// maxConcurrentRequests bounds the number of in-flight deps.dev queries
	// issued per container image (via errgroup.SetLimit).
	maxConcurrentRequests = 1000
)

// Config is the configuration for the base image enricher.
type Config struct {
	// Client queries deps.dev for container image data by chain ID.
	Client Client
}

// DefaultConfig returns the default configuration for the base image enricher.
54 func DefaultConfig() *Config { 55 grpcConfig := grpcclient.DefaultConfig() 56 grpcclient, err := grpcclient.New(grpcConfig) 57 if err != nil { 58 log.Fatalf("Failed to create base image client: %v", err) 59 } 60 61 client := NewClientGRPC(grpcclient) 62 63 return &Config{ 64 Client: client, 65 } 66 } 67 68 // Enricher enriches inventory layer details with potential base images from deps.dev. 69 type Enricher struct { 70 client Client 71 } 72 73 // New returns a new base image enricher. 74 func New(cfg *Config) (*Enricher, error) { 75 if cfg == nil { 76 return nil, errors.New("config is nil") 77 } 78 if cfg.Client == nil { 79 return nil, errors.New("client is nil") 80 } 81 return &Enricher{client: cfg.Client}, nil 82 } 83 84 // NewDefault returns a new base image enricher with the default configuration. 85 // It will log.Fatal if the enricher cannot be created. 86 func NewDefault() enricher.Enricher { 87 e, err := New(DefaultConfig()) 88 if err != nil { 89 log.Fatalf("Failed to create base image enricher: %v", err) 90 } 91 return e 92 } 93 94 // Config returns the configuration for the base image enricher. 95 func (e *Enricher) Config() *Config { 96 return &Config{ 97 Client: e.client, 98 } 99 } 100 101 // Name of the base image enricher. 102 func (*Enricher) Name() string { return Name } 103 104 // Version of the base image enricher. 105 func (*Enricher) Version() int { return Version } 106 107 // Requirements of the base image enricher. 108 func (*Enricher) Requirements() *plugin.Capabilities { 109 return &plugin.Capabilities{Network: plugin.NetworkOnline} 110 } 111 112 // RequiredPlugins returns a list of Plugins that need to be enabled for this Enricher to work. 113 func (*Enricher) RequiredPlugins() []string { 114 return []string{} 115 } 116 117 // Enrich enriches the inventory with base image information from deps.dev. 
// Enrich looks up, for every layer of every container image in inv, the set of
// known base images on deps.dev whose newest layer matches that layer's chain
// ID, and records the results in cim.BaseImages / LayerMetadata.BaseImageIndex.
// Queries for one image run concurrently (bounded by maxConcurrentRequests);
// a failed image is skipped and its error accumulated into the returned error.
func (e *Enricher) Enrich(ctx context.Context, _ *enricher.ScanInput, inv *inventory.Inventory) error {
	if inv.ContainerImageMetadata == nil {
		return nil
	}

	// Cache from chain ID to the base image details found for that chain ID,
	// shared across all images so repeated layers are queried only once.
	chainIDToBaseImage := make(map[string][]*extractor.BaseImageDetails)
	var enrichErr error
	for _, cim := range inv.ContainerImageMetadata {
		if cim.LayerMetadata == nil {
			continue
		}

		// Placeholder for the scanned image itself.
		// Index 0 represents "no known base image" (the scanned image's own layers).
		cim.BaseImages = [][]*extractor.BaseImageDetails{
			[]*extractor.BaseImageDetails{},
		}

		chainIDsByLayerIndex := make([]digest.Digest, len(cim.LayerMetadata))
		baseImagesByLayerIndex := make([][]*extractor.BaseImageDetails, len(cim.LayerMetadata))
		g, ctx := errgroup.WithContext(ctx)
		g.SetLimit(maxConcurrentRequests)

		// We do not want to use the normal chainID of the layer, because it does not include empty
		// layers. Deps.dev does a special calculation of the chainID that includes empty layers, so we
		// do the same here.
		for i, l := range cim.LayerMetadata {
			diffID := l.DiffID
			if l.DiffID == "" {
				// Empty layers get the canonical empty-tar digest as their diff ID.
				diffID = digestSHA256EmptyTar
			}

			// first populate this with diffIDs
			chainIDsByLayerIndex[i] = diffID
		}
		// This replaces the diffIDs with chainIDs for the corresponding index
		// (in-place cumulative digest over the prefix of layers).
		identity.ChainIDs(chainIDsByLayerIndex)

		for i, chainID := range chainIDsByLayerIndex {
			if val, ok := chainIDToBaseImage[chainID.String()]; ok {
				// Already cached, we can just skip this layer.
				baseImagesByLayerIndex[i] = val
				continue
			}

			// Otherwise query deps.dev for the base images of this layer.
			// Each goroutine writes only its own index i, so no locking is needed.
			g.Go(func() error {
				if ctx.Err() != nil {
					// this return value doesn't matter to errgroup.Wait(), since it already errored
					return ctx.Err()
				}

				req := &Request{
					ChainID: chainID.String(),
				}
				resp, err := e.client.QueryContainerImages(ctx, req)
				if err != nil {
					// errNotFound simply means no known base image for this chain ID.
					if !errors.Is(err, errNotFound) {
						// If one query fails even with grpc retries, we cancel the rest of the
						// queries and return the error.
						return fmt.Errorf("failed to query container images for chain ID %q: %w", chainID.String(), err)
					}
					return nil
				}
				var baseImages []*extractor.BaseImageDetails

				if resp != nil && resp.Results != nil && len(resp.Results) > 0 {
					for _, result := range resp.Results {
						if result.Repository != "" {
							baseImages = append(baseImages, &extractor.BaseImageDetails{
								Repository: result.Repository,
								Registry:   "docker.io", // Currently all deps.dev images are from the docker mirror.
								ChainID:    chainID,
								Plugin:     Name,
							})
						}
					}
				}

				// Cache and also save to layer map.
				baseImagesByLayerIndex[i] = baseImages

				return nil
			})
		}

		if err := g.Wait(); err != nil {
			enrichErr = multierr.Append(enrichErr, err)
			// Move onto the next image
			continue
		}

		// Loop backwards through the layers, from the newest to the oldest layer.
		// This is because base images are identified by the chain ID of the newest layer in the image,
		// so all older layer must belong to that base image.
		for i, lm := range slices.Backward(cim.LayerMetadata) {
			baseImages := baseImagesByLayerIndex[i]
			// Default to the most recently discovered base image group.
			lm.BaseImageIndex = len(cim.BaseImages) - 1
			// Populate the cross-image cache only after a successful Wait,
			// so partial failures never poison it.
			chainIDToBaseImage[chainIDsByLayerIndex[i].String()] = baseImages

			if len(baseImages) == 0 {
				continue
			}

			// Is the current set of baseImages the same as the previous?
			isSame := false
			lastBaseImages := cim.BaseImages[len(cim.BaseImages)-1]
			if len(baseImages) == len(lastBaseImages) {
				isSame = true
				for j := range baseImages {
					if baseImages[j].Repository != lastBaseImages[j].Repository ||
						baseImages[j].Registry != lastBaseImages[j].Registry {
						isSame = false
						break
					}
				}
			}

			if !isSame {
				// Only if it's not the same base image, update
				cim.BaseImages = append(cim.BaseImages, baseImages)
				// And if we do update, also change the base image index to new last index.
				lm.BaseImageIndex++
			}
		}
	}

	return enrichErr
}