github.com/google/osv-scalibr@v0.4.1/clients/datasource/maven_registry.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package datasource 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/xml" 21 "errors" 22 "fmt" 23 "io" 24 "net/http" 25 "net/url" 26 "os" 27 "path/filepath" 28 "slices" 29 "strings" 30 "sync" 31 "time" 32 33 "deps.dev/util/maven" 34 "deps.dev/util/semver" 35 "github.com/google/osv-scalibr/log" 36 "golang.org/x/net/html/charset" 37 "golang.org/x/oauth2/google" 38 ) 39 40 // mavenCentral holds the URL of Maven Central Repository. 41 const mavenCentral = "https://repo.maven.apache.org/maven2" 42 43 // artifactRegistryScheme defines the scheme for Google Artifact Registry. 44 const artifactRegistryScheme = "artifactregistry" 45 46 var errAPIFailed = errors.New("API query failed") 47 48 // MavenRegistryAPIClient defines a client to fetch metadata from a Maven registry. 49 type MavenRegistryAPIClient struct { 50 defaultRegistry MavenRegistry // The default registry that we are making requests 51 registries []MavenRegistry // Additional registries specified to fetch projects 52 registryAuths map[string]*HTTPAuthentication // Authentication for the registries keyed by registry ID. From settings.xml 53 localRegistry string // The local directory that holds Maven manifests 54 55 googleClient *http.Client // A client for authenticating with Google services, used for Artifact Registry. 56 disableGoogleAuth bool // If true, do not try to create google.DefaultClient for Artifact Registry. 57 58 // Cache fields 59 mu *sync.Mutex 60 cacheTimestamp *time.Time // If set, this means we loaded from a cache 61 responses *RequestCache[string, response] 62 } 63 64 type response struct { 65 StatusCode int 66 Body []byte 67 } 68 69 // MavenRegistry defines a Maven registry. 70 type MavenRegistry struct { 71 URL string 72 Parsed *url.URL 73 74 // Information from pom.xml 75 ID string 76 ReleasesEnabled bool 77 SnapshotsEnabled bool 78 } 79 80 // NewMavenRegistryAPIClient returns a new MavenRegistryAPIClient. 81 func NewMavenRegistryAPIClient(ctx context.Context, registry MavenRegistry, localRegistry string, disableGoogleClient bool) (*MavenRegistryAPIClient, error) { 82 if registry.URL == "" { 83 registry.URL = mavenCentral 84 registry.ID = "central" 85 } 86 if registry.ID == "" { 87 // Gives the default registry an ID so it is not overwritten by registry without an ID in pom.xml. 88 registry.ID = "default" 89 } 90 u, err := url.Parse(registry.URL) 91 if err != nil { 92 return nil, fmt.Errorf("invalid Maven registry %s: %w", registry.URL, err) 93 } 94 registry.Parsed = u 95 96 if localRegistry != "" { 97 localRegistry = filepath.Join(localRegistry, "maven") 98 } 99 100 // TODO: allow for manual specification of settings files 101 globalSettings := ParseMavenSettings(globalMavenSettingsFile()) 102 userSettings := ParseMavenSettings(userMavenSettingsFile()) 103 104 client := &MavenRegistryAPIClient{ 105 // We assume only downloading releases is allowed on the default registry. 106 defaultRegistry: registry, 107 localRegistry: localRegistry, 108 mu: &sync.Mutex{}, 109 responses: NewRequestCache[string, response](), 110 registryAuths: MakeMavenAuth(globalSettings, userSettings), 111 disableGoogleAuth: disableGoogleClient, 112 } 113 if registry.Parsed.Scheme == artifactRegistryScheme { 114 client.createGoogleClient(ctx) 115 } 116 return client, nil 117 } 118 119 // NewDefaultMavenRegistryAPIClient creates a new MavenRegistryAPIClient with default settings, 120 // using the provided registry URL. 121 func NewDefaultMavenRegistryAPIClient(ctx context.Context, registry string) (*MavenRegistryAPIClient, error) { 122 return NewMavenRegistryAPIClient(ctx, MavenRegistry{URL: registry, ReleasesEnabled: true}, "", false) 123 } 124 125 // SetLocalRegistry sets the local directory that stores the downloaded Maven manifests. 126 func (m *MavenRegistryAPIClient) SetLocalRegistry(localRegistry string) { 127 if localRegistry != "" { 128 localRegistry = filepath.Join(localRegistry, "maven") 129 } 130 m.localRegistry = localRegistry 131 } 132 133 // WithoutRegistries makes MavenRegistryAPIClient including its cache but not registries. 134 func (m *MavenRegistryAPIClient) WithoutRegistries() *MavenRegistryAPIClient { 135 return &MavenRegistryAPIClient{ 136 defaultRegistry: m.defaultRegistry, 137 localRegistry: m.localRegistry, 138 mu: m.mu, 139 cacheTimestamp: m.cacheTimestamp, 140 responses: m.responses, 141 registryAuths: m.registryAuths, 142 googleClient: m.googleClient, 143 disableGoogleAuth: m.disableGoogleAuth, 144 } 145 } 146 147 // AddRegistry adds the given registry to the list of registries if it has not been added. 148 func (m *MavenRegistryAPIClient) AddRegistry(ctx context.Context, registry MavenRegistry) error { 149 if registry.ID == m.defaultRegistry.ID { 150 return m.updateDefaultRegistry(ctx, registry) 151 } 152 153 for _, reg := range m.registries { 154 if reg.ID == registry.ID { 155 return nil 156 } 157 } 158 159 u, err := url.Parse(registry.URL) 160 if err != nil { 161 return err 162 } 163 164 registry.Parsed = u 165 m.registries = append(m.registries, registry) 166 if registry.Parsed.Scheme == artifactRegistryScheme { 167 m.createGoogleClient(ctx) 168 } 169 170 return nil 171 } 172 173 func (m *MavenRegistryAPIClient) updateDefaultRegistry(ctx context.Context, registry MavenRegistry) error { 174 u, err := url.Parse(registry.URL) 175 if err != nil { 176 return err 177 } 178 registry.Parsed = u 179 m.defaultRegistry = registry 180 if registry.Parsed.Scheme == artifactRegistryScheme { 181 m.createGoogleClient(ctx) 182 } 183 return nil 184 } 185 186 // createGoogleClient creates a client for authenticating with Google services. 187 func (m *MavenRegistryAPIClient) createGoogleClient(ctx context.Context) { 188 if m.googleClient != nil || m.disableGoogleAuth { 189 return 190 } 191 // This is the scope that artifact-registry-go-tools uses. 192 // https://github.com/GoogleCloudPlatform/artifact-registry-go-tools/blob/main/pkg/auth/auth.go 193 client, err := google.DefaultClient(ctx, "https://www.googleapis.com/auth/cloud-platform") 194 if err != nil { 195 // We don't return an error here so that we can fall back to a regular http client. 196 log.Warnf("failed to create Google default client, Artifact Registry access will be unavailable: %v", err) 197 return 198 } 199 m.googleClient = client 200 } 201 202 // DisableGoogleAuth prevents the creation of a Google client for authentication purpose. 203 func (m *MavenRegistryAPIClient) DisableGoogleAuth() { 204 m.disableGoogleAuth = true 205 } 206 207 // GetRegistries returns the registries added to this client. 208 func (m *MavenRegistryAPIClient) GetRegistries() (registries []MavenRegistry) { 209 return m.registries 210 } 211 212 // GetProject fetches a pom.xml specified by groupID, artifactID and version and parses it to maven.Project. 213 // Each registry in the list is tried until we find the project. 214 // For a snapshot version, version level metadata is used to find the extact version string. 215 // More about Maven Repository Metadata Model: https://maven.apache.org/ref/3.9.9/maven-repository-metadata/ 216 // More about Maven Metadata: https://maven.apache.org/repositories/metadata.html 217 func (m *MavenRegistryAPIClient) GetProject(ctx context.Context, groupID, artifactID, version string) (maven.Project, error) { 218 if !strings.HasSuffix(version, "-SNAPSHOT") { 219 for _, registry := range append(m.registries, m.defaultRegistry) { 220 if !registry.ReleasesEnabled { 221 continue 222 } 223 project, err := m.getProject(ctx, registry, groupID, artifactID, version, "") 224 if err == nil { 225 return project, nil 226 } 227 } 228 229 return maven.Project{}, fmt.Errorf("failed to fetch Maven project %s:%s@%s", groupID, artifactID, version) 230 } 231 232 for _, registry := range append(m.registries, m.defaultRegistry) { 233 // Fetch version metadata for snapshot versions from the registries enabling that. 234 if !registry.SnapshotsEnabled { 235 continue 236 } 237 metadata, err := m.getVersionMetadata(ctx, registry, groupID, artifactID, version) 238 if err != nil { 239 continue 240 } 241 242 snapshot := "" 243 for _, sv := range metadata.Versioning.SnapshotVersions { 244 if sv.Extension == "pom" { 245 // We only look for pom.xml for project metadata. 246 snapshot = string(sv.Value) 247 break 248 } 249 } 250 251 project, err := m.getProject(ctx, registry, groupID, artifactID, version, snapshot) 252 if err == nil { 253 return project, nil 254 } 255 } 256 257 return maven.Project{}, fmt.Errorf("failed to fetch Maven project %s:%s@%s", groupID, artifactID, version) 258 } 259 260 // GetVersions returns the list of available versions of a Maven package specified by groupID and artifactID. 261 // Versions found in all registries are unioned, then sorted by semver. 262 func (m *MavenRegistryAPIClient) GetVersions(ctx context.Context, groupID, artifactID string) ([]maven.String, error) { 263 var versions []maven.String 264 for _, registry := range append(m.registries, m.defaultRegistry) { 265 metadata, err := m.getArtifactMetadata(ctx, registry, groupID, artifactID) 266 if err != nil { 267 continue 268 } 269 versions = append(versions, metadata.Versioning.Versions...) 270 } 271 slices.SortFunc(versions, func(a, b maven.String) int { return semver.Maven.Compare(string(a), string(b)) }) 272 273 return slices.Compact(versions), nil 274 } 275 276 // getProject fetches a pom.xml specified by groupID, artifactID and version and parses it to maven.Project. 277 // For snapshot versions, the exact version value is specified by snapshot. 278 func (m *MavenRegistryAPIClient) getProject(ctx context.Context, registry MavenRegistry, groupID, artifactID, version, snapshot string) (maven.Project, error) { 279 if snapshot == "" { 280 snapshot = version 281 } 282 283 var project maven.Project 284 if err := m.get(ctx, m.registryAuths[registry.ID], registry, []string{strings.ReplaceAll(groupID, ".", "/"), artifactID, version, fmt.Sprintf("%s-%s.pom", artifactID, snapshot)}, &project); err != nil { 285 return maven.Project{}, err 286 } 287 288 return project, nil 289 } 290 291 // getVersionMetadata fetches a version level maven-metadata.xml and parses it to maven.Metadata. 292 func (m *MavenRegistryAPIClient) getVersionMetadata(ctx context.Context, registry MavenRegistry, groupID, artifactID, version string) (maven.Metadata, error) { 293 var metadata maven.Metadata 294 if err := m.get(ctx, m.registryAuths[registry.ID], registry, []string{strings.ReplaceAll(groupID, ".", "/"), artifactID, version, "maven-metadata.xml"}, &metadata); err != nil { 295 return maven.Metadata{}, err 296 } 297 298 return metadata, nil 299 } 300 301 // GetArtifactMetadata fetches an artifact level maven-metadata.xml and parses it to maven.Metadata. 302 func (m *MavenRegistryAPIClient) getArtifactMetadata(ctx context.Context, registry MavenRegistry, groupID, artifactID string) (maven.Metadata, error) { 303 var metadata maven.Metadata 304 if err := m.get(ctx, m.registryAuths[registry.ID], registry, []string{strings.ReplaceAll(groupID, ".", "/"), artifactID, "maven-metadata.xml"}, &metadata); err != nil { 305 return maven.Metadata{}, err 306 } 307 308 return metadata, nil 309 } 310 311 func (m *MavenRegistryAPIClient) get(ctx context.Context, auth *HTTPAuthentication, registry MavenRegistry, paths []string, dst any) error { 312 filePath := "" 313 if m.localRegistry != "" { 314 filePath = filepath.Join(append([]string{m.localRegistry}, paths...)...) 315 file, err := os.Open(filePath) 316 if err == nil { 317 defer file.Close() 318 // We can still fetch the file from upstream if error is not nil. 319 return NewMavenDecoder(file).Decode(dst) 320 } 321 if !os.IsNotExist(err) { 322 log.Warnf("Error reading from local cache %s: %v", filePath, err) 323 } 324 } 325 326 httpClient := http.DefaultClient 327 requestURL := *registry.Parsed 328 isArtifactRegistry := requestURL.Scheme == artifactRegistryScheme 329 if isArtifactRegistry { 330 requestURL.Scheme = "https" 331 // For Artifact Registry, use google.DefaultClient for ADC if available. 332 if m.googleClient != nil { 333 httpClient = m.googleClient 334 } 335 } 336 337 u := requestURL.JoinPath(paths...).String() 338 resp, err := m.responses.Get(u, func() (response, error) { 339 log.Infof("Fetching response from: %s", u) 340 resp, err := auth.Get(ctx, httpClient, u) 341 if err != nil { 342 return response{}, fmt.Errorf("%w: Maven registry query failed: %w", errAPIFailed, err) 343 } 344 defer resp.Body.Close() 345 346 if !slices.Contains([]int{http.StatusOK, http.StatusNotFound, http.StatusUnauthorized, http.StatusForbidden}, resp.StatusCode) { 347 // Only cache responses with Status OK, NotFound, Unauthorized, or Forbidden 348 return response{}, fmt.Errorf("%w: Maven registry query status: %d", errAPIFailed, resp.StatusCode) 349 } 350 351 b, err := io.ReadAll(resp.Body) 352 if err != nil { 353 return response{}, fmt.Errorf("failed to read body: %w", err) 354 } 355 356 if filePath != "" && resp.StatusCode == http.StatusOK { 357 if err := writeFile(filePath, b); err != nil { 358 log.Warnf("failed to write response to %s: %v", u, err) 359 } 360 } 361 362 return response{StatusCode: resp.StatusCode, Body: b}, nil 363 }) 364 if err != nil { 365 log.Warnf("failed to get response from %s: %v", u, err) 366 return err 367 } 368 369 if resp.StatusCode == http.StatusForbidden && isArtifactRegistry { 370 return fmt.Errorf("%w: Maven registry query status: %d (Forbidden). Please check your Application Default Credentials (ADC) have permission to read from %s", errAPIFailed, resp.StatusCode, registry.URL) 371 } 372 373 if resp.StatusCode != http.StatusOK { 374 return fmt.Errorf("%w: Maven registry query status: %d", errAPIFailed, resp.StatusCode) 375 } 376 377 return NewMavenDecoder(bytes.NewReader(resp.Body)).Decode(dst) 378 } 379 380 // writeFile writes the bytes to the file specified by the given path. 381 func writeFile(path string, data []byte) error { 382 dir := filepath.Dir(path) 383 // Create the directory if it doesn't exist. 384 if err := os.MkdirAll(dir, 0755); err != nil { 385 return fmt.Errorf("failed to create directory %s: %w", dir, err) 386 } 387 388 outFile, err := os.Create(path) 389 if err != nil { 390 return fmt.Errorf("failed to create file %s: %w", path, err) 391 } 392 defer outFile.Close() 393 394 if _, err := outFile.Write(data); err != nil { 395 return fmt.Errorf("failed to write file %s: %w", path, err) 396 } 397 398 return nil 399 } 400 401 // NewMavenDecoder returns an xml decoder with CharsetReader and Entity set. 402 func NewMavenDecoder(reader io.Reader) *xml.Decoder { 403 decoder := xml.NewDecoder(reader) 404 // Set charset reader for conversion from non-UTF-8 charset into UTF-8. 405 decoder.CharsetReader = charset.NewReaderLabel 406 // Set HTML entity map for translation between non-standard entity names 407 // and string replacements. 408 decoder.Entity = xml.HTMLEntity 409 410 return decoder 411 }