github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/java.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package java provides an Enricher to add reachability annotations for Java Packages. 16 package java 17 18 import ( 19 "archive/zip" 20 "bufio" 21 "context" 22 "errors" 23 "fmt" 24 "io" 25 "io/fs" 26 "maps" 27 "net/http" 28 "os" 29 "path" 30 "slices" 31 "strings" 32 33 "github.com/google/osv-scalibr/enricher" 34 "github.com/google/osv-scalibr/extractor" 35 "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive" 36 archivemeta "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive/metadata" 37 "github.com/google/osv-scalibr/inventory" 38 "github.com/google/osv-scalibr/inventory/vex" 39 "github.com/google/osv-scalibr/log" 40 "github.com/google/osv-scalibr/plugin" 41 ) 42 43 const ( 44 // Name is the unique name of this detector. 45 Name = "reachability/java" 46 // MetaDirPath is the path to the META-INF directory. 47 MetaDirPath = "META-INF" 48 pathSeparator = '/' 49 ) 50 51 var ( 52 // ManifestFilePath is the path to the MANIFEST.MF file. 53 ManifestFilePath = path.Join(MetaDirPath, "MANIFEST.MF") 54 // MavenDepDirPath is the path to the Maven dependency directory. 55 MavenDepDirPath = path.Join(MetaDirPath, "maven") 56 // ServiceDirPath is the path to the META-INF/services directory. 57 ServiceDirPath = path.Join(MetaDirPath, "services") 58 59 // ErrMavenDependencyNotFound is returned when a JAR is not a Maven dependency. 60 ErrMavenDependencyNotFound = errors.New(MavenDepDirPath + " directory not found") 61 ) 62 63 // Enricher is the Java Reach enricher. 64 type Enricher struct { 65 client *http.Client 66 } 67 68 // Name returns the name of the enricher. 69 func (Enricher) Name() string { 70 return Name 71 } 72 73 // Version returns the version of the enricher. 74 func (Enricher) Version() int { 75 return 0 76 } 77 78 // Requirements returns the requirements of the enricher. 79 func (Enricher) Requirements() *plugin.Capabilities { 80 return &plugin.Capabilities{ 81 Network: plugin.NetworkOnline, 82 } 83 } 84 85 // RequiredPlugins returns the names of the plugins required by the enricher. 86 func (Enricher) RequiredPlugins() []string { 87 return []string{archive.Name} 88 } 89 90 // NewEnricher creates a new Enricher. 91 // It accepts an http.Client as a dependency. If the provided client is nil, 92 // it defaults to the standard http.DefaultClient. 93 func NewEnricher(client *http.Client) *Enricher { 94 if client == nil { 95 client = http.DefaultClient 96 } 97 98 return &Enricher{ 99 client: client, 100 } 101 } 102 103 // NewDefault returns a new javareach enricher with the default configuration. 104 func NewDefault() enricher.Enricher { 105 return &Enricher{ 106 client: http.DefaultClient, 107 } 108 } 109 110 // Enrich enriches the inventory with Java Reach data. 111 func (enr Enricher) Enrich(ctx context.Context, input *enricher.ScanInput, inv *inventory.Inventory) error { 112 client := enr.client 113 if client == nil { 114 client = http.DefaultClient 115 } 116 jars := make(map[string]struct{}) 117 for i := range inv.Packages { 118 if slices.Contains(inv.Packages[i].Plugins, archive.Name) { 119 jars[inv.Packages[i].Locations[0]] = struct{}{} 120 } 121 } 122 123 for jar := range jars { 124 err := enumerateReachabilityForJar(ctx, jar, input, inv, client) 125 if err != nil { 126 return err 127 } 128 } 129 130 return nil 131 } 132 133 func getFullPackageName(i *extractor.Package) string { 134 return fmt.Sprintf("%s:%s", i.Metadata.(*archivemeta.Metadata).GroupID, 135 i.Metadata.(*archivemeta.Metadata).ArtifactID) 136 } 137 138 func enumerateReachabilityForJar(ctx context.Context, jarPath string, input *enricher.ScanInput, inv *inventory.Inventory, client *http.Client) error { 139 var allDeps []*extractor.Package 140 if client == nil { 141 client = http.DefaultClient 142 } 143 for i := range inv.Packages { 144 if inv.Packages[i].Locations[0] == jarPath { 145 allDeps = append(allDeps, inv.Packages[i]) 146 } 147 } 148 149 slices.SortFunc(allDeps, func(i1 *extractor.Package, i2 *extractor.Package) int { 150 return strings.Compare(getFullPackageName(i1), getFullPackageName(i2)) 151 }) 152 for _, dep := range allDeps { 153 log.Debug("extracted dep", 154 "group id", dep.Metadata.(*archivemeta.Metadata).GroupID, "artifact id", dep.Name, "version", dep.Version) 155 } 156 157 // Unpack .jar 158 jarDir, err := os.MkdirTemp("", "osv-scalibr-javareach-") 159 if err != nil { 160 return err 161 } 162 defer os.RemoveAll(jarDir) 163 log.Debug("Unzipping", "jar", jarPath, "to", jarDir) 164 165 jarRoot, err := os.OpenRoot(jarDir) 166 if err != nil { 167 return err 168 } 169 170 nestedJARs, err := unzipJAR(jarPath, input, jarRoot) 171 if err != nil { 172 return err 173 } 174 175 // Reachability analysis is limited to Maven-built JARs for now. 176 // Check for the existence of the Maven metadata directory. 177 _, err = jarRoot.Stat(MavenDepDirPath) 178 if err != nil { 179 log.Error("reachability analysis is only supported for JARs built with Maven.") 180 return ErrMavenDependencyNotFound 181 } 182 183 // Build .class -> Maven group ID:artifact ID mappings. 184 // TODO(#787): Handle BOOT-INF and loading .jar dependencies from there. 185 classFinder, err := NewDefaultPackageFinder(ctx, allDeps, jarRoot, client) 186 if err != nil { 187 return err 188 } 189 190 // Extract the main entrypoint. 191 manifest, err := jarRoot.Open(ManifestFilePath) 192 if err != nil { 193 return err 194 } 195 196 mainClasses, err := GetMainClasses(manifest) 197 if err != nil { 198 return err 199 } 200 log.Debug("Found", "main classes", mainClasses) 201 202 classPaths := []string{"./"} 203 classPaths = append(classPaths, nestedJARs...) 204 205 // Spring Boot applications have classes in BOOT-INF/classes. 206 if _, err := jarRoot.Stat(BootInfClasses); err == nil { 207 classPaths = append(classPaths, BootInfClasses) 208 } 209 210 // Look inside META-INF/services, which is used by 211 // https://docs.oracle.com/javase/8/docs/api/java/util/ServiceLoader.html 212 var optionalRootClasses []string 213 214 if _, err := jarRoot.Stat(ServiceDirPath); err == nil { 215 var entries []string 216 err = fs.WalkDir(jarRoot.FS(), ServiceDirPath, func(path string, d fs.DirEntry, err error) error { 217 if err != nil { 218 return err 219 } 220 221 if !d.IsDir() { 222 entries = append(entries, path) 223 } 224 225 return nil 226 }) 227 228 if err != nil { 229 return err 230 } 231 232 for _, entry := range entries { 233 f, err := jarRoot.Open(entry) 234 if err != nil { 235 return err 236 } 237 238 scanner := bufio.NewScanner(f) 239 for scanner.Scan() { 240 provider := scanner.Text() 241 provider = strings.Split(provider, "#")[0] // remove comments 242 243 // Some files specify the class name using the format: "class = foo". 244 if strings.Contains(provider, "=") { 245 provider = strings.Split(provider, "=")[1] 246 } 247 248 provider = strings.TrimSpace(provider) 249 250 if len(provider) == 0 { 251 continue 252 } 253 254 log.Debug("adding META-INF/services provider", "provider", provider, "from", entry) 255 optionalRootClasses = append(optionalRootClasses, strings.ReplaceAll(provider, ".", "/")) 256 } 257 if err := scanner.Err(); err != nil { 258 return err 259 } 260 } 261 } 262 263 // Enumerate reachable classes. 264 enumerator := NewReachabilityEnumerator(classPaths, classFinder, AssumeAllClassesReachable, AssumeAllClassesReachable) 265 result, err := enumerator.EnumerateReachabilityFromClasses(jarRoot, mainClasses, optionalRootClasses) 266 if err != nil { 267 return err 268 } 269 270 // Map reachable classes back to Maven group ID:artifact ID. 271 reachableDeps := map[string]struct{}{} 272 for _, class := range result.Classes { 273 deps, err := classFinder.Find(class) 274 if err != nil { 275 log.Debug("Failed to find dep mapping", "class", class, "error", err) 276 continue 277 } 278 279 for _, dep := range deps { 280 reachableDeps[dep] = struct{}{} 281 } 282 } 283 284 // Find Maven deps that use dynamic code loading and dependency injection. 285 dynamicLoadingDeps := map[string]struct{}{} 286 injectionDeps := map[string]struct{}{} 287 slices.Sort(result.UsesDynamicCodeLoading) 288 for _, class := range result.UsesDynamicCodeLoading { 289 log.Debug("Found use of dynamic code loading", "class", class) 290 deps, err := classFinder.Find(class) 291 if err != nil { 292 log.Debug("Failed to find dep mapping", "class", class, "error", err) 293 continue 294 } 295 for _, dep := range deps { 296 dynamicLoadingDeps[dep] = struct{}{} 297 } 298 } 299 for _, class := range result.UsesDependencyInjection { 300 log.Debug("Found use of dependency injection", "class", class) 301 deps, err := classFinder.Find(class) 302 if err != nil { 303 log.Debug("Failed to find dep mapping", "class", class, "error", err) 304 continue 305 } 306 for _, dep := range deps { 307 injectionDeps[dep] = struct{}{} 308 } 309 } 310 311 // Print results. 312 for _, dep := range slices.Sorted(maps.Keys(reachableDeps)) { 313 _, dynamicLoading := dynamicLoadingDeps[dep] 314 _, injection := injectionDeps[dep] 315 log.Debug("Reachable", "dep", dep, "dynamic code", dynamicLoading, "dep injection", injection) 316 } 317 318 for _, dep := range allDeps { 319 name := getFullPackageName(dep) 320 if _, ok := reachableDeps[name]; !ok { 321 log.Debug("Not reachable", "dep", name) 322 } 323 } 324 325 log.Debug("finished analysis", "reachable", len(reachableDeps), "unreachable", len(allDeps)-len(reachableDeps), "all", len(allDeps)) 326 327 totalUnreachable := 0 328 for i := range inv.Packages { 329 if inv.Packages[i].Locations[0] != jarPath { 330 continue 331 } 332 metadata := inv.Packages[i].Metadata.(*archivemeta.Metadata) 333 artifactName := fmt.Sprintf("%s:%s", metadata.GroupID, metadata.ArtifactID) 334 if _, exists := reachableDeps[artifactName]; !exists { 335 inv.Packages[i].ExploitabilitySignals = append(inv.Packages[i].ExploitabilitySignals, &vex.PackageExploitabilitySignal{ 336 Plugin: Name, 337 Justification: vex.VulnerableCodeNotInExecutePath, 338 VulnIdentifiers: nil, 339 MatchesAllVulns: true, 340 }) 341 log.Debugf("Added a unreachable signal to package '%s'", artifactName) 342 totalUnreachable++ 343 } 344 } 345 346 if totalUnreachable > 0 { 347 log.Infof("Java reachability enricher marked %d packages as unreachable", totalUnreachable) 348 } 349 350 return nil 351 } 352 353 // unzipJAR unzips a JAR to a target directory. It also returns a list of paths 354 // to all the nested JARs found while unzipping. 355 func unzipJAR(jarPath string, input *enricher.ScanInput, jarRoot *os.Root) (nestedJARs []string, err error) { 356 file, err := openFromRoot(input.ScanRoot, jarPath) 357 if err != nil { 358 return nil, err 359 } 360 361 fileReaderAt, _ := file.(io.ReaderAt) 362 363 defer file.Close() 364 365 info, _ := file.Stat() 366 l := info.Size() 367 368 r, err := zip.NewReader(fileReaderAt, l) 369 370 if err != nil { 371 return nil, err 372 } 373 374 maxFileSize := 500 * 1024 * 1024 // 500 MB in bytes 375 376 for _, file := range r.File { 377 relativePath := file.Name 378 if err != nil { 379 return nil, err 380 } 381 382 if file.FileInfo().IsDir() { 383 if err := mkdirAll(jarRoot, relativePath, 0755); err != nil { 384 return nil, err 385 } 386 } else { 387 if err := mkdirAll(jarRoot, path.Dir(relativePath), 0755); err != nil { 388 return nil, err 389 } 390 391 if strings.HasSuffix(relativePath, ".jar") { 392 nestedJARs = append(nestedJARs, relativePath) 393 } 394 395 source, err := file.Open() 396 if err != nil { 397 return nil, err 398 } 399 400 f, err := jarRoot.Create(relativePath) 401 if err != nil { 402 return nil, err 403 } 404 405 limitedSource := &io.LimitedReader{R: source, N: int64(maxFileSize)} 406 _, err = io.Copy(f, limitedSource) 407 if err != nil { 408 f.Close() 409 return nil, err 410 } 411 f.Close() 412 } 413 } 414 415 return nestedJARs, nil 416 }