github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/java.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package java provides an Enricher to add reachability annotations for Java Packages.
    16  package java
    17  
    18  import (
    19  	"archive/zip"
    20  	"bufio"
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"io"
    25  	"io/fs"
    26  	"maps"
    27  	"net/http"
    28  	"os"
    29  	"path"
    30  	"slices"
    31  	"strings"
    32  
    33  	"github.com/google/osv-scalibr/enricher"
    34  	"github.com/google/osv-scalibr/extractor"
    35  	"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
    36  	archivemeta "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive/metadata"
    37  	"github.com/google/osv-scalibr/inventory"
    38  	"github.com/google/osv-scalibr/inventory/vex"
    39  	"github.com/google/osv-scalibr/log"
    40  	"github.com/google/osv-scalibr/plugin"
    41  )
    42  
    43  const (
    44  	// Name is the unique name of this detector.
    45  	Name = "reachability/java"
    46  	// MetaDirPath is the path to the META-INF directory.
    47  	MetaDirPath   = "META-INF"
    48  	pathSeparator = '/'
    49  )
    50  
    51  var (
    52  	// ManifestFilePath is the path to the MANIFEST.MF file.
    53  	ManifestFilePath = path.Join(MetaDirPath, "MANIFEST.MF")
    54  	// MavenDepDirPath is the path to the Maven dependency directory.
    55  	MavenDepDirPath = path.Join(MetaDirPath, "maven")
    56  	// ServiceDirPath is the path to the META-INF/services directory.
    57  	ServiceDirPath = path.Join(MetaDirPath, "services")
    58  
    59  	// ErrMavenDependencyNotFound is returned when a JAR is not a Maven dependency.
    60  	ErrMavenDependencyNotFound = errors.New(MavenDepDirPath + " directory not found")
    61  )
    62  
    63  // Enricher is the Java Reach enricher.
    64  type Enricher struct {
    65  	client *http.Client
    66  }
    67  
    68  // Name returns the name of the enricher.
    69  func (Enricher) Name() string {
    70  	return Name
    71  }
    72  
    73  // Version returns the version of the enricher.
    74  func (Enricher) Version() int {
    75  	return 0
    76  }
    77  
    78  // Requirements returns the requirements of the enricher.
    79  func (Enricher) Requirements() *plugin.Capabilities {
    80  	return &plugin.Capabilities{
    81  		Network: plugin.NetworkOnline,
    82  	}
    83  }
    84  
    85  // RequiredPlugins returns the names of the plugins required by the enricher.
    86  func (Enricher) RequiredPlugins() []string {
    87  	return []string{archive.Name}
    88  }
    89  
    90  // NewEnricher creates a new Enricher.
    91  // It accepts an http.Client as a dependency. If the provided client is nil,
    92  // it defaults to the standard http.DefaultClient.
    93  func NewEnricher(client *http.Client) *Enricher {
    94  	if client == nil {
    95  		client = http.DefaultClient
    96  	}
    97  
    98  	return &Enricher{
    99  		client: client,
   100  	}
   101  }
   102  
   103  // NewDefault returns a new javareach enricher with the default configuration.
   104  func NewDefault() enricher.Enricher {
   105  	return &Enricher{
   106  		client: http.DefaultClient,
   107  	}
   108  }
   109  
   110  // Enrich enriches the inventory with Java Reach data.
   111  func (enr Enricher) Enrich(ctx context.Context, input *enricher.ScanInput, inv *inventory.Inventory) error {
   112  	client := enr.client
   113  	if client == nil {
   114  		client = http.DefaultClient
   115  	}
   116  	jars := make(map[string]struct{})
   117  	for i := range inv.Packages {
   118  		if slices.Contains(inv.Packages[i].Plugins, archive.Name) {
   119  			jars[inv.Packages[i].Locations[0]] = struct{}{}
   120  		}
   121  	}
   122  
   123  	for jar := range jars {
   124  		err := enumerateReachabilityForJar(ctx, jar, input, inv, client)
   125  		if err != nil {
   126  			return err
   127  		}
   128  	}
   129  
   130  	return nil
   131  }
   132  
   133  func getFullPackageName(i *extractor.Package) string {
   134  	return fmt.Sprintf("%s:%s", i.Metadata.(*archivemeta.Metadata).GroupID,
   135  		i.Metadata.(*archivemeta.Metadata).ArtifactID)
   136  }
   137  
   138  func enumerateReachabilityForJar(ctx context.Context, jarPath string, input *enricher.ScanInput, inv *inventory.Inventory, client *http.Client) error {
   139  	var allDeps []*extractor.Package
   140  	if client == nil {
   141  		client = http.DefaultClient
   142  	}
   143  	for i := range inv.Packages {
   144  		if inv.Packages[i].Locations[0] == jarPath {
   145  			allDeps = append(allDeps, inv.Packages[i])
   146  		}
   147  	}
   148  
   149  	slices.SortFunc(allDeps, func(i1 *extractor.Package, i2 *extractor.Package) int {
   150  		return strings.Compare(getFullPackageName(i1), getFullPackageName(i2))
   151  	})
   152  	for _, dep := range allDeps {
   153  		log.Debug("extracted dep",
   154  			"group id", dep.Metadata.(*archivemeta.Metadata).GroupID, "artifact id", dep.Name, "version", dep.Version)
   155  	}
   156  
   157  	// Unpack .jar
   158  	jarDir, err := os.MkdirTemp("", "osv-scalibr-javareach-")
   159  	if err != nil {
   160  		return err
   161  	}
   162  	defer os.RemoveAll(jarDir)
   163  	log.Debug("Unzipping", "jar", jarPath, "to", jarDir)
   164  
   165  	jarRoot, err := os.OpenRoot(jarDir)
   166  	if err != nil {
   167  		return err
   168  	}
   169  
   170  	nestedJARs, err := unzipJAR(jarPath, input, jarRoot)
   171  	if err != nil {
   172  		return err
   173  	}
   174  
   175  	// Reachability analysis is limited to Maven-built JARs for now.
   176  	// Check for the existence of the Maven metadata directory.
   177  	_, err = jarRoot.Stat(MavenDepDirPath)
   178  	if err != nil {
   179  		log.Error("reachability analysis is only supported for JARs built with Maven.")
   180  		return ErrMavenDependencyNotFound
   181  	}
   182  
   183  	// Build .class -> Maven group ID:artifact ID mappings.
   184  	// TODO(#787): Handle BOOT-INF and loading .jar dependencies from there.
   185  	classFinder, err := NewDefaultPackageFinder(ctx, allDeps, jarRoot, client)
   186  	if err != nil {
   187  		return err
   188  	}
   189  
   190  	// Extract the main entrypoint.
   191  	manifest, err := jarRoot.Open(ManifestFilePath)
   192  	if err != nil {
   193  		return err
   194  	}
   195  
   196  	mainClasses, err := GetMainClasses(manifest)
   197  	if err != nil {
   198  		return err
   199  	}
   200  	log.Debug("Found", "main classes", mainClasses)
   201  
   202  	classPaths := []string{"./"}
   203  	classPaths = append(classPaths, nestedJARs...)
   204  
   205  	// Spring Boot applications have classes in BOOT-INF/classes.
   206  	if _, err := jarRoot.Stat(BootInfClasses); err == nil {
   207  		classPaths = append(classPaths, BootInfClasses)
   208  	}
   209  
   210  	// Look inside META-INF/services, which is used by
   211  	// https://docs.oracle.com/javase/8/docs/api/java/util/ServiceLoader.html
   212  	var optionalRootClasses []string
   213  
   214  	if _, err := jarRoot.Stat(ServiceDirPath); err == nil {
   215  		var entries []string
   216  		err = fs.WalkDir(jarRoot.FS(), ServiceDirPath, func(path string, d fs.DirEntry, err error) error {
   217  			if err != nil {
   218  				return err
   219  			}
   220  
   221  			if !d.IsDir() {
   222  				entries = append(entries, path)
   223  			}
   224  
   225  			return nil
   226  		})
   227  
   228  		if err != nil {
   229  			return err
   230  		}
   231  
   232  		for _, entry := range entries {
   233  			f, err := jarRoot.Open(entry)
   234  			if err != nil {
   235  				return err
   236  			}
   237  
   238  			scanner := bufio.NewScanner(f)
   239  			for scanner.Scan() {
   240  				provider := scanner.Text()
   241  				provider = strings.Split(provider, "#")[0] // remove comments
   242  
   243  				// Some files specify the class name using the format: "class = foo".
   244  				if strings.Contains(provider, "=") {
   245  					provider = strings.Split(provider, "=")[1]
   246  				}
   247  
   248  				provider = strings.TrimSpace(provider)
   249  
   250  				if len(provider) == 0 {
   251  					continue
   252  				}
   253  
   254  				log.Debug("adding META-INF/services provider", "provider", provider, "from", entry)
   255  				optionalRootClasses = append(optionalRootClasses, strings.ReplaceAll(provider, ".", "/"))
   256  			}
   257  			if err := scanner.Err(); err != nil {
   258  				return err
   259  			}
   260  		}
   261  	}
   262  
   263  	// Enumerate reachable classes.
   264  	enumerator := NewReachabilityEnumerator(classPaths, classFinder, AssumeAllClassesReachable, AssumeAllClassesReachable)
   265  	result, err := enumerator.EnumerateReachabilityFromClasses(jarRoot, mainClasses, optionalRootClasses)
   266  	if err != nil {
   267  		return err
   268  	}
   269  
   270  	// Map reachable classes back to Maven group ID:artifact ID.
   271  	reachableDeps := map[string]struct{}{}
   272  	for _, class := range result.Classes {
   273  		deps, err := classFinder.Find(class)
   274  		if err != nil {
   275  			log.Debug("Failed to find dep mapping", "class", class, "error", err)
   276  			continue
   277  		}
   278  
   279  		for _, dep := range deps {
   280  			reachableDeps[dep] = struct{}{}
   281  		}
   282  	}
   283  
   284  	// Find Maven deps that use dynamic code loading and dependency injection.
   285  	dynamicLoadingDeps := map[string]struct{}{}
   286  	injectionDeps := map[string]struct{}{}
   287  	slices.Sort(result.UsesDynamicCodeLoading)
   288  	for _, class := range result.UsesDynamicCodeLoading {
   289  		log.Debug("Found use of dynamic code loading", "class", class)
   290  		deps, err := classFinder.Find(class)
   291  		if err != nil {
   292  			log.Debug("Failed to find dep mapping", "class", class, "error", err)
   293  			continue
   294  		}
   295  		for _, dep := range deps {
   296  			dynamicLoadingDeps[dep] = struct{}{}
   297  		}
   298  	}
   299  	for _, class := range result.UsesDependencyInjection {
   300  		log.Debug("Found use of dependency injection", "class", class)
   301  		deps, err := classFinder.Find(class)
   302  		if err != nil {
   303  			log.Debug("Failed to find dep mapping", "class", class, "error", err)
   304  			continue
   305  		}
   306  		for _, dep := range deps {
   307  			injectionDeps[dep] = struct{}{}
   308  		}
   309  	}
   310  
   311  	// Print results.
   312  	for _, dep := range slices.Sorted(maps.Keys(reachableDeps)) {
   313  		_, dynamicLoading := dynamicLoadingDeps[dep]
   314  		_, injection := injectionDeps[dep]
   315  		log.Debug("Reachable", "dep", dep, "dynamic code", dynamicLoading, "dep injection", injection)
   316  	}
   317  
   318  	for _, dep := range allDeps {
   319  		name := getFullPackageName(dep)
   320  		if _, ok := reachableDeps[name]; !ok {
   321  			log.Debug("Not reachable", "dep", name)
   322  		}
   323  	}
   324  
   325  	log.Debug("finished analysis", "reachable", len(reachableDeps), "unreachable", len(allDeps)-len(reachableDeps), "all", len(allDeps))
   326  
   327  	totalUnreachable := 0
   328  	for i := range inv.Packages {
   329  		if inv.Packages[i].Locations[0] != jarPath {
   330  			continue
   331  		}
   332  		metadata := inv.Packages[i].Metadata.(*archivemeta.Metadata)
   333  		artifactName := fmt.Sprintf("%s:%s", metadata.GroupID, metadata.ArtifactID)
   334  		if _, exists := reachableDeps[artifactName]; !exists {
   335  			inv.Packages[i].ExploitabilitySignals = append(inv.Packages[i].ExploitabilitySignals, &vex.PackageExploitabilitySignal{
   336  				Plugin:          Name,
   337  				Justification:   vex.VulnerableCodeNotInExecutePath,
   338  				VulnIdentifiers: nil,
   339  				MatchesAllVulns: true,
   340  			})
   341  			log.Debugf("Added a unreachable signal to package '%s'", artifactName)
   342  			totalUnreachable++
   343  		}
   344  	}
   345  
   346  	if totalUnreachable > 0 {
   347  		log.Infof("Java reachability enricher marked %d packages as unreachable", totalUnreachable)
   348  	}
   349  
   350  	return nil
   351  }
   352  
   353  // unzipJAR unzips a JAR to a target directory. It also returns a list of paths
   354  // to all the nested JARs found while unzipping.
   355  func unzipJAR(jarPath string, input *enricher.ScanInput, jarRoot *os.Root) (nestedJARs []string, err error) {
   356  	file, err := openFromRoot(input.ScanRoot, jarPath)
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  
   361  	fileReaderAt, _ := file.(io.ReaderAt)
   362  
   363  	defer file.Close()
   364  
   365  	info, _ := file.Stat()
   366  	l := info.Size()
   367  
   368  	r, err := zip.NewReader(fileReaderAt, l)
   369  
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  
   374  	maxFileSize := 500 * 1024 * 1024 // 500 MB in bytes
   375  
   376  	for _, file := range r.File {
   377  		relativePath := file.Name
   378  		if err != nil {
   379  			return nil, err
   380  		}
   381  
   382  		if file.FileInfo().IsDir() {
   383  			if err := mkdirAll(jarRoot, relativePath, 0755); err != nil {
   384  				return nil, err
   385  			}
   386  		} else {
   387  			if err := mkdirAll(jarRoot, path.Dir(relativePath), 0755); err != nil {
   388  				return nil, err
   389  			}
   390  
   391  			if strings.HasSuffix(relativePath, ".jar") {
   392  				nestedJARs = append(nestedJARs, relativePath)
   393  			}
   394  
   395  			source, err := file.Open()
   396  			if err != nil {
   397  				return nil, err
   398  			}
   399  
   400  			f, err := jarRoot.Create(relativePath)
   401  			if err != nil {
   402  				return nil, err
   403  			}
   404  
   405  			limitedSource := &io.LimitedReader{R: source, N: int64(maxFileSize)}
   406  			_, err = io.Copy(f, limitedSource)
   407  			if err != nil {
   408  				f.Close()
   409  				return nil, err
   410  			}
   411  			f.Close()
   412  		}
   413  	}
   414  
   415  	return nestedJARs, nil
   416  }