github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/jar.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package java
    16  
    17  import (
    18  	"archive/zip"
    19  	"bufio"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"io/fs"
    25  	"net/http"
    26  	"os"
    27  	"path/filepath"
    28  	"strings"
    29  	"sync"
    30  
    31  	"github.com/google/osv-scalibr/extractor"
    32  	archivemeta "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive/metadata"
    33  	"github.com/google/osv-scalibr/log"
    34  	"golang.org/x/sync/errgroup"
    35  )
    36  
    37  const (
    38  	maxGoroutines = 4
    39  	rootArtifact  = "<root>"
    40  )
    41  
    42  // MavenBaseURL is the base URL for the repository.
    43  var MavenBaseURL = "https://repo1.maven.org/maven2"
    44  
    45  var (
    46  	// ErrClassNotFound is returned when a class is not found.
    47  	ErrClassNotFound = errors.New("class not found")
    48  	// ErrArtifactNotFound is returned when an artifact is not found.
    49  	ErrArtifactNotFound = errors.New("artifact not found")
    50  )
    51  
    52  // MavenPackageFinder is an interface for finding Maven packages that contain a
    53  // class path.
    54  type MavenPackageFinder interface {
    55  	// Find returns a list of package names that contain a class path.
    56  	Find(classPath string) ([]string, error)
    57  	// Find returns a list of class names that are part of a package.
    58  	Classes(artifact string) ([]string, error)
    59  }
    60  
    61  // DefaultPackageFinder implements a MavenPackageFinder that downloads all .jar
    62  // dependencies on demand and computes a local class to jar mapping.
    63  type DefaultPackageFinder struct {
    64  	// map of class to maven dependencies.
    65  	classMap map[string][]string
    66  	// map of maven dependency to class files.
    67  	artifactMap map[string][]string
    68  }
    69  
    70  // loadJARMappings loads class mappings from a JAR archive.
    71  func loadJARMappings(metadata *archivemeta.Metadata, reader *zip.Reader, classMap map[string][]string, artifactMap map[string][]string, lock *sync.Mutex) {
    72  	lock.Lock()
    73  	for _, f := range reader.File {
    74  		if strings.HasSuffix(f.Name, ".class") {
    75  			artifactName := fmt.Sprintf("%s:%s", metadata.GroupID, metadata.ArtifactID)
    76  			addClassMapping(artifactName, f.Name, classMap, artifactMap)
    77  		}
    78  	}
    79  	lock.Unlock()
    80  }
    81  
    82  func addClassMapping(artifactName, class string, classMap map[string][]string, artifactMap map[string][]string) {
    83  	name := strings.TrimSuffix(class, ".class")
    84  	if after, ok := strings.CutPrefix(name, MetaInfVersions); ok {
    85  		// Strip the version after the META-INF/versions/<version>/
    86  		name = after[1:]
    87  		name = name[strings.Index(name, "/")+1:]
    88  	}
    89  
    90  	classMap[name] = append(classMap[name], artifactName)
    91  	artifactMap[artifactName] = append(artifactMap[artifactName], name)
    92  	log.Debug("mapping", "name", name, "to", classMap[name])
    93  }
    94  
    95  // extractClassMappings extracts class mappings from a .jar dependency by
    96  // downloading and unpacking the .jar from the relevant registry.
    97  func extractClassMappings(ctx context.Context, inv *extractor.Package, classMap map[string][]string, artifactMap map[string][]string, client *http.Client, lock *sync.Mutex) error {
    98  	metadata := inv.Metadata.(*archivemeta.Metadata)
    99  	// TODO(#841): Handle when a class file contains in a nested JAR.
   100  
   101  	// Try downloading the same package from Maven Central.
   102  	jarURL := fmt.Sprintf("%s/%s/%s/%s/%s-%s.jar",
   103  		MavenBaseURL,
   104  		strings.ReplaceAll(metadata.GroupID, ".", "/"), metadata.ArtifactID, inv.Version, metadata.ArtifactID, inv.Version)
   105  	file, err := os.CreateTemp("", "")
   106  	if err != nil {
   107  		return err
   108  	}
   109  	defer os.Remove(file.Name())
   110  	defer file.Close()
   111  
   112  	log.Debug("downloading", "jar", jarURL)
   113  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, jarURL, nil)
   114  	if err != nil {
   115  		return err
   116  	}
   117  
   118  	resp, err := client.Do(req)
   119  
   120  	if err != nil {
   121  		return err
   122  	}
   123  
   124  	if resp.StatusCode != http.StatusOK {
   125  		return fmt.Errorf("jar not found: %s", jarURL)
   126  	}
   127  
   128  	nbytes, err := io.Copy(file, resp.Body)
   129  	if err != nil {
   130  		resp.Body.Close()
   131  		return err
   132  	}
   133  	resp.Body.Close()
   134  
   135  	_, err = file.Seek(0, io.SeekStart)
   136  	if err != nil {
   137  		return err
   138  	}
   139  
   140  	var reader *zip.Reader
   141  	reader, err = zip.NewReader(file, nbytes)
   142  	if err != nil {
   143  		return err
   144  	}
   145  
   146  	loadJARMappings(metadata, reader, classMap, artifactMap, lock)
   147  
   148  	return nil
   149  }
   150  
   151  // NewDefaultPackageFinder creates a new DefaultPackageFinder based on a set of
   152  // inventory.
   153  func NewDefaultPackageFinder(ctx context.Context, inv []*extractor.Package, jarRoot *os.Root, client *http.Client) (*DefaultPackageFinder, error) {
   154  	// Download pkg, unpack, and store class mappings for each detected dependency.
   155  	classMap := map[string][]string{}
   156  	artifactMap := map[string][]string{}
   157  	lock := new(sync.Mutex)
   158  	group := new(errgroup.Group)
   159  	group.SetLimit(maxGoroutines)
   160  
   161  	for _, i := range inv {
   162  		group.Go(func() error {
   163  			return extractClassMappings(ctx, i, classMap, artifactMap, client, lock)
   164  		})
   165  	}
   166  
   167  	if err := group.Wait(); err != nil {
   168  		// Tolerate some errors.
   169  		log.Error("failed to download package", "err", err)
   170  	}
   171  
   172  	if err := mapRootClasses(jarRoot, classMap, artifactMap); err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	return &DefaultPackageFinder{
   177  		classMap:    classMap,
   178  		artifactMap: artifactMap,
   179  	}, nil
   180  }
   181  
   182  // mapRootClasses maps class files to the root application where we can determine that association.
   183  func mapRootClasses(jarRoot *os.Root, classMap map[string][]string, artifactMap map[string][]string) error {
   184  	// Spring Boot.
   185  	// TODO(#787): Handle non-Spring Boot applications. We could add heuristic for
   186  	// detecting root application classes when the class structure is flat based
   187  	// on the class hierarchy.
   188  	if _, err := jarRoot.Stat(BootInfClasses); err != nil {
   189  		if errors.Is(err, fs.ErrNotExist) {
   190  			return nil
   191  		}
   192  
   193  		return err
   194  	}
   195  	log.Debug("Found Spring Boot classes", "classes", BootInfClasses)
   196  
   197  	return fs.WalkDir(jarRoot.FS(), BootInfClasses, func(path string, info fs.DirEntry, err error) error {
   198  		if err != nil {
   199  			return err
   200  		}
   201  
   202  		if !info.IsDir() && strings.HasSuffix(path, ".class") {
   203  			name, err := filepath.Rel(BootInfClasses, path)
   204  			if err != nil {
   205  				return err
   206  			}
   207  
   208  			addClassMapping(rootArtifact, name, classMap, artifactMap)
   209  
   210  			return nil
   211  		}
   212  
   213  		return nil
   214  	})
   215  }
   216  
   217  // Find returns a list of package names that contain a class path.
   218  func (f *DefaultPackageFinder) Find(classPath string) ([]string, error) {
   219  	if pkg, ok := f.classMap[classPath]; ok {
   220  		return pkg, nil
   221  	}
   222  
   223  	return nil, ErrClassNotFound
   224  }
   225  
   226  // Classes find returns a list of package names that contain a class path.
   227  func (f *DefaultPackageFinder) Classes(artifact string) ([]string, error) {
   228  	if classes, ok := f.artifactMap[artifact]; ok {
   229  		return classes, nil
   230  	}
   231  
   232  	return nil, ErrArtifactNotFound
   233  }
   234  
   235  // GetMainClasses extracts the main class name from the MANIFEST.MF file in a .jar.
   236  func GetMainClasses(manifest io.Reader) ([]string, error) {
   237  	// Extract the Main-Class specified in MANIFEST.MF:
   238  	// https://docs.oracle.com/javase/tutorial/deployment/jar/appman.html
   239  	const mainClass = "Main-Class:"
   240  	// Spring Boot specific metadata.
   241  	const startClass = "Start-Class:"
   242  	markers := []string{mainClass, startClass}
   243  
   244  	scanner := bufio.NewScanner(manifest)
   245  
   246  	var classes []string
   247  	var lines []string
   248  
   249  	// Read all lines into memory for easier processing.
   250  	for scanner.Scan() {
   251  		lines = append(lines, scanner.Text())
   252  	}
   253  
   254  	for i := range lines {
   255  		line := strings.TrimSpace(lines[i])
   256  		for _, marker := range markers {
   257  			if strings.HasPrefix(line, marker) {
   258  				var class strings.Builder
   259  
   260  				class.WriteString(strings.TrimSpace(strings.TrimPrefix(line, marker)))
   261  
   262  				// Handle wrapped lines. Class names exceeding line length limits
   263  				// may be split across multiple lines, starting with a space.
   264  				for index := i + 1; index < len(lines); index++ {
   265  					nextLine := lines[index]
   266  					if strings.HasPrefix(nextLine, " ") {
   267  						class.WriteString(strings.TrimSpace(nextLine))
   268  					} else {
   269  						break
   270  					}
   271  				}
   272  				classes = append(classes, strings.ReplaceAll(class.String(), ".", "/"))
   273  			}
   274  		}
   275  	}
   276  
   277  	if err := scanner.Err(); err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	if len(classes) > 0 {
   282  		return classes, nil
   283  	}
   284  
   285  	return nil, errors.New("no main class")
   286  }