github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/jar.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package java 16 17 import ( 18 "archive/zip" 19 "bufio" 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "io/fs" 25 "net/http" 26 "os" 27 "path/filepath" 28 "strings" 29 "sync" 30 31 "github.com/google/osv-scalibr/extractor" 32 archivemeta "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive/metadata" 33 "github.com/google/osv-scalibr/log" 34 "golang.org/x/sync/errgroup" 35 ) 36 37 const ( 38 maxGoroutines = 4 39 rootArtifact = "<root>" 40 ) 41 42 // MavenBaseURL is the base URL for the repository. 43 var MavenBaseURL = "https://repo1.maven.org/maven2" 44 45 var ( 46 // ErrClassNotFound is returned when a class is not found. 47 ErrClassNotFound = errors.New("class not found") 48 // ErrArtifactNotFound is returned when an artifact is not found. 49 ErrArtifactNotFound = errors.New("artifact not found") 50 ) 51 52 // MavenPackageFinder is an interface for finding Maven packages that contain a 53 // class path. 54 type MavenPackageFinder interface { 55 // Find returns a list of package names that contain a class path. 56 Find(classPath string) ([]string, error) 57 // Find returns a list of class names that are part of a package. 58 Classes(artifact string) ([]string, error) 59 } 60 61 // DefaultPackageFinder implements a MavenPackageFinder that downloads all .jar 62 // dependencies on demand and computes a local class to jar mapping. 63 type DefaultPackageFinder struct { 64 // map of class to maven dependencies. 65 classMap map[string][]string 66 // map of maven dependency to class files. 67 artifactMap map[string][]string 68 } 69 70 // loadJARMappings loads class mappings from a JAR archive. 71 func loadJARMappings(metadata *archivemeta.Metadata, reader *zip.Reader, classMap map[string][]string, artifactMap map[string][]string, lock *sync.Mutex) { 72 lock.Lock() 73 for _, f := range reader.File { 74 if strings.HasSuffix(f.Name, ".class") { 75 artifactName := fmt.Sprintf("%s:%s", metadata.GroupID, metadata.ArtifactID) 76 addClassMapping(artifactName, f.Name, classMap, artifactMap) 77 } 78 } 79 lock.Unlock() 80 } 81 82 func addClassMapping(artifactName, class string, classMap map[string][]string, artifactMap map[string][]string) { 83 name := strings.TrimSuffix(class, ".class") 84 if after, ok := strings.CutPrefix(name, MetaInfVersions); ok { 85 // Strip the version after the META-INF/versions/<version>/ 86 name = after[1:] 87 name = name[strings.Index(name, "/")+1:] 88 } 89 90 classMap[name] = append(classMap[name], artifactName) 91 artifactMap[artifactName] = append(artifactMap[artifactName], name) 92 log.Debug("mapping", "name", name, "to", classMap[name]) 93 } 94 95 // extractClassMappings extracts class mappings from a .jar dependency by 96 // downloading and unpacking the .jar from the relevant registry. 97 func extractClassMappings(ctx context.Context, inv *extractor.Package, classMap map[string][]string, artifactMap map[string][]string, client *http.Client, lock *sync.Mutex) error { 98 metadata := inv.Metadata.(*archivemeta.Metadata) 99 // TODO(#841): Handle when a class file contains in a nested JAR. 100 101 // Try downloading the same package from Maven Central. 102 jarURL := fmt.Sprintf("%s/%s/%s/%s/%s-%s.jar", 103 MavenBaseURL, 104 strings.ReplaceAll(metadata.GroupID, ".", "/"), metadata.ArtifactID, inv.Version, metadata.ArtifactID, inv.Version) 105 file, err := os.CreateTemp("", "") 106 if err != nil { 107 return err 108 } 109 defer os.Remove(file.Name()) 110 defer file.Close() 111 112 log.Debug("downloading", "jar", jarURL) 113 req, err := http.NewRequestWithContext(ctx, http.MethodGet, jarURL, nil) 114 if err != nil { 115 return err 116 } 117 118 resp, err := client.Do(req) 119 120 if err != nil { 121 return err 122 } 123 124 if resp.StatusCode != http.StatusOK { 125 return fmt.Errorf("jar not found: %s", jarURL) 126 } 127 128 nbytes, err := io.Copy(file, resp.Body) 129 if err != nil { 130 resp.Body.Close() 131 return err 132 } 133 resp.Body.Close() 134 135 _, err = file.Seek(0, io.SeekStart) 136 if err != nil { 137 return err 138 } 139 140 var reader *zip.Reader 141 reader, err = zip.NewReader(file, nbytes) 142 if err != nil { 143 return err 144 } 145 146 loadJARMappings(metadata, reader, classMap, artifactMap, lock) 147 148 return nil 149 } 150 151 // NewDefaultPackageFinder creates a new DefaultPackageFinder based on a set of 152 // inventory. 153 func NewDefaultPackageFinder(ctx context.Context, inv []*extractor.Package, jarRoot *os.Root, client *http.Client) (*DefaultPackageFinder, error) { 154 // Download pkg, unpack, and store class mappings for each detected dependency. 155 classMap := map[string][]string{} 156 artifactMap := map[string][]string{} 157 lock := new(sync.Mutex) 158 group := new(errgroup.Group) 159 group.SetLimit(maxGoroutines) 160 161 for _, i := range inv { 162 group.Go(func() error { 163 return extractClassMappings(ctx, i, classMap, artifactMap, client, lock) 164 }) 165 } 166 167 if err := group.Wait(); err != nil { 168 // Tolerate some errors. 169 log.Error("failed to download package", "err", err) 170 } 171 172 if err := mapRootClasses(jarRoot, classMap, artifactMap); err != nil { 173 return nil, err 174 } 175 176 return &DefaultPackageFinder{ 177 classMap: classMap, 178 artifactMap: artifactMap, 179 }, nil 180 } 181 182 // mapRootClasses maps class files to the root application where we can determine that association. 183 func mapRootClasses(jarRoot *os.Root, classMap map[string][]string, artifactMap map[string][]string) error { 184 // Spring Boot. 185 // TODO(#787): Handle non-Spring Boot applications. We could add heuristic for 186 // detecting root application classes when the class structure is flat based 187 // on the class hierarchy. 188 if _, err := jarRoot.Stat(BootInfClasses); err != nil { 189 if errors.Is(err, fs.ErrNotExist) { 190 return nil 191 } 192 193 return err 194 } 195 log.Debug("Found Spring Boot classes", "classes", BootInfClasses) 196 197 return fs.WalkDir(jarRoot.FS(), BootInfClasses, func(path string, info fs.DirEntry, err error) error { 198 if err != nil { 199 return err 200 } 201 202 if !info.IsDir() && strings.HasSuffix(path, ".class") { 203 name, err := filepath.Rel(BootInfClasses, path) 204 if err != nil { 205 return err 206 } 207 208 addClassMapping(rootArtifact, name, classMap, artifactMap) 209 210 return nil 211 } 212 213 return nil 214 }) 215 } 216 217 // Find returns a list of package names that contain a class path. 218 func (f *DefaultPackageFinder) Find(classPath string) ([]string, error) { 219 if pkg, ok := f.classMap[classPath]; ok { 220 return pkg, nil 221 } 222 223 return nil, ErrClassNotFound 224 } 225 226 // Classes find returns a list of package names that contain a class path. 227 func (f *DefaultPackageFinder) Classes(artifact string) ([]string, error) { 228 if classes, ok := f.artifactMap[artifact]; ok { 229 return classes, nil 230 } 231 232 return nil, ErrArtifactNotFound 233 } 234 235 // GetMainClasses extracts the main class name from the MANIFEST.MF file in a .jar. 236 func GetMainClasses(manifest io.Reader) ([]string, error) { 237 // Extract the Main-Class specified in MANIFEST.MF: 238 // https://docs.oracle.com/javase/tutorial/deployment/jar/appman.html 239 const mainClass = "Main-Class:" 240 // Spring Boot specific metadata. 241 const startClass = "Start-Class:" 242 markers := []string{mainClass, startClass} 243 244 scanner := bufio.NewScanner(manifest) 245 246 var classes []string 247 var lines []string 248 249 // Read all lines into memory for easier processing. 250 for scanner.Scan() { 251 lines = append(lines, scanner.Text()) 252 } 253 254 for i := range lines { 255 line := strings.TrimSpace(lines[i]) 256 for _, marker := range markers { 257 if strings.HasPrefix(line, marker) { 258 var class strings.Builder 259 260 class.WriteString(strings.TrimSpace(strings.TrimPrefix(line, marker))) 261 262 // Handle wrapped lines. Class names exceeding line length limits 263 // may be split across multiple lines, starting with a space. 264 for index := i + 1; index < len(lines); index++ { 265 nextLine := lines[index] 266 if strings.HasPrefix(nextLine, " ") { 267 class.WriteString(strings.TrimSpace(nextLine)) 268 } else { 269 break 270 } 271 } 272 classes = append(classes, strings.ReplaceAll(class.String(), ".", "/")) 273 } 274 } 275 } 276 277 if err := scanner.Err(); err != nil { 278 return nil, err 279 } 280 281 if len(classes) > 0 { 282 return classes, nil 283 } 284 285 return nil, errors.New("no main class") 286 }