github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/parse_java_manifest.go (about) 1 package java 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "strings" 8 "unicode" 9 10 "github.com/anchore/syft/internal/log" 11 "github.com/anchore/syft/syft/pkg" 12 ) 13 14 const manifestGlob = "/META-INF/MANIFEST.MF" 15 16 // parseJavaManifest takes MANIFEST.MF file content and returns sections of parsed key/value pairs. 17 // For more information: https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest 18 // 19 //nolint:funlen 20 func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) { 21 var manifest pkg.JavaManifest 22 sections := make([]pkg.KeyValues, 0) 23 24 currentSection := func() int { 25 return len(sections) - 1 26 } 27 28 var lastKey string 29 scanner := bufio.NewScanner(reader) 30 31 for scanner.Scan() { 32 line := scanner.Text() 33 34 // empty lines denote section separators 35 if line == "" { 36 // we don't want to allocate a new section map that won't necessarily be used, do that once there is 37 // a non-empty line to process 38 39 // do not process line continuations after this 40 lastKey = "" 41 42 continue 43 } 44 45 if line[0] == ' ' { 46 // this is a continuation 47 48 if lastKey == "" { 49 log.Debugf("java manifest %q: found continuation with no previous key: %q", path, line) 50 continue 51 } 52 53 lastSection := sections[currentSection()] 54 55 sections[currentSection()][len(lastSection)-1].Value += strings.TrimSpace(line) 56 57 continue 58 } 59 60 // this is a new key-value pair 61 idx := strings.Index(line, ":") 62 if idx == -1 { 63 log.Debugf("java manifest %q: unable to split java manifest key-value pairs: %q", path, line) 64 continue 65 } 66 67 key := strings.TrimSpace(line[0:idx]) 68 value := strings.TrimSpace(line[idx+1:]) 69 70 if key == "" { 71 // don't attempt to add new keys or sections unless there is a non-empty key 72 continue 73 } 74 75 if lastKey == "" { 76 // we're entering a new section 77 sections = append(sections, make(pkg.KeyValues, 0)) 78 } 79 80 sections[currentSection()] = append(sections[currentSection()], pkg.KeyValue{ 81 Key: key, 82 Value: value, 83 }) 84 85 // keep track of key for potential future continuations 86 lastKey = key 87 } 88 89 if err := scanner.Err(); err != nil { 90 return nil, fmt.Errorf("unable to read java manifest: %w", err) 91 } 92 93 if len(sections) > 0 { 94 manifest.Main = sections[0] 95 if len(sections) > 1 { 96 manifest.Sections = sections[1:] 97 } 98 } 99 100 return &manifest, nil 101 } 102 103 func extractNameFromApacheMavenBundlePlugin(manifest *pkg.JavaManifest) string { 104 // special case: from https://svn.apache.org/repos/asf/felix/releases/maven-bundle-plugin-1.2.0/doc/maven-bundle-plugin-bnd.html 105 // "<Bundle-SymbolicName> is assumed to be "${groupId}.${artifactId}"." 106 // 107 // documentation from https://felix.apache.org/documentation/subprojects/apache-felix-maven-bundle-plugin-bnd.html 108 // agrees this is the default behavior: 109 // 110 // - [1] if artifact.getFile is not null and the jar contains a OSGi Manifest with Bundle-SymbolicName property then that value is returned 111 // 112 // - [2] if groupId has only one section (no dots) and artifact.getFile is not null then the first package name with classes 113 // is returned. eg. commons-logging:commons-logging -> org.apache.commons.logging 114 // 115 // - [3] if artifactId is equal to last section of groupId then groupId is returned. eg. org.apache.maven:maven -> org.apache.maven 116 // 117 // - [4] if artifactId starts with last section of groupId that portion is removed. eg. org.apache.maven:maven-core -> org.apache.maven.core 118 // The computed symbolic name is also stored in the $(maven-symbolicname) property in case you want to add attributes or directives to it. 119 // 120 if manifest != nil { 121 if strings.Contains(manifest.Main.MustGet("Created-By"), "Apache Maven Bundle Plugin") { 122 if symbolicName := manifest.Main.MustGet("Bundle-SymbolicName"); symbolicName != "" { 123 // It is possible that `Bundle-SymbolicName` is just the groupID (like in the case of 124 // https://repo1.maven.org/maven2/com/google/oauth-client/google-oauth-client/1.25.0/google-oauth-client-1.25.0.jar), 125 // so if `Implementation-Vendor-Id` is equal to `Bundle-SymbolicName`, bail on this logic 126 if vendorID := manifest.Main.MustGet("Implementation-Vendor-Id"); vendorID != "" && vendorID == symbolicName { 127 return "" 128 } 129 130 // the problem with this approach is that we don't have a strong indication of the artifactId 131 // not having a "." in it. However, by convention it is unlikely that an artifactId would have a ".". 132 fields := strings.Split(symbolicName, ".") 133 134 // grab the last field, this is the artifactId. Note: because of [3] we do not know if this value is 135 // correct. That is, a group id of "commons-logging" may have caused BND to swap out the reference to 136 // "org.apache.commons.logging", which means we'd interpret this as an artifact id of "logging", 137 // which is not correct. 138 // [correct] https://mvnrepository.com/artifact/commons-logging/commons-logging 139 // [still incorrect] https://mvnrepository.com/artifact/org.apache.commons.logging/org.apache.commons.logging 140 return fields[len(fields)-1] 141 } 142 } 143 } 144 145 return "" 146 } 147 148 func extractNameFromArchiveFilename(a archiveFilename) string { 149 if strings.Contains(a.name, ".") { 150 // special case: this *might* be a group id + artifact id. By convention artifact ids do not have "." in them; 151 // however, there are some specific exceptions like with the artifacts under 152 // https://repo1.maven.org/maven2/org/eclipse/platform/ 153 if strings.HasPrefix(a.name, "org.eclipse.") { 154 return a.name 155 } 156 157 // Maybe the filename is like groupid + . + artifactid. If so, return artifact id. 158 fields := strings.Split(a.name, ".") 159 maybeGroupID := true 160 for _, f := range fields { 161 if !isValidJavaIdentifier(f) { 162 maybeGroupID = false 163 break 164 } 165 } 166 if maybeGroupID { 167 return fields[len(fields)-1] 168 } 169 } 170 171 return a.name 172 } 173 174 func isValidJavaIdentifier(field string) bool { 175 runes := []rune(field) 176 if len(runes) == 0 { 177 return false 178 } 179 // check whether first rune can start an identifier name in Java 180 // Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc] 181 // see https://developer.classpath.org/doc/java/lang/Character-source.html 182 // line 3295 183 r := runes[0] 184 return unicode.Is(unicode.Lu, r) || 185 unicode.Is(unicode.Ll, r) || unicode.Is(unicode.Lt, r) || 186 unicode.Is(unicode.Lm, r) || unicode.Is(unicode.Lo, r) || 187 unicode.Is(unicode.Nl, r) || 188 unicode.Is(unicode.Sc, r) || unicode.Is(unicode.Pc, r) 189 } 190 191 func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { 192 name := extractNameFromApacheMavenBundlePlugin(manifest) 193 if name != "" { 194 return name 195 } 196 197 // the filename tends to be the next-best reference for the package name 198 name = extractNameFromArchiveFilename(filenameObj) 199 if name != "" { 200 return name 201 } 202 203 // remaining fields in the manifest is a bit of a free-for-all depending on the build tooling used and package maintainer preferences 204 if manifest != nil { 205 switch { 206 case manifest.Main.MustGet("Name") != "": 207 // Manifest original spec... 208 return manifest.Main.MustGet("Name") 209 case manifest.Main.MustGet("Bundle-Name") != "": 210 // BND tooling... TODO: this does not seem accurate (I don't see a reference in the BND tooling docs for this) 211 return manifest.Main.MustGet("Bundle-Name") 212 case manifest.Main.MustGet("Short-Name") != "": 213 // Jenkins... 214 return manifest.Main.MustGet("Short-Name") 215 case manifest.Main.MustGet("Extension-Name") != "": 216 // Jenkins... 217 return manifest.Main.MustGet("Extension-Name") 218 case manifest.Main.MustGet("Implementation-Title") != "": 219 // last ditch effort... 220 return manifest.Main.MustGet("Implementation-Title") 221 } 222 } 223 return "" 224 } 225 226 func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { 227 if v := filenameObj.version; v != "" { 228 return v 229 } 230 231 if manifest == nil { 232 return "" 233 } 234 235 fieldNames := []string{ 236 "Implementation-Version", 237 "Specification-Version", 238 "Plugin-Version", 239 "Bundle-Version", 240 } 241 242 for _, fieldName := range fieldNames { 243 if v := fieldValueFromManifest(*manifest, fieldName); v != "" { 244 return v 245 } 246 } 247 248 return "" 249 } 250 251 func selectLicenses(manifest *pkg.JavaManifest) []string { 252 result := []string{} 253 if manifest == nil { 254 return result 255 } 256 257 fieldNames := []string{ 258 "Bundle-License", 259 "Plugin-License-Name", 260 } 261 262 for _, fieldName := range fieldNames { 263 if v := fieldValueFromManifest(*manifest, fieldName); v != "" { 264 result = append(result, v) 265 } 266 } 267 268 return result 269 } 270 271 func fieldValueFromManifest(manifest pkg.JavaManifest, fieldName string) string { 272 if value := manifest.Main.MustGet(fieldName); value != "" { 273 return value 274 } 275 276 for _, section := range manifest.Sections { 277 if value := section.MustGet(fieldName); value != "" { 278 return value 279 } 280 } 281 282 return "" 283 }