github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_java_manifest.go (about) 1 package java 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "strconv" 8 "strings" 9 10 "github.com/anchore/syft/syft/pkg" 11 "github.com/lineaje-labs/syft/internal/log" 12 ) 13 14 const manifestGlob = "/META-INF/MANIFEST.MF" 15 16 // parseJavaManifest takes MANIFEST.MF file content and returns sections of parsed key/value pairs. 17 // For more information: https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest 18 // 19 //nolint:funlen 20 func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) { 21 var manifest pkg.JavaManifest 22 var sections []map[string]string 23 24 currentSection := func() int { 25 return len(sections) - 1 26 } 27 28 var lastKey string 29 scanner := bufio.NewScanner(reader) 30 31 for scanner.Scan() { 32 line := scanner.Text() 33 34 // empty lines denote section separators 35 if line == "" { 36 // we don't want to allocate a new section map that won't necessarily be used, do that once there is 37 // a non-empty line to process 38 39 // do not process line continuations after this 40 lastKey = "" 41 42 continue 43 } 44 45 if line[0] == ' ' { 46 // this is a continuation 47 48 if lastKey == "" { 49 log.Debugf("java manifest %q: found continuation with no previous key: %q", path, line) 50 continue 51 } 52 53 sections[currentSection()][lastKey] += strings.TrimSpace(line) 54 55 continue 56 } 57 58 // this is a new key-value pair 59 idx := strings.Index(line, ":") 60 if idx == -1 { 61 log.Debugf("java manifest %q: unable to split java manifest key-value pairs: %q", path, line) 62 continue 63 } 64 65 key := strings.TrimSpace(line[0:idx]) 66 value := strings.TrimSpace(line[idx+1:]) 67 68 if key == "" { 69 // don't attempt to add new keys or sections unless there is a non-empty key 70 continue 71 } 72 73 if lastKey == "" { 74 // we're entering a new section 75 sections = append(sections, make(map[string]string)) 76 } 77 78 sections[currentSection()][key] = value 79 80 // keep track of key for potential future continuations 81 lastKey = key 82 } 83 84 if err := scanner.Err(); err != nil { 85 return nil, fmt.Errorf("unable to read java manifest: %w", err) 86 } 87 88 if len(sections) > 0 { 89 manifest.Main = sections[0] 90 if len(sections) > 1 { 91 manifest.NamedSections = make(map[string]map[string]string) 92 for i, s := range sections[1:] { 93 name, ok := s["Name"] 94 if !ok { 95 // per the manifest spec (https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest) 96 // this should never happen. If it does, we want to know about it, but not necessarily stop 97 // cataloging entirely... for this reason we only log. 98 log.Debugf("java manifest section found without a name: %s", path) 99 name = strconv.Itoa(i) 100 } else { 101 delete(s, "Name") 102 } 103 manifest.NamedSections[name] = s 104 } 105 } 106 } 107 108 return &manifest, nil 109 } 110 111 func extractNameFromApacheMavenBundlePlugin(manifest *pkg.JavaManifest) string { 112 // special case: from https://svn.apache.org/repos/asf/felix/releases/maven-bundle-plugin-1.2.0/doc/maven-bundle-plugin-bnd.html 113 // "<Bundle-SymbolicName> is assumed to be "${groupId}.${artifactId}"." 114 // 115 // documentation from https://felix.apache.org/documentation/subprojects/apache-felix-maven-bundle-plugin-bnd.html 116 // agrees this is the default behavior: 117 // 118 // - [1] if artifact.getFile is not null and the jar contains a OSGi Manifest with Bundle-SymbolicName property then that value is returned 119 // 120 // - [2] if groupId has only one section (no dots) and artifact.getFile is not null then the first package name with classes 121 // is returned. eg. commons-logging:commons-logging -> org.apache.commons.logging 122 // 123 // - [3] if artifactId is equal to last section of groupId then groupId is returned. eg. org.apache.maven:maven -> org.apache.maven 124 // 125 // - [4] if artifactId starts with last section of groupId that portion is removed. eg. org.apache.maven:maven-core -> org.apache.maven.core 126 // The computed symbolic name is also stored in the $(maven-symbolicname) property in case you want to add attributes or directives to it. 127 // 128 if manifest != nil { 129 if strings.Contains(manifest.Main["Created-By"], "Apache Maven Bundle Plugin") { 130 if symbolicName := manifest.Main["Bundle-SymbolicName"]; symbolicName != "" { 131 // It is possible that `Bundle-SymbolicName` is just the groupID (like in the case of 132 // https://repo1.maven.org/maven2/com/google/oauth-client/google-oauth-client/1.25.0/google-oauth-client-1.25.0.jar), 133 // so if `Implementation-Vendor-Id` is equal to `Bundle-SymbolicName`, bail on this logic 134 if vendorID := manifest.Main["Implementation-Vendor-Id"]; vendorID != "" && vendorID == symbolicName { 135 return "" 136 } 137 138 // the problem with this approach is that we don't have a strong indication of the artifactId 139 // not having a "." in it. However, by convention it is unlikely that an artifactId would have a ".". 140 fields := strings.Split(symbolicName, ".") 141 142 // grab the last field, this is the artifactId. Note: because of [3] we do not know if this value is 143 // correct. That is, a group id of "commons-logging" may have caused BND to swap out the reference to 144 // "org.apache.commons.logging", which means we'd interpret this as an artifact id of "logging", 145 // which is not correct. 146 // [correct] https://mvnrepository.com/artifact/commons-logging/commons-logging 147 // [still incorrect] https://mvnrepository.com/artifact/org.apache.commons.logging/org.apache.commons.logging 148 return fields[len(fields)-1] 149 } 150 } 151 } 152 153 return "" 154 } 155 156 func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { 157 name := extractNameFromApacheMavenBundlePlugin(manifest) 158 if name != "" { 159 return name 160 } 161 162 // the filename tends to be the next-best reference for the package name 163 if filenameObj.name != "" { 164 if strings.Contains(filenameObj.name, ".") { 165 // special case: this *might* be a group id + artifact id. By convention artifact ids do not have "." in them. 166 fields := strings.Split(filenameObj.name, ".") 167 return fields[len(fields)-1] 168 } 169 return filenameObj.name 170 } 171 172 // remaining fields in the manifest is a bit of a free-for-all depending on the build tooling used and package maintainer preferences 173 if manifest != nil { 174 switch { 175 case manifest.Main["Name"] != "": 176 // Manifest original spec... 177 return manifest.Main["Name"] 178 case manifest.Main["Bundle-Name"] != "": 179 // BND tooling... TODO: this does not seem accurate (I don't see a reference in the BND tooling docs for this) 180 return manifest.Main["Bundle-Name"] 181 case manifest.Main["Short-Name"] != "": 182 // Jenkins... 183 return manifest.Main["Short-Name"] 184 case manifest.Main["Extension-Name"] != "": 185 // Jenkins... 186 return manifest.Main["Extension-Name"] 187 case manifest.Main["Implementation-Title"] != "": 188 // last ditch effort... 189 return manifest.Main["Implementation-Title"] 190 } 191 } 192 return "" 193 } 194 195 func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string { 196 if v := filenameObj.version; v != "" { 197 return v 198 } 199 200 if manifest == nil { 201 return "" 202 } 203 204 fieldNames := []string{ 205 "Implementation-Version", 206 "Specification-Version", 207 "Plugin-Version", 208 "Bundle-Version", 209 } 210 211 for _, fieldName := range fieldNames { 212 if v := fieldValueFromManifest(*manifest, fieldName); v != "" { 213 return v 214 } 215 } 216 217 return "" 218 } 219 220 func selectLicenses(manifest *pkg.JavaManifest) []string { 221 result := []string{} 222 if manifest == nil { 223 return result 224 } 225 226 fieldNames := []string{ 227 "Bundle-License", 228 "Plugin-License-Name", 229 } 230 231 for _, fieldName := range fieldNames { 232 if v := fieldValueFromManifest(*manifest, fieldName); v != "" { 233 result = append(result, v) 234 } 235 } 236 237 return result 238 } 239 240 func fieldValueFromManifest(manifest pkg.JavaManifest, fieldName string) string { 241 if value := manifest.Main[fieldName]; value != "" { 242 return value 243 } 244 245 for _, section := range manifest.NamedSections { 246 if value := section[fieldName]; value != "" { 247 return value 248 } 249 } 250 251 return "" 252 }