github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_java_manifest.go (about)

     1  package java
     2  
import (
	"bufio"
	"fmt"
	"io"
	"sort"
	"strconv"
	"strings"

	"github.com/anchore/syft/syft/pkg"
	"github.com/lineaje-labs/syft/internal/log"
)
    13  
// manifestGlob is the path of the manifest entry (META-INF/MANIFEST.MF) relative to an archive root.
// NOTE(review): presumably used to select the manifest from an archive listing; the consumer is not in this file.
const manifestGlob = "/META-INF/MANIFEST.MF"
    15  
    16  // parseJavaManifest takes MANIFEST.MF file content and returns sections of parsed key/value pairs.
    17  // For more information: https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest
    18  //
    19  //nolint:funlen
    20  func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) {
    21  	var manifest pkg.JavaManifest
    22  	var sections []map[string]string
    23  
    24  	currentSection := func() int {
    25  		return len(sections) - 1
    26  	}
    27  
    28  	var lastKey string
    29  	scanner := bufio.NewScanner(reader)
    30  
    31  	for scanner.Scan() {
    32  		line := scanner.Text()
    33  
    34  		// empty lines denote section separators
    35  		if line == "" {
    36  			// we don't want to allocate a new section map that won't necessarily be used, do that once there is
    37  			// a non-empty line to process
    38  
    39  			// do not process line continuations after this
    40  			lastKey = ""
    41  
    42  			continue
    43  		}
    44  
    45  		if line[0] == ' ' {
    46  			// this is a continuation
    47  
    48  			if lastKey == "" {
    49  				log.Debugf("java manifest %q: found continuation with no previous key: %q", path, line)
    50  				continue
    51  			}
    52  
    53  			sections[currentSection()][lastKey] += strings.TrimSpace(line)
    54  
    55  			continue
    56  		}
    57  
    58  		// this is a new key-value pair
    59  		idx := strings.Index(line, ":")
    60  		if idx == -1 {
    61  			log.Debugf("java manifest %q: unable to split java manifest key-value pairs: %q", path, line)
    62  			continue
    63  		}
    64  
    65  		key := strings.TrimSpace(line[0:idx])
    66  		value := strings.TrimSpace(line[idx+1:])
    67  
    68  		if key == "" {
    69  			// don't attempt to add new keys or sections unless there is a non-empty key
    70  			continue
    71  		}
    72  
    73  		if lastKey == "" {
    74  			// we're entering a new section
    75  			sections = append(sections, make(map[string]string))
    76  		}
    77  
    78  		sections[currentSection()][key] = value
    79  
    80  		// keep track of key for potential future continuations
    81  		lastKey = key
    82  	}
    83  
    84  	if err := scanner.Err(); err != nil {
    85  		return nil, fmt.Errorf("unable to read java manifest: %w", err)
    86  	}
    87  
    88  	if len(sections) > 0 {
    89  		manifest.Main = sections[0]
    90  		if len(sections) > 1 {
    91  			manifest.NamedSections = make(map[string]map[string]string)
    92  			for i, s := range sections[1:] {
    93  				name, ok := s["Name"]
    94  				if !ok {
    95  					// per the manifest spec (https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest)
    96  					// this should never happen. If it does, we want to know about it, but not necessarily stop
    97  					// cataloging entirely... for this reason we only log.
    98  					log.Debugf("java manifest section found without a name: %s", path)
    99  					name = strconv.Itoa(i)
   100  				} else {
   101  					delete(s, "Name")
   102  				}
   103  				manifest.NamedSections[name] = s
   104  			}
   105  		}
   106  	}
   107  
   108  	return &manifest, nil
   109  }
   110  
   111  func extractNameFromApacheMavenBundlePlugin(manifest *pkg.JavaManifest) string {
   112  	// special case: from https://svn.apache.org/repos/asf/felix/releases/maven-bundle-plugin-1.2.0/doc/maven-bundle-plugin-bnd.html
   113  	// "<Bundle-SymbolicName> is assumed to be "${groupId}.${artifactId}"."
   114  	//
   115  	// documentation from https://felix.apache.org/documentation/subprojects/apache-felix-maven-bundle-plugin-bnd.html
   116  	// agrees this is the default behavior:
   117  	//
   118  	// - [1] if artifact.getFile is not null and the jar contains a OSGi Manifest with Bundle-SymbolicName property then that value is returned
   119  	//
   120  	// - [2] if groupId has only one section (no dots) and artifact.getFile is not null then the first package name with classes
   121  	//   is returned. eg. commons-logging:commons-logging -> org.apache.commons.logging
   122  	//
   123  	// - [3] if artifactId is equal to last section of groupId then groupId is returned. eg. org.apache.maven:maven -> org.apache.maven
   124  	//
   125  	// - [4] if artifactId starts with last section of groupId that portion is removed. eg. org.apache.maven:maven-core -> org.apache.maven.core
   126  	//   The computed symbolic name is also stored in the $(maven-symbolicname) property in case you want to add attributes or directives to it.
   127  	//
   128  	if manifest != nil {
   129  		if strings.Contains(manifest.Main["Created-By"], "Apache Maven Bundle Plugin") {
   130  			if symbolicName := manifest.Main["Bundle-SymbolicName"]; symbolicName != "" {
   131  				// It is possible that `Bundle-SymbolicName` is just the groupID (like in the case of
   132  				// https://repo1.maven.org/maven2/com/google/oauth-client/google-oauth-client/1.25.0/google-oauth-client-1.25.0.jar),
   133  				// so if `Implementation-Vendor-Id` is equal to `Bundle-SymbolicName`, bail on this logic
   134  				if vendorID := manifest.Main["Implementation-Vendor-Id"]; vendorID != "" && vendorID == symbolicName {
   135  					return ""
   136  				}
   137  
   138  				// the problem with this approach is that we don't have a strong indication of the artifactId
   139  				// not having a "." in it. However, by convention it is unlikely that an artifactId would have a ".".
   140  				fields := strings.Split(symbolicName, ".")
   141  
   142  				// grab the last field, this is the artifactId. Note: because of [3] we do not know if this value is
   143  				// correct. That is, a group id of "commons-logging" may have caused BND to swap out the reference to
   144  				// "org.apache.commons.logging", which means we'd interpret this as an artifact id of "logging",
   145  				// which is not correct.
   146  				// [correct]         https://mvnrepository.com/artifact/commons-logging/commons-logging
   147  				// [still incorrect] https://mvnrepository.com/artifact/org.apache.commons.logging/org.apache.commons.logging
   148  				return fields[len(fields)-1]
   149  			}
   150  		}
   151  	}
   152  
   153  	return ""
   154  }
   155  
   156  func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
   157  	name := extractNameFromApacheMavenBundlePlugin(manifest)
   158  	if name != "" {
   159  		return name
   160  	}
   161  
   162  	// the filename tends to be the next-best reference for the package name
   163  	if filenameObj.name != "" {
   164  		if strings.Contains(filenameObj.name, ".") {
   165  			// special case: this *might* be a group id + artifact id. By convention artifact ids do not have "." in them.
   166  			fields := strings.Split(filenameObj.name, ".")
   167  			return fields[len(fields)-1]
   168  		}
   169  		return filenameObj.name
   170  	}
   171  
   172  	// remaining fields in the manifest is a bit of a free-for-all depending on the build tooling used and package maintainer preferences
   173  	if manifest != nil {
   174  		switch {
   175  		case manifest.Main["Name"] != "":
   176  			// Manifest original spec...
   177  			return manifest.Main["Name"]
   178  		case manifest.Main["Bundle-Name"] != "":
   179  			// BND tooling... TODO: this does not seem accurate (I don't see a reference in the BND tooling docs for this)
   180  			return manifest.Main["Bundle-Name"]
   181  		case manifest.Main["Short-Name"] != "":
   182  			// Jenkins...
   183  			return manifest.Main["Short-Name"]
   184  		case manifest.Main["Extension-Name"] != "":
   185  			// Jenkins...
   186  			return manifest.Main["Extension-Name"]
   187  		case manifest.Main["Implementation-Title"] != "":
   188  			// last ditch effort...
   189  			return manifest.Main["Implementation-Title"]
   190  		}
   191  	}
   192  	return ""
   193  }
   194  
   195  func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
   196  	if v := filenameObj.version; v != "" {
   197  		return v
   198  	}
   199  
   200  	if manifest == nil {
   201  		return ""
   202  	}
   203  
   204  	fieldNames := []string{
   205  		"Implementation-Version",
   206  		"Specification-Version",
   207  		"Plugin-Version",
   208  		"Bundle-Version",
   209  	}
   210  
   211  	for _, fieldName := range fieldNames {
   212  		if v := fieldValueFromManifest(*manifest, fieldName); v != "" {
   213  			return v
   214  		}
   215  	}
   216  
   217  	return ""
   218  }
   219  
   220  func selectLicenses(manifest *pkg.JavaManifest) []string {
   221  	result := []string{}
   222  	if manifest == nil {
   223  		return result
   224  	}
   225  
   226  	fieldNames := []string{
   227  		"Bundle-License",
   228  		"Plugin-License-Name",
   229  	}
   230  
   231  	for _, fieldName := range fieldNames {
   232  		if v := fieldValueFromManifest(*manifest, fieldName); v != "" {
   233  			result = append(result, v)
   234  		}
   235  	}
   236  
   237  	return result
   238  }
   239  
   240  func fieldValueFromManifest(manifest pkg.JavaManifest, fieldName string) string {
   241  	if value := manifest.Main[fieldName]; value != "" {
   242  		return value
   243  	}
   244  
   245  	for _, section := range manifest.NamedSections {
   246  		if value := section[fieldName]; value != "" {
   247  			return value
   248  		}
   249  	}
   250  
   251  	return ""
   252  }