github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/parse_java_manifest.go (about)

     1  package java
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"strings"
     8  	"unicode"
     9  
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/syft/pkg"
    12  )
    13  
    14  const manifestGlob = "/META-INF/MANIFEST.MF"
    15  
    16  // parseJavaManifest takes MANIFEST.MF file content and returns sections of parsed key/value pairs.
    17  // For more information: https://docs.oracle.com/en/java/javase/11/docs/specs/jar/jar.html#jar-manifest
    18  //
    19  //nolint:funlen
    20  func parseJavaManifest(path string, reader io.Reader) (*pkg.JavaManifest, error) {
    21  	var manifest pkg.JavaManifest
    22  	sections := make([]pkg.KeyValues, 0)
    23  
    24  	currentSection := func() int {
    25  		return len(sections) - 1
    26  	}
    27  
    28  	var lastKey string
    29  	scanner := bufio.NewScanner(reader)
    30  
    31  	for scanner.Scan() {
    32  		line := scanner.Text()
    33  
    34  		// empty lines denote section separators
    35  		if line == "" {
    36  			// we don't want to allocate a new section map that won't necessarily be used, do that once there is
    37  			// a non-empty line to process
    38  
    39  			// do not process line continuations after this
    40  			lastKey = ""
    41  
    42  			continue
    43  		}
    44  
    45  		if line[0] == ' ' {
    46  			// this is a continuation
    47  
    48  			if lastKey == "" {
    49  				log.Debugf("java manifest %q: found continuation with no previous key: %q", path, line)
    50  				continue
    51  			}
    52  
    53  			lastSection := sections[currentSection()]
    54  
    55  			sections[currentSection()][len(lastSection)-1].Value += strings.TrimSpace(line)
    56  
    57  			continue
    58  		}
    59  
    60  		// this is a new key-value pair
    61  		idx := strings.Index(line, ":")
    62  		if idx == -1 {
    63  			log.Debugf("java manifest %q: unable to split java manifest key-value pairs: %q", path, line)
    64  			continue
    65  		}
    66  
    67  		key := strings.TrimSpace(line[0:idx])
    68  		value := strings.TrimSpace(line[idx+1:])
    69  
    70  		if key == "" {
    71  			// don't attempt to add new keys or sections unless there is a non-empty key
    72  			continue
    73  		}
    74  
    75  		if lastKey == "" {
    76  			// we're entering a new section
    77  			sections = append(sections, make(pkg.KeyValues, 0))
    78  		}
    79  
    80  		sections[currentSection()] = append(sections[currentSection()], pkg.KeyValue{
    81  			Key:   key,
    82  			Value: value,
    83  		})
    84  
    85  		// keep track of key for potential future continuations
    86  		lastKey = key
    87  	}
    88  
    89  	if err := scanner.Err(); err != nil {
    90  		return nil, fmt.Errorf("unable to read java manifest: %w", err)
    91  	}
    92  
    93  	if len(sections) > 0 {
    94  		manifest.Main = sections[0]
    95  		if len(sections) > 1 {
    96  			manifest.Sections = sections[1:]
    97  		}
    98  	}
    99  
   100  	return &manifest, nil
   101  }
   102  
   103  func extractNameFromApacheMavenBundlePlugin(manifest *pkg.JavaManifest) string {
   104  	// special case: from https://svn.apache.org/repos/asf/felix/releases/maven-bundle-plugin-1.2.0/doc/maven-bundle-plugin-bnd.html
   105  	// "<Bundle-SymbolicName> is assumed to be "${groupId}.${artifactId}"."
   106  	//
   107  	// documentation from https://felix.apache.org/documentation/subprojects/apache-felix-maven-bundle-plugin-bnd.html
   108  	// agrees this is the default behavior:
   109  	//
   110  	// - [1] if artifact.getFile is not null and the jar contains a OSGi Manifest with Bundle-SymbolicName property then that value is returned
   111  	//
   112  	// - [2] if groupId has only one section (no dots) and artifact.getFile is not null then the first package name with classes
   113  	//   is returned. eg. commons-logging:commons-logging -> org.apache.commons.logging
   114  	//
   115  	// - [3] if artifactId is equal to last section of groupId then groupId is returned. eg. org.apache.maven:maven -> org.apache.maven
   116  	//
   117  	// - [4] if artifactId starts with last section of groupId that portion is removed. eg. org.apache.maven:maven-core -> org.apache.maven.core
   118  	//   The computed symbolic name is also stored in the $(maven-symbolicname) property in case you want to add attributes or directives to it.
   119  	//
   120  	if manifest != nil {
   121  		if strings.Contains(manifest.Main.MustGet("Created-By"), "Apache Maven Bundle Plugin") {
   122  			if symbolicName := manifest.Main.MustGet("Bundle-SymbolicName"); symbolicName != "" {
   123  				// It is possible that `Bundle-SymbolicName` is just the groupID (like in the case of
   124  				// https://repo1.maven.org/maven2/com/google/oauth-client/google-oauth-client/1.25.0/google-oauth-client-1.25.0.jar),
   125  				// so if `Implementation-Vendor-Id` is equal to `Bundle-SymbolicName`, bail on this logic
   126  				if vendorID := manifest.Main.MustGet("Implementation-Vendor-Id"); vendorID != "" && vendorID == symbolicName {
   127  					return ""
   128  				}
   129  
   130  				// the problem with this approach is that we don't have a strong indication of the artifactId
   131  				// not having a "." in it. However, by convention it is unlikely that an artifactId would have a ".".
   132  				fields := strings.Split(symbolicName, ".")
   133  
   134  				// grab the last field, this is the artifactId. Note: because of [3] we do not know if this value is
   135  				// correct. That is, a group id of "commons-logging" may have caused BND to swap out the reference to
   136  				// "org.apache.commons.logging", which means we'd interpret this as an artifact id of "logging",
   137  				// which is not correct.
   138  				// [correct]         https://mvnrepository.com/artifact/commons-logging/commons-logging
   139  				// [still incorrect] https://mvnrepository.com/artifact/org.apache.commons.logging/org.apache.commons.logging
   140  				return fields[len(fields)-1]
   141  			}
   142  		}
   143  	}
   144  
   145  	return ""
   146  }
   147  
   148  func extractNameFromArchiveFilename(a archiveFilename) string {
   149  	if strings.Contains(a.name, ".") {
   150  		// special case: this *might* be a group id + artifact id. By convention artifact ids do not have "." in them;
   151  		// however, there are some specific exceptions like with the artifacts under
   152  		// https://repo1.maven.org/maven2/org/eclipse/platform/
   153  		if strings.HasPrefix(a.name, "org.eclipse.") {
   154  			return a.name
   155  		}
   156  
   157  		// Maybe the filename is like groupid + . + artifactid. If so, return artifact id.
   158  		fields := strings.Split(a.name, ".")
   159  		maybeGroupID := true
   160  		for _, f := range fields {
   161  			if !isValidJavaIdentifier(f) {
   162  				maybeGroupID = false
   163  				break
   164  			}
   165  		}
   166  		if maybeGroupID {
   167  			return fields[len(fields)-1]
   168  		}
   169  	}
   170  
   171  	return a.name
   172  }
   173  
   174  func isValidJavaIdentifier(field string) bool {
   175  	runes := []rune(field)
   176  	if len(runes) == 0 {
   177  		return false
   178  	}
   179  	// check whether first rune can start an identifier name in Java
   180  	// Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
   181  	// see https://developer.classpath.org/doc/java/lang/Character-source.html
   182  	// line 3295
   183  	r := runes[0]
   184  	return unicode.Is(unicode.Lu, r) ||
   185  		unicode.Is(unicode.Ll, r) || unicode.Is(unicode.Lt, r) ||
   186  		unicode.Is(unicode.Lm, r) || unicode.Is(unicode.Lo, r) ||
   187  		unicode.Is(unicode.Nl, r) ||
   188  		unicode.Is(unicode.Sc, r) || unicode.Is(unicode.Pc, r)
   189  }
   190  
   191  func selectName(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
   192  	name := extractNameFromApacheMavenBundlePlugin(manifest)
   193  	if name != "" {
   194  		return name
   195  	}
   196  
   197  	// the filename tends to be the next-best reference for the package name
   198  	name = extractNameFromArchiveFilename(filenameObj)
   199  	if name != "" {
   200  		return name
   201  	}
   202  
   203  	// remaining fields in the manifest is a bit of a free-for-all depending on the build tooling used and package maintainer preferences
   204  	if manifest != nil {
   205  		switch {
   206  		case manifest.Main.MustGet("Name") != "":
   207  			// Manifest original spec...
   208  			return manifest.Main.MustGet("Name")
   209  		case manifest.Main.MustGet("Bundle-Name") != "":
   210  			// BND tooling... TODO: this does not seem accurate (I don't see a reference in the BND tooling docs for this)
   211  			return manifest.Main.MustGet("Bundle-Name")
   212  		case manifest.Main.MustGet("Short-Name") != "":
   213  			// Jenkins...
   214  			return manifest.Main.MustGet("Short-Name")
   215  		case manifest.Main.MustGet("Extension-Name") != "":
   216  			// Jenkins...
   217  			return manifest.Main.MustGet("Extension-Name")
   218  		case manifest.Main.MustGet("Implementation-Title") != "":
   219  			// last ditch effort...
   220  			return manifest.Main.MustGet("Implementation-Title")
   221  		}
   222  	}
   223  	return ""
   224  }
   225  
   226  func selectVersion(manifest *pkg.JavaManifest, filenameObj archiveFilename) string {
   227  	if v := filenameObj.version; v != "" {
   228  		return v
   229  	}
   230  
   231  	if manifest == nil {
   232  		return ""
   233  	}
   234  
   235  	fieldNames := []string{
   236  		"Implementation-Version",
   237  		"Specification-Version",
   238  		"Plugin-Version",
   239  		"Bundle-Version",
   240  	}
   241  
   242  	for _, fieldName := range fieldNames {
   243  		if v := fieldValueFromManifest(*manifest, fieldName); v != "" {
   244  			return v
   245  		}
   246  	}
   247  
   248  	return ""
   249  }
   250  
   251  func selectLicenses(manifest *pkg.JavaManifest) []string {
   252  	result := []string{}
   253  	if manifest == nil {
   254  		return result
   255  	}
   256  
   257  	fieldNames := []string{
   258  		"Bundle-License",
   259  		"Plugin-License-Name",
   260  	}
   261  
   262  	for _, fieldName := range fieldNames {
   263  		if v := fieldValueFromManifest(*manifest, fieldName); v != "" {
   264  			result = append(result, v)
   265  		}
   266  	}
   267  
   268  	return result
   269  }
   270  
   271  func fieldValueFromManifest(manifest pkg.JavaManifest, fieldName string) string {
   272  	if value := manifest.Main.MustGet(fieldName); value != "" {
   273  		return value
   274  	}
   275  
   276  	for _, section := range manifest.Sections {
   277  		if value := section.MustGet(fieldName); value != "" {
   278  			return value
   279  		}
   280  	}
   281  
   282  	return ""
   283  }