github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/java/parse_pom_xml.go (about)

     1  package java
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/xml"
     6  	"fmt"
     7  	"io"
     8  	"reflect"
     9  	"regexp"
    10  	"strings"
    11  
    12  	"github.com/saintfish/chardet"
    13  	"github.com/vifraa/gopom"
    14  	"golang.org/x/net/html/charset"
    15  
    16  	"github.com/anchore/syft/internal/log"
    17  	"github.com/anchore/syft/syft/artifact"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    21  )
    22  
// pomXMLGlob is a glob pattern that selects any path ending in "pom.xml".
const pomXMLGlob = "*pom.xml"

// propertyMatcher matches property placeholders of the form ${...}: a literal
// "${", one or more characters other than '}', and a closing '}'. It is compiled
// once at package scope and used by resolveProperty to locate expressions.
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
    26  
    27  func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    28  	pom, err := decodePomXML(reader)
    29  	if err != nil {
    30  		return nil, nil, err
    31  	}
    32  
    33  	var pkgs []pkg.Package
    34  	if pom.Dependencies != nil {
    35  		for _, dep := range *pom.Dependencies {
    36  			p := newPackageFromPom(
    37  				pom,
    38  				dep,
    39  				reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    40  			)
    41  			if p.Name == "" {
    42  				continue
    43  			}
    44  
    45  			pkgs = append(pkgs, p)
    46  		}
    47  	}
    48  
    49  	return pkgs, nil, nil
    50  }
    51  
    52  func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) {
    53  	project, err := decodePomXML(reader)
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  	return newPomProject(path, project), nil
    58  }
    59  
    60  func newPomProject(path string, p gopom.Project) *pkg.PomProject {
    61  	artifactID := safeString(p.ArtifactID)
    62  	name := safeString(p.Name)
    63  	projectURL := safeString(p.URL)
    64  	log.WithFields("path", path, "artifactID", artifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml")
    65  	return &pkg.PomProject{
    66  		Path:        path,
    67  		Parent:      pomParent(p, p.Parent),
    68  		GroupID:     resolveProperty(p, p.GroupID, "groupId"),
    69  		ArtifactID:  artifactID,
    70  		Version:     resolveProperty(p, p.Version, "version"),
    71  		Name:        name,
    72  		Description: cleanDescription(p.Description),
    73  		URL:         projectURL,
    74  	}
    75  }
    76  
    77  func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
    78  	m := pkg.JavaMetadata{
    79  		PomProperties: &pkg.PomProperties{
    80  			GroupID:    resolveProperty(pom, dep.GroupID, "groupId"),
    81  			ArtifactID: resolveProperty(pom, dep.ArtifactID, "artifactId"),
    82  			Scope:      resolveProperty(pom, dep.Scope, "scope"),
    83  		},
    84  	}
    85  
    86  	name := safeString(dep.ArtifactID)
    87  	version := resolveProperty(pom, dep.Version, "version")
    88  
    89  	p := pkg.Package{
    90  		Name:         name,
    91  		Version:      version,
    92  		Locations:    file.NewLocationSet(locations...),
    93  		PURL:         packageURL(name, version, m),
    94  		Language:     pkg.Java,
    95  		Type:         pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
    96  		MetadataType: pkg.JavaMetadataType,
    97  		Metadata:     m,
    98  	}
    99  
   100  	p.SetID()
   101  
   102  	return p
   103  }
   104  
   105  func decodePomXML(content io.Reader) (project gopom.Project, err error) {
   106  	inputReader, err := getUtf8Reader(content)
   107  	if err != nil {
   108  		return project, fmt.Errorf("unable to read pom.xml: %w", err)
   109  	}
   110  
   111  	decoder := xml.NewDecoder(inputReader)
   112  	// when an xml file has a character set declaration (e.g. '<?xml version="1.0" encoding="ISO-8859-1"?>') read that and use the correct decoder
   113  	decoder.CharsetReader = charset.NewReaderLabel
   114  
   115  	if err := decoder.Decode(&project); err != nil {
   116  		return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
   117  	}
   118  
   119  	return project, nil
   120  }
   121  
   122  func getUtf8Reader(content io.Reader) (io.Reader, error) {
   123  	pomContents, err := io.ReadAll(content)
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  
   128  	detector := chardet.NewTextDetector()
   129  	detection, err := detector.DetectBest(pomContents)
   130  
   131  	var inputReader io.Reader
   132  	if err == nil && detection != nil {
   133  		if detection.Charset == "UTF-8" {
   134  			inputReader = bytes.NewReader(pomContents)
   135  		} else {
   136  			inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents))
   137  			if err != nil {
   138  				return nil, fmt.Errorf("unable to get encoding: %w", err)
   139  			}
   140  		}
   141  	} else {
   142  		// we could not detect the encoding, but we want a valid file to read. Replace unreadable
   143  		// characters with the UTF-8 replacement character.
   144  		inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�"))
   145  	}
   146  	return inputReader, nil
   147  }
   148  
   149  func pomParent(pom gopom.Project, parent *gopom.Parent) (result *pkg.PomParent) {
   150  	if parent == nil {
   151  		return nil
   152  	}
   153  
   154  	artifactID := safeString(parent.ArtifactID)
   155  	result = &pkg.PomParent{
   156  		GroupID:    resolveProperty(pom, parent.GroupID, "groupId"),
   157  		ArtifactID: artifactID,
   158  		Version:    resolveProperty(pom, parent.Version, "version"),
   159  	}
   160  
   161  	if result.GroupID == "" && result.ArtifactID == "" && result.Version == "" {
   162  		return nil
   163  	}
   164  	return result
   165  }
   166  
   167  func cleanDescription(original *string) (cleaned string) {
   168  	if original == nil {
   169  		return ""
   170  	}
   171  	descriptionLines := strings.Split(*original, "\n")
   172  	for _, line := range descriptionLines {
   173  		line = strings.TrimSpace(line)
   174  		if len(line) == 0 {
   175  			continue
   176  		}
   177  		cleaned += line + " "
   178  	}
   179  	return strings.TrimSpace(cleaned)
   180  }
   181  
   182  // resolveProperty emulates some maven property resolution logic by looking in the project's variables
   183  // as well as supporting the project expressions like ${project.parent.groupId}.
   184  // If no match is found, the entire expression including ${} is returned
   185  //
   186  //nolint:gocognit
   187  func resolveProperty(pom gopom.Project, property *string, propertyName string) string {
   188  	propertyCase := safeString(property)
   189  	log.WithFields("existingPropertyValue", propertyCase, "propertyName", propertyName).Trace("resolving property")
   190  	return propertyMatcher.ReplaceAllStringFunc(propertyCase, func(match string) string {
   191  		propertyName := strings.TrimSpace(match[2 : len(match)-1]) // remove leading ${ and trailing }
   192  		entries := pomProperties(pom)
   193  		if value, ok := entries[propertyName]; ok {
   194  			return value
   195  		}
   196  
   197  		// if we don't find anything directly in the pom properties,
   198  		// see if we have a project.x expression and process this based
   199  		// on the xml tags in gopom
   200  		parts := strings.Split(propertyName, ".")
   201  		numParts := len(parts)
   202  		if numParts > 1 && strings.TrimSpace(parts[0]) == "project" {
   203  			pomValue := reflect.ValueOf(pom)
   204  			pomValueType := pomValue.Type()
   205  			for partNum := 1; partNum < numParts; partNum++ {
   206  				if pomValueType.Kind() != reflect.Struct {
   207  					break
   208  				}
   209  				part := parts[partNum]
   210  				for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ {
   211  					f := pomValueType.Field(fieldNum)
   212  					tag := f.Tag.Get("xml")
   213  					tag = strings.Split(tag, ",")[0]
   214  					// a segment of the property name matches the xml tag for the field,
   215  					// so we need to recurse down the nested structs or return a match
   216  					// if we're done.
   217  					if part == tag {
   218  						pomValue = pomValue.Field(fieldNum)
   219  						pomValueType = pomValue.Type()
   220  						if pomValueType.Kind() == reflect.Ptr {
   221  							// we were recursing down the nested structs, but one of the steps
   222  							// we need to take is a nil pointer, so give up and return the original match
   223  							if pomValue.IsNil() {
   224  								return match
   225  							}
   226  							pomValue = pomValue.Elem()
   227  							if !pomValue.IsZero() {
   228  								// we found a non-zero value whose tag matches this part of the property name
   229  								pomValueType = pomValue.Type()
   230  							}
   231  						}
   232  						// If this was the last part of the property name, return the value
   233  						if partNum == numParts-1 {
   234  							return fmt.Sprintf("%v", pomValue.Interface())
   235  						}
   236  						break
   237  					}
   238  				}
   239  			}
   240  		}
   241  		return match
   242  	})
   243  }
   244  
   245  func pomProperties(p gopom.Project) map[string]string {
   246  	if p.Properties != nil {
   247  		return p.Properties.Entries
   248  	}
   249  	return map[string]string{}
   250  }
   251  
   252  func safeString(s *string) string {
   253  	if s == nil {
   254  		return ""
   255  	}
   256  	return *s
   257  }