github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_pom_xml.go (about)

     1  package java
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/xml"
     6  	"fmt"
     7  	"io"
     8  	"reflect"
     9  	"regexp"
    10  	"strings"
    11  
    12  	"github.com/saintfish/chardet"
    13  	"github.com/vifraa/gopom"
    14  	"golang.org/x/net/html/charset"
    15  
    16  	"github.com/anchore/syft/syft/artifact"
    17  	"github.com/anchore/syft/syft/file"
    18  	"github.com/anchore/syft/syft/pkg"
    19  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    20  	"github.com/lineaje-labs/syft/internal/log"
    21  )
    22  
// pomXMLGlob is a glob pattern matching any file name that ends in "pom.xml".
// NOTE(review): presumably used when registering this parser with a cataloger; the
// consumer is not visible in this file.
const pomXMLGlob = "*pom.xml"

// propertyMatcher matches property expressions of the form ${...}: a literal "${",
// one or more characters other than '}', then the closing '}'. It is compiled once
// at package scope and used by resolveProperty to locate expressions to substitute.
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
    26  
    27  func parserPomXML(
    28  	_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser,
    29  ) ([]pkg.Package, []artifact.Relationship, error) {
    30  	pom, err := decodePomXML(reader)
    31  	if err != nil {
    32  		return nil, nil, err
    33  	}
    34  
    35  	var pkgs []pkg.Package
    36  	if pom.Dependencies != nil {
    37  		for _, dep := range *pom.Dependencies {
    38  			p := newPackageFromPom(
    39  				pom,
    40  				dep,
    41  				reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    42  			)
    43  			if p.Name == "" {
    44  				continue
    45  			}
    46  
    47  			pkgs = append(pkgs, p)
    48  		}
    49  	}
    50  
    51  	return pkgs, nil, nil
    52  }
    53  
    54  func parsePomXMLProject(path string, reader io.Reader, location file.Location) (*parsedPomProject, error) {
    55  	project, err := decodePomXML(reader)
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  	return newPomProject(path, project, location), nil
    60  }
    61  
    62  func newPomProject(path string, p gopom.Project, location file.Location) *parsedPomProject {
    63  	artifactID := safeString(p.ArtifactID)
    64  	name := safeString(p.Name)
    65  	projectURL := safeString(p.URL)
    66  
    67  	var licenses []pkg.License
    68  	if p.Licenses != nil {
    69  		for _, license := range *p.Licenses {
    70  			var licenseName, licenseURL string
    71  			if license.Name != nil {
    72  				licenseName = *license.Name
    73  			}
    74  			if license.URL != nil {
    75  				licenseURL = *license.URL
    76  			}
    77  
    78  			if licenseName == "" && licenseURL == "" {
    79  				continue
    80  			}
    81  
    82  			licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, licenseURL, &location))
    83  		}
    84  	}
    85  
    86  	log.WithFields("path", path, "artifactID", artifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml")
    87  	return &parsedPomProject{
    88  		JavaPomProject: &pkg.JavaPomProject{
    89  			Path:        path,
    90  			Parent:      pomParent(p, p.Parent),
    91  			GroupID:     resolveProperty(p, p.GroupID, "groupId"),
    92  			ArtifactID:  artifactID,
    93  			Version:     resolveProperty(p, p.Version, "version"),
    94  			Name:        name,
    95  			Description: cleanDescription(p.Description),
    96  			URL:         projectURL,
    97  		},
    98  		Licenses: licenses,
    99  	}
   100  }
   101  
   102  func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
   103  	m := pkg.JavaArchive{
   104  		PomProperties: &pkg.JavaPomProperties{
   105  			GroupID:    resolveProperty(pom, dep.GroupID, "groupId"),
   106  			ArtifactID: resolveProperty(pom, dep.ArtifactID, "artifactId"),
   107  			Scope:      resolveProperty(pom, dep.Scope, "scope"),
   108  		},
   109  	}
   110  
   111  	name := safeString(dep.ArtifactID)
   112  	version := resolveProperty(pom, dep.Version, "version")
   113  
   114  	p := pkg.Package{
   115  		Name:      name,
   116  		Version:   version,
   117  		Locations: file.NewLocationSet(locations...),
   118  		PURL:      packageURL(name, version, m),
   119  		Language:  pkg.Java,
   120  		Type:      pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
   121  		Metadata:  m,
   122  	}
   123  
   124  	p.SetID()
   125  
   126  	return p
   127  }
   128  
   129  func decodePomXML(content io.Reader) (project gopom.Project, err error) {
   130  	inputReader, err := getUtf8Reader(content)
   131  	if err != nil {
   132  		return project, fmt.Errorf("unable to read pom.xml: %w", err)
   133  	}
   134  
   135  	decoder := xml.NewDecoder(inputReader)
   136  	// when an xml file has a character set declaration (e.g. '<?xml version="1.0" encoding="ISO-8859-1"?>') read that and use the correct decoder
   137  	decoder.CharsetReader = charset.NewReaderLabel
   138  
   139  	if err := decoder.Decode(&project); err != nil {
   140  		return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
   141  	}
   142  
   143  	return project, nil
   144  }
   145  
   146  func getUtf8Reader(content io.Reader) (io.Reader, error) {
   147  	pomContents, err := io.ReadAll(content)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	detector := chardet.NewTextDetector()
   153  	detection, err := detector.DetectBest(pomContents)
   154  
   155  	var inputReader io.Reader
   156  	if err == nil && detection != nil {
   157  		if detection.Charset == "UTF-8" {
   158  			inputReader = bytes.NewReader(pomContents)
   159  		} else {
   160  			inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents))
   161  			if err != nil {
   162  				return nil, fmt.Errorf("unable to get encoding: %w", err)
   163  			}
   164  		}
   165  	} else {
   166  		// we could not detect the encoding, but we want a valid file to read. Replace unreadable
   167  		// characters with the UTF-8 replacement character.
   168  		inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�"))
   169  	}
   170  	return inputReader, nil
   171  }
   172  
   173  func pomParent(pom gopom.Project, parent *gopom.Parent) (result *pkg.JavaPomParent) {
   174  	if parent == nil {
   175  		return nil
   176  	}
   177  
   178  	artifactID := safeString(parent.ArtifactID)
   179  	result = &pkg.JavaPomParent{
   180  		GroupID:    resolveProperty(pom, parent.GroupID, "groupId"),
   181  		ArtifactID: artifactID,
   182  		Version:    resolveProperty(pom, parent.Version, "version"),
   183  	}
   184  
   185  	if result.GroupID == "" && result.ArtifactID == "" && result.Version == "" {
   186  		return nil
   187  	}
   188  	return result
   189  }
   190  
   191  func cleanDescription(original *string) (cleaned string) {
   192  	if original == nil {
   193  		return ""
   194  	}
   195  	descriptionLines := strings.Split(*original, "\n")
   196  	for _, line := range descriptionLines {
   197  		line = strings.TrimSpace(line)
   198  		if len(line) == 0 {
   199  			continue
   200  		}
   201  		cleaned += line + " "
   202  	}
   203  	return strings.TrimSpace(cleaned)
   204  }
   205  
   206  // resolveProperty emulates some maven property resolution logic by looking in the project's variables
   207  // as well as supporting the project expressions like ${project.parent.groupId}.
   208  // If no match is found, the entire expression including ${} is returned
   209  //
   210  //nolint:gocognit
   211  func resolveProperty(pom gopom.Project, property *string, propertyName string) string {
   212  	propertyCase := safeString(property)
   213  	log.WithFields("existingPropertyValue", propertyCase, "propertyName", propertyName).Trace("resolving property")
   214  	return propertyMatcher.ReplaceAllStringFunc(propertyCase, func(match string) string {
   215  		propertyName := strings.TrimSpace(match[2 : len(match)-1]) // remove leading ${ and trailing }
   216  		entries := pomProperties(pom)
   217  		if value, ok := entries[propertyName]; ok {
   218  			return value
   219  		}
   220  
   221  		// if we don't find anything directly in the pom properties,
   222  		// see if we have a project.x expression and process this based
   223  		// on the xml tags in gopom
   224  		parts := strings.Split(propertyName, ".")
   225  		numParts := len(parts)
   226  		if numParts > 1 && strings.TrimSpace(parts[0]) == "project" {
   227  			pomValue := reflect.ValueOf(pom)
   228  			pomValueType := pomValue.Type()
   229  			for partNum := 1; partNum < numParts; partNum++ {
   230  				if pomValueType.Kind() != reflect.Struct {
   231  					break
   232  				}
   233  				part := parts[partNum]
   234  				for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ {
   235  					f := pomValueType.Field(fieldNum)
   236  					tag := f.Tag.Get("xml")
   237  					tag = strings.Split(tag, ",")[0]
   238  					// a segment of the property name matches the xml tag for the field,
   239  					// so we need to recurse down the nested structs or return a match
   240  					// if we're done.
   241  					if part == tag {
   242  						pomValue = pomValue.Field(fieldNum)
   243  						pomValueType = pomValue.Type()
   244  						if pomValueType.Kind() == reflect.Ptr {
   245  							// we were recursing down the nested structs, but one of the steps
   246  							// we need to take is a nil pointer, so give up and return the original match
   247  							if pomValue.IsNil() {
   248  								return match
   249  							}
   250  							pomValue = pomValue.Elem()
   251  							if !pomValue.IsZero() {
   252  								// we found a non-zero value whose tag matches this part of the property name
   253  								pomValueType = pomValue.Type()
   254  							}
   255  						}
   256  						// If this was the last part of the property name, return the value
   257  						if partNum == numParts-1 {
   258  							return fmt.Sprintf("%v", pomValue.Interface())
   259  						}
   260  						break
   261  					}
   262  				}
   263  			}
   264  		}
   265  		return match
   266  	})
   267  }
   268  
   269  func pomProperties(p gopom.Project) map[string]string {
   270  	if p.Properties != nil {
   271  		return p.Properties.Entries
   272  	}
   273  	return map[string]string{}
   274  }
   275  
   276  func safeString(s *string) string {
   277  	if s == nil {
   278  		return ""
   279  	}
   280  	return *s
   281  }