github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/java/parse_pom_xml.go (about)

     1  package java
     2  
     3  import (
     4  	"encoding/xml"
     5  	"fmt"
     6  	"io"
     7  	"reflect"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/vifraa/gopom"
    12  	"golang.org/x/net/html/charset"
    13  
    14  	"github.com/kastenhq/syft/syft/artifact"
    15  	"github.com/kastenhq/syft/syft/file"
    16  	"github.com/kastenhq/syft/syft/pkg"
    17  	"github.com/kastenhq/syft/syft/pkg/cataloger/generic"
    18  )
    19  
// pomXMLGlob is a glob pattern matching any file name that ends in "pom.xml".
const pomXMLGlob = "*pom.xml"

// propertyMatcher matches a single "${...}" property expression: a literal "${",
// one or more characters other than "}", and the closing "}".
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
    23  
    24  func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    25  	pom, err := decodePomXML(reader)
    26  	if err != nil {
    27  		return nil, nil, err
    28  	}
    29  
    30  	var pkgs []pkg.Package
    31  	for _, dep := range *pom.Dependencies {
    32  		p := newPackageFromPom(
    33  			pom,
    34  			dep,
    35  			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    36  		)
    37  		if p.Name == "" {
    38  			continue
    39  		}
    40  
    41  		pkgs = append(pkgs, p)
    42  	}
    43  
    44  	return pkgs, nil, nil
    45  }
    46  
    47  func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) {
    48  	project, err := decodePomXML(reader)
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  	return newPomProject(path, project), nil
    53  }
    54  
    55  func newPomProject(path string, p gopom.Project) *pkg.PomProject {
    56  	return &pkg.PomProject{
    57  		Path:        path,
    58  		Parent:      pomParent(p, *p.Parent),
    59  		GroupID:     resolveProperty(p, *p.GroupID),
    60  		ArtifactID:  *p.ArtifactID,
    61  		Version:     resolveProperty(p, *p.Version),
    62  		Name:        *p.Name,
    63  		Description: cleanDescription(*p.Description),
    64  		URL:         *p.URL,
    65  	}
    66  }
    67  
    68  func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
    69  	m := pkg.JavaMetadata{
    70  		PomProperties: &pkg.PomProperties{
    71  			GroupID:    resolveProperty(pom, *dep.GroupID),
    72  			ArtifactID: resolveProperty(pom, *dep.ArtifactID),
    73  			Scope:      resolveProperty(pom, *dep.Scope),
    74  		},
    75  	}
    76  
    77  	name := dep.ArtifactID
    78  	version := resolveProperty(pom, *dep.Version)
    79  
    80  	p := pkg.Package{
    81  		Name:         *name,
    82  		Version:      version,
    83  		Locations:    file.NewLocationSet(locations...),
    84  		PURL:         packageURL(*name, version, m),
    85  		Language:     pkg.Java,
    86  		Type:         pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
    87  		MetadataType: pkg.JavaMetadataType,
    88  		Metadata:     m,
    89  	}
    90  
    91  	p.SetID()
    92  
    93  	return p
    94  }
    95  
    96  func decodePomXML(content io.Reader) (project gopom.Project, err error) {
    97  	decoder := xml.NewDecoder(content)
    98  	// prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil"
    99  	decoder.CharsetReader = charset.NewReaderLabel
   100  	if err := decoder.Decode(&project); err != nil {
   101  		return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
   102  	}
   103  
   104  	return project, nil
   105  }
   106  
   107  func pomParent(pom gopom.Project, parent gopom.Parent) (result *pkg.PomParent) {
   108  	if *parent.ArtifactID != "" || *parent.GroupID != "" || *parent.Version != "" {
   109  		result = &pkg.PomParent{
   110  			GroupID:    resolveProperty(pom, *parent.GroupID),
   111  			ArtifactID: *parent.ArtifactID,
   112  			Version:    resolveProperty(pom, *parent.Version),
   113  		}
   114  	}
   115  	return result
   116  }
   117  
   118  func cleanDescription(original string) (cleaned string) {
   119  	descriptionLines := strings.Split(original, "\n")
   120  	for _, line := range descriptionLines {
   121  		line = strings.TrimSpace(line)
   122  		if len(line) == 0 {
   123  			continue
   124  		}
   125  		cleaned += line + " "
   126  	}
   127  	return strings.TrimSpace(cleaned)
   128  }
   129  
   130  // resolveProperty emulates some maven property resolution logic by looking in the project's variables
   131  // as well as supporting the project expressions like ${project.parent.groupId}.
   132  // If no match is found, the entire expression including ${} is returned
   133  func resolveProperty(pom gopom.Project, property string) string {
   134  	return propertyMatcher.ReplaceAllStringFunc(property, func(match string) string {
   135  		propertyName := strings.TrimSpace(match[2 : len(match)-1])
   136  		if value, ok := pom.Properties.Entries[propertyName]; ok {
   137  			return value
   138  		}
   139  		// if we don't find anything directly in the pom properties,
   140  		// see if we have a project.x expression and process this based
   141  		// on the xml tags in gopom
   142  		parts := strings.Split(propertyName, ".")
   143  		numParts := len(parts)
   144  		if numParts > 1 && strings.TrimSpace(parts[0]) == "project" {
   145  			pomValue := reflect.ValueOf(pom)
   146  			pomValueType := pomValue.Type()
   147  			for partNum := 1; partNum < numParts; partNum++ {
   148  				if pomValueType.Kind() != reflect.Struct {
   149  					break
   150  				}
   151  				part := parts[partNum]
   152  				for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ {
   153  					f := pomValueType.Field(fieldNum)
   154  					if part == f.Tag.Get("xml") {
   155  						pomValue = pomValue.Field(fieldNum)
   156  						pomValueType = pomValue.Type()
   157  						if partNum == numParts-1 {
   158  							return fmt.Sprintf("%v", pomValue.Interface())
   159  						}
   160  						break
   161  					}
   162  				}
   163  			}
   164  		}
   165  		return match
   166  	})
   167  }