github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/java/parse_pom_xml.go (about)

     1  package java
     2  
     3  import (
     4  	"encoding/xml"
     5  	"fmt"
     6  	"io"
     7  	"reflect"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/nextlinux/gosbom/gosbom/artifact"
    12  	"github.com/nextlinux/gosbom/gosbom/file"
    13  	"github.com/nextlinux/gosbom/gosbom/pkg"
    14  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    15  	"github.com/vifraa/gopom"
    16  	"golang.org/x/net/html/charset"
    17  )
    18  
// pomXMLGlob is the glob pattern for pom.xml files; the leading wildcard also
// matches names that merely end in "pom.xml".
const pomXMLGlob = "*pom.xml"

// propertyMatcher matches a single property placeholder of the form ${...}:
// a literal "${", one or more characters other than '}', and a closing "}".
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")
    22  
    23  func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    24  	pom, err := decodePomXML(reader)
    25  	if err != nil {
    26  		return nil, nil, err
    27  	}
    28  
    29  	var pkgs []pkg.Package
    30  	for _, dep := range pom.Dependencies {
    31  		p := newPackageFromPom(
    32  			pom,
    33  			dep,
    34  			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    35  		)
    36  		if p.Name == "" {
    37  			continue
    38  		}
    39  
    40  		pkgs = append(pkgs, p)
    41  	}
    42  
    43  	return pkgs, nil, nil
    44  }
    45  
    46  func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) {
    47  	project, err := decodePomXML(reader)
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  	return newPomProject(path, project), nil
    52  }
    53  
    54  func newPomProject(path string, p gopom.Project) *pkg.PomProject {
    55  	return &pkg.PomProject{
    56  		Path:        path,
    57  		Parent:      pomParent(p, p.Parent),
    58  		GroupID:     resolveProperty(p, p.GroupID),
    59  		ArtifactID:  p.ArtifactID,
    60  		Version:     resolveProperty(p, p.Version),
    61  		Name:        p.Name,
    62  		Description: cleanDescription(p.Description),
    63  		URL:         p.URL,
    64  	}
    65  }
    66  
    67  func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package {
    68  	m := pkg.JavaMetadata{
    69  		PomProperties: &pkg.PomProperties{
    70  			GroupID:    resolveProperty(pom, dep.GroupID),
    71  			ArtifactID: resolveProperty(pom, dep.ArtifactID),
    72  			Scope:      resolveProperty(pom, dep.Scope),
    73  		},
    74  	}
    75  
    76  	name := dep.ArtifactID
    77  	version := resolveProperty(pom, dep.Version)
    78  
    79  	p := pkg.Package{
    80  		Name:         name,
    81  		Version:      version,
    82  		Locations:    file.NewLocationSet(locations...),
    83  		PURL:         packageURL(name, version, m),
    84  		Language:     pkg.Java,
    85  		Type:         pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
    86  		MetadataType: pkg.JavaMetadataType,
    87  		Metadata:     m,
    88  	}
    89  
    90  	p.SetID()
    91  
    92  	return p
    93  }
    94  
    95  func decodePomXML(content io.Reader) (project gopom.Project, err error) {
    96  	decoder := xml.NewDecoder(content)
    97  	// prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil"
    98  	decoder.CharsetReader = charset.NewReaderLabel
    99  	if err := decoder.Decode(&project); err != nil {
   100  		return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
   101  	}
   102  
   103  	return project, nil
   104  }
   105  
   106  func pomParent(pom gopom.Project, parent gopom.Parent) (result *pkg.PomParent) {
   107  	if parent.ArtifactID != "" || parent.GroupID != "" || parent.Version != "" {
   108  		result = &pkg.PomParent{
   109  			GroupID:    resolveProperty(pom, parent.GroupID),
   110  			ArtifactID: parent.ArtifactID,
   111  			Version:    resolveProperty(pom, parent.Version),
   112  		}
   113  	}
   114  	return result
   115  }
   116  
   117  func cleanDescription(original string) (cleaned string) {
   118  	descriptionLines := strings.Split(original, "\n")
   119  	for _, line := range descriptionLines {
   120  		line = strings.TrimSpace(line)
   121  		if len(line) == 0 {
   122  			continue
   123  		}
   124  		cleaned += line + " "
   125  	}
   126  	return strings.TrimSpace(cleaned)
   127  }
   128  
   129  // resolveProperty emulates some maven property resolution logic by looking in the project's variables
   130  // as well as supporting the project expressions like ${project.parent.groupId}.
   131  // If no match is found, the entire expression including ${} is returned
   132  func resolveProperty(pom gopom.Project, property string) string {
   133  	return propertyMatcher.ReplaceAllStringFunc(property, func(match string) string {
   134  		propertyName := strings.TrimSpace(match[2 : len(match)-1])
   135  		if value, ok := pom.Properties.Entries[propertyName]; ok {
   136  			return value
   137  		}
   138  		// if we don't find anything directly in the pom properties,
   139  		// see if we have a project.x expression and process this based
   140  		// on the xml tags in gopom
   141  		parts := strings.Split(propertyName, ".")
   142  		numParts := len(parts)
   143  		if numParts > 1 && strings.TrimSpace(parts[0]) == "project" {
   144  			pomValue := reflect.ValueOf(pom)
   145  			pomValueType := pomValue.Type()
   146  			for partNum := 1; partNum < numParts; partNum++ {
   147  				if pomValueType.Kind() != reflect.Struct {
   148  					break
   149  				}
   150  				part := parts[partNum]
   151  				for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ {
   152  					f := pomValueType.Field(fieldNum)
   153  					if part == f.Tag.Get("xml") {
   154  						pomValue = pomValue.Field(fieldNum)
   155  						pomValueType = pomValue.Type()
   156  						if partNum == numParts-1 {
   157  							return fmt.Sprintf("%v", pomValue.Interface())
   158  						}
   159  						break
   160  					}
   161  				}
   162  			}
   163  		}
   164  		return match
   165  	})
   166  }