github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/java/parse_pom_xml.go (about) 1 package java 2 3 import ( 4 "encoding/xml" 5 "fmt" 6 "io" 7 "reflect" 8 "regexp" 9 "strings" 10 11 "github.com/vifraa/gopom" 12 "golang.org/x/net/html/charset" 13 14 "github.com/kastenhq/syft/syft/artifact" 15 "github.com/kastenhq/syft/syft/file" 16 "github.com/kastenhq/syft/syft/pkg" 17 "github.com/kastenhq/syft/syft/pkg/cataloger/generic" 18 ) 19 20 const pomXMLGlob = "*pom.xml" 21 22 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]") 23 24 func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 25 pom, err := decodePomXML(reader) 26 if err != nil { 27 return nil, nil, err 28 } 29 30 var pkgs []pkg.Package 31 for _, dep := range *pom.Dependencies { 32 p := newPackageFromPom( 33 pom, 34 dep, 35 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 36 ) 37 if p.Name == "" { 38 continue 39 } 40 41 pkgs = append(pkgs, p) 42 } 43 44 return pkgs, nil, nil 45 } 46 47 func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) { 48 project, err := decodePomXML(reader) 49 if err != nil { 50 return nil, err 51 } 52 return newPomProject(path, project), nil 53 } 54 55 func newPomProject(path string, p gopom.Project) *pkg.PomProject { 56 return &pkg.PomProject{ 57 Path: path, 58 Parent: pomParent(p, *p.Parent), 59 GroupID: resolveProperty(p, *p.GroupID), 60 ArtifactID: *p.ArtifactID, 61 Version: resolveProperty(p, *p.Version), 62 Name: *p.Name, 63 Description: cleanDescription(*p.Description), 64 URL: *p.URL, 65 } 66 } 67 68 func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package { 69 m := pkg.JavaMetadata{ 70 PomProperties: &pkg.PomProperties{ 71 GroupID: resolveProperty(pom, *dep.GroupID), 72 ArtifactID: resolveProperty(pom, *dep.ArtifactID), 73 Scope: resolveProperty(pom, *dep.Scope), 74 }, 75 } 76 77 name := dep.ArtifactID 78 version := resolveProperty(pom, *dep.Version) 79 80 p := pkg.Package{ 81 Name: *name, 82 Version: version, 83 Locations: file.NewLocationSet(locations...), 84 PURL: packageURL(*name, version, m), 85 Language: pkg.Java, 86 Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? 87 MetadataType: pkg.JavaMetadataType, 88 Metadata: m, 89 } 90 91 p.SetID() 92 93 return p 94 } 95 96 func decodePomXML(content io.Reader) (project gopom.Project, err error) { 97 decoder := xml.NewDecoder(content) 98 // prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil" 99 decoder.CharsetReader = charset.NewReaderLabel 100 if err := decoder.Decode(&project); err != nil { 101 return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err) 102 } 103 104 return project, nil 105 } 106 107 func pomParent(pom gopom.Project, parent gopom.Parent) (result *pkg.PomParent) { 108 if *parent.ArtifactID != "" || *parent.GroupID != "" || *parent.Version != "" { 109 result = &pkg.PomParent{ 110 GroupID: resolveProperty(pom, *parent.GroupID), 111 ArtifactID: *parent.ArtifactID, 112 Version: resolveProperty(pom, *parent.Version), 113 } 114 } 115 return result 116 } 117 118 func cleanDescription(original string) (cleaned string) { 119 descriptionLines := strings.Split(original, "\n") 120 for _, line := range descriptionLines { 121 line = strings.TrimSpace(line) 122 if len(line) == 0 { 123 continue 124 } 125 cleaned += line + " " 126 } 127 return strings.TrimSpace(cleaned) 128 } 129 130 // resolveProperty emulates some maven property resolution logic by looking in the project's variables 131 // as well as supporting the project expressions like ${project.parent.groupId}. 132 // If no match is found, the entire expression including ${} is returned 133 func resolveProperty(pom gopom.Project, property string) string { 134 return propertyMatcher.ReplaceAllStringFunc(property, func(match string) string { 135 propertyName := strings.TrimSpace(match[2 : len(match)-1]) 136 if value, ok := pom.Properties.Entries[propertyName]; ok { 137 return value 138 } 139 // if we don't find anything directly in the pom properties, 140 // see if we have a project.x expression and process this based 141 // on the xml tags in gopom 142 parts := strings.Split(propertyName, ".") 143 numParts := len(parts) 144 if numParts > 1 && strings.TrimSpace(parts[0]) == "project" { 145 pomValue := reflect.ValueOf(pom) 146 pomValueType := pomValue.Type() 147 for partNum := 1; partNum < numParts; partNum++ { 148 if pomValueType.Kind() != reflect.Struct { 149 break 150 } 151 part := parts[partNum] 152 for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ { 153 f := pomValueType.Field(fieldNum) 154 if part == f.Tag.Get("xml") { 155 pomValue = pomValue.Field(fieldNum) 156 pomValueType = pomValue.Type() 157 if partNum == numParts-1 { 158 return fmt.Sprintf("%v", pomValue.Interface()) 159 } 160 break 161 } 162 } 163 } 164 } 165 return match 166 }) 167 }