github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/parse_pom_xml.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/xml" 7 "fmt" 8 "io" 9 "reflect" 10 "regexp" 11 "strings" 12 13 "github.com/saintfish/chardet" 14 "github.com/vifraa/gopom" 15 "golang.org/x/net/html/charset" 16 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/syft/artifact" 19 "github.com/anchore/syft/syft/file" 20 "github.com/anchore/syft/syft/pkg" 21 "github.com/anchore/syft/syft/pkg/cataloger/generic" 22 ) 23 24 const pomXMLGlob = "*pom.xml" 25 26 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]") 27 28 func (gap genericArchiveParserAdapter) parserPomXML(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 29 pom, err := decodePomXML(reader) 30 if err != nil { 31 return nil, nil, err 32 } 33 34 var pkgs []pkg.Package 35 if pom.Dependencies != nil { 36 for _, dep := range *pom.Dependencies { 37 p := newPackageFromPom( 38 ctx, 39 pom, 40 dep, 41 gap.cfg, 42 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 43 ) 44 if p.Name == "" { 45 continue 46 } 47 48 pkgs = append(pkgs, p) 49 } 50 } 51 52 return pkgs, nil, nil 53 } 54 55 func parsePomXMLProject(path string, reader io.Reader, location file.Location) (*parsedPomProject, error) { 56 project, err := decodePomXML(reader) 57 if err != nil { 58 return nil, err 59 } 60 return newPomProject(path, project, location), nil 61 } 62 63 func newPomProject(path string, p gopom.Project, location file.Location) *parsedPomProject { 64 artifactID := safeString(p.ArtifactID) 65 name := safeString(p.Name) 66 projectURL := safeString(p.URL) 67 68 var licenses []pkg.License 69 if p.Licenses != nil { 70 for _, license := range *p.Licenses { 71 var licenseName, licenseURL string 72 if license.Name != nil { 73 licenseName = *license.Name 74 } 75 if license.URL != nil { 76 licenseURL = *license.URL 77 } 78 79 if licenseName == "" && licenseURL == "" { 80 continue 81 } 82 83 licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, licenseURL, &location)) 84 } 85 } 86 87 log.WithFields("path", path, "artifactID", artifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml") 88 return &parsedPomProject{ 89 JavaPomProject: &pkg.JavaPomProject{ 90 Path: path, 91 Parent: pomParent(p, p.Parent), 92 GroupID: resolveProperty(p, p.GroupID, "groupId"), 93 ArtifactID: artifactID, 94 Version: resolveProperty(p, p.Version, "version"), 95 Name: name, 96 Description: cleanDescription(p.Description), 97 URL: projectURL, 98 }, 99 Licenses: licenses, 100 } 101 } 102 103 func newPackageFromPom(ctx context.Context, pom gopom.Project, dep gopom.Dependency, cfg ArchiveCatalogerConfig, locations ...file.Location) pkg.Package { 104 m := pkg.JavaArchive{ 105 PomProperties: &pkg.JavaPomProperties{ 106 GroupID: resolveProperty(pom, dep.GroupID, "groupId"), 107 ArtifactID: resolveProperty(pom, dep.ArtifactID, "artifactId"), 108 Scope: resolveProperty(pom, dep.Scope, "scope"), 109 }, 110 } 111 112 name := safeString(dep.ArtifactID) 113 version := resolveProperty(pom, dep.Version, "version") 114 115 licenses := make([]pkg.License, 0) 116 if cfg.UseNetwork { 117 if version == "" { 118 // If we have no version then let's try to get it from a parent pom DependencyManagement section 119 version = recursivelyFindVersionFromParentPom(ctx, *dep.GroupID, *dep.ArtifactID, *pom.Parent.GroupID, *pom.Parent.ArtifactID, *pom.Parent.Version, cfg) 120 } 121 if version != "" { 122 parentLicenses := recursivelyFindLicensesFromParentPom( 123 ctx, 124 m.PomProperties.GroupID, 125 m.PomProperties.ArtifactID, 126 version, 127 cfg) 128 129 if len(parentLicenses) > 0 { 130 for _, licenseName := range parentLicenses { 131 licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil)) 132 } 133 } 134 } 135 } 136 137 p := pkg.Package{ 138 Name: name, 139 Version: version, 140 Locations: file.NewLocationSet(locations...), 141 Licenses: pkg.NewLicenseSet(licenses...), 142 PURL: packageURL(name, version, m), 143 Language: pkg.Java, 144 Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? 145 Metadata: m, 146 } 147 148 p.SetID() 149 150 return p 151 } 152 153 func decodePomXML(content io.Reader) (project gopom.Project, err error) { 154 inputReader, err := getUtf8Reader(content) 155 if err != nil { 156 return project, fmt.Errorf("unable to read pom.xml: %w", err) 157 } 158 159 decoder := xml.NewDecoder(inputReader) 160 // when an xml file has a character set declaration (e.g. '<?xml version="1.0" encoding="ISO-8859-1"?>') read that and use the correct decoder 161 decoder.CharsetReader = charset.NewReaderLabel 162 163 if err := decoder.Decode(&project); err != nil { 164 return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err) 165 } 166 167 return project, nil 168 } 169 170 func getUtf8Reader(content io.Reader) (io.Reader, error) { 171 pomContents, err := io.ReadAll(content) 172 if err != nil { 173 return nil, err 174 } 175 176 detector := chardet.NewTextDetector() 177 detection, err := detector.DetectBest(pomContents) 178 179 var inputReader io.Reader 180 if err == nil && detection != nil { 181 if detection.Charset == "UTF-8" { 182 inputReader = bytes.NewReader(pomContents) 183 } else { 184 inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) 185 if err != nil { 186 return nil, fmt.Errorf("unable to get encoding: %w", err) 187 } 188 } 189 } else { 190 // we could not detect the encoding, but we want a valid file to read. Replace unreadable 191 // characters with the UTF-8 replacement character. 192 inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) 193 } 194 return inputReader, nil 195 } 196 197 func pomParent(pom gopom.Project, parent *gopom.Parent) (result *pkg.JavaPomParent) { 198 if parent == nil { 199 return nil 200 } 201 202 artifactID := safeString(parent.ArtifactID) 203 result = &pkg.JavaPomParent{ 204 GroupID: resolveProperty(pom, parent.GroupID, "groupId"), 205 ArtifactID: artifactID, 206 Version: resolveProperty(pom, parent.Version, "version"), 207 } 208 209 if result.GroupID == "" && result.ArtifactID == "" && result.Version == "" { 210 return nil 211 } 212 return result 213 } 214 215 func cleanDescription(original *string) (cleaned string) { 216 if original == nil { 217 return "" 218 } 219 descriptionLines := strings.Split(*original, "\n") 220 for _, line := range descriptionLines { 221 line = strings.TrimSpace(line) 222 if len(line) == 0 { 223 continue 224 } 225 cleaned += line + " " 226 } 227 return strings.TrimSpace(cleaned) 228 } 229 230 // resolveProperty emulates some maven property resolution logic by looking in the project's variables 231 // as well as supporting the project expressions like ${project.parent.groupId}. 232 // If no match is found, the entire expression including ${} is returned 233 func resolveProperty(pom gopom.Project, property *string, propertyName string) string { 234 propertyCase := safeString(property) 235 log.WithFields("existingPropertyValue", propertyCase, "propertyName", propertyName).Trace("resolving property") 236 seenBeforePropertyNames := map[string]struct{}{ 237 propertyName: {}, 238 } 239 result := recursiveResolveProperty(pom, propertyCase, seenBeforePropertyNames) 240 if propertyMatcher.MatchString(result) { 241 return "" // dereferencing variable failed; fall back to empty string 242 } 243 return result 244 } 245 246 //nolint:gocognit 247 func recursiveResolveProperty(pom gopom.Project, propertyCase string, seenPropertyNames map[string]struct{}) string { 248 return propertyMatcher.ReplaceAllStringFunc(propertyCase, func(match string) string { 249 propertyName := strings.TrimSpace(match[2 : len(match)-1]) // remove leading ${ and trailing } 250 if _, seen := seenPropertyNames[propertyName]; seen { 251 return propertyCase 252 } 253 entries := pomProperties(pom) 254 if value, ok := entries[propertyName]; ok { 255 seenPropertyNames[propertyName] = struct{}{} 256 return recursiveResolveProperty(pom, value, seenPropertyNames) // recursively resolve in case a variable points to a variable. 257 } 258 259 // if we don't find anything directly in the pom properties, 260 // see if we have a project.x expression and process this based 261 // on the xml tags in gopom 262 parts := strings.Split(propertyName, ".") 263 numParts := len(parts) 264 if numParts > 1 && strings.TrimSpace(parts[0]) == "project" { 265 pomValue := reflect.ValueOf(pom) 266 pomValueType := pomValue.Type() 267 for partNum := 1; partNum < numParts; partNum++ { 268 if pomValueType.Kind() != reflect.Struct { 269 break 270 } 271 part := parts[partNum] 272 for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ { 273 f := pomValueType.Field(fieldNum) 274 tag := f.Tag.Get("xml") 275 tag = strings.Split(tag, ",")[0] 276 // a segment of the property name matches the xml tag for the field, 277 // so we need to recurse down the nested structs or return a match 278 // if we're done. 279 if part == tag { 280 pomValue = pomValue.Field(fieldNum) 281 pomValueType = pomValue.Type() 282 if pomValueType.Kind() == reflect.Ptr { 283 // we were recursing down the nested structs, but one of the steps 284 // we need to take is a nil pointer, so give up and return the original match 285 if pomValue.IsNil() { 286 return match 287 } 288 pomValue = pomValue.Elem() 289 if !pomValue.IsZero() { 290 // we found a non-zero value whose tag matches this part of the property name 291 pomValueType = pomValue.Type() 292 } 293 } 294 // If this was the last part of the property name, return the value 295 if partNum == numParts-1 { 296 return fmt.Sprintf("%v", pomValue.Interface()) 297 } 298 break 299 } 300 } 301 } 302 } 303 return match 304 }) 305 } 306 307 func pomProperties(p gopom.Project) map[string]string { 308 if p.Properties != nil { 309 return p.Properties.Entries 310 } 311 return map[string]string{} 312 } 313 314 func safeString(s *string) string { 315 if s == nil { 316 return "" 317 } 318 return *s 319 }