github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_pom_xml.go (about) 1 package java 2 3 import ( 4 "bytes" 5 "encoding/xml" 6 "fmt" 7 "io" 8 "reflect" 9 "regexp" 10 "strings" 11 12 "github.com/saintfish/chardet" 13 "github.com/vifraa/gopom" 14 "golang.org/x/net/html/charset" 15 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 "github.com/lineaje-labs/syft/internal/log" 21 ) 22 23 const pomXMLGlob = "*pom.xml" 24 25 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]") 26 27 func parserPomXML( 28 _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser, 29 ) ([]pkg.Package, []artifact.Relationship, error) { 30 pom, err := decodePomXML(reader) 31 if err != nil { 32 return nil, nil, err 33 } 34 35 var pkgs []pkg.Package 36 if pom.Dependencies != nil { 37 for _, dep := range *pom.Dependencies { 38 p := newPackageFromPom( 39 pom, 40 dep, 41 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 42 ) 43 if p.Name == "" { 44 continue 45 } 46 47 pkgs = append(pkgs, p) 48 } 49 } 50 51 return pkgs, nil, nil 52 } 53 54 func parsePomXMLProject(path string, reader io.Reader, location file.Location) (*parsedPomProject, error) { 55 project, err := decodePomXML(reader) 56 if err != nil { 57 return nil, err 58 } 59 return newPomProject(path, project, location), nil 60 } 61 62 func newPomProject(path string, p gopom.Project, location file.Location) *parsedPomProject { 63 artifactID := safeString(p.ArtifactID) 64 name := safeString(p.Name) 65 projectURL := safeString(p.URL) 66 67 var licenses []pkg.License 68 if p.Licenses != nil { 69 for _, license := range *p.Licenses { 70 var licenseName, licenseURL string 71 if license.Name != nil { 72 licenseName = *license.Name 73 } 74 if license.URL != nil { 75 licenseURL = *license.URL 76 } 77 78 if licenseName == "" && licenseURL == "" { 79 continue 80 } 81 82 licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, licenseURL, &location)) 83 } 84 } 85 86 log.WithFields("path", path, "artifactID", artifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml") 87 return &parsedPomProject{ 88 JavaPomProject: &pkg.JavaPomProject{ 89 Path: path, 90 Parent: pomParent(p, p.Parent), 91 GroupID: resolveProperty(p, p.GroupID, "groupId"), 92 ArtifactID: artifactID, 93 Version: resolveProperty(p, p.Version, "version"), 94 Name: name, 95 Description: cleanDescription(p.Description), 96 URL: projectURL, 97 }, 98 Licenses: licenses, 99 } 100 } 101 102 func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package { 103 m := pkg.JavaArchive{ 104 PomProperties: &pkg.JavaPomProperties{ 105 GroupID: resolveProperty(pom, dep.GroupID, "groupId"), 106 ArtifactID: resolveProperty(pom, dep.ArtifactID, "artifactId"), 107 Scope: resolveProperty(pom, dep.Scope, "scope"), 108 }, 109 } 110 111 name := safeString(dep.ArtifactID) 112 version := resolveProperty(pom, dep.Version, "version") 113 114 p := pkg.Package{ 115 Name: name, 116 Version: version, 117 Locations: file.NewLocationSet(locations...), 118 PURL: packageURL(name, version, m), 119 Language: pkg.Java, 120 Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? 121 Metadata: m, 122 } 123 124 p.SetID() 125 126 return p 127 } 128 129 func decodePomXML(content io.Reader) (project gopom.Project, err error) { 130 inputReader, err := getUtf8Reader(content) 131 if err != nil { 132 return project, fmt.Errorf("unable to read pom.xml: %w", err) 133 } 134 135 decoder := xml.NewDecoder(inputReader) 136 // when an xml file has a character set declaration (e.g. '<?xml version="1.0" encoding="ISO-8859-1"?>') read that and use the correct decoder 137 decoder.CharsetReader = charset.NewReaderLabel 138 139 if err := decoder.Decode(&project); err != nil { 140 return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err) 141 } 142 143 return project, nil 144 } 145 146 func getUtf8Reader(content io.Reader) (io.Reader, error) { 147 pomContents, err := io.ReadAll(content) 148 if err != nil { 149 return nil, err 150 } 151 152 detector := chardet.NewTextDetector() 153 detection, err := detector.DetectBest(pomContents) 154 155 var inputReader io.Reader 156 if err == nil && detection != nil { 157 if detection.Charset == "UTF-8" { 158 inputReader = bytes.NewReader(pomContents) 159 } else { 160 inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) 161 if err != nil { 162 return nil, fmt.Errorf("unable to get encoding: %w", err) 163 } 164 } 165 } else { 166 // we could not detect the encoding, but we want a valid file to read. Replace unreadable 167 // characters with the UTF-8 replacement character. 168 inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) 169 } 170 return inputReader, nil 171 } 172 173 func pomParent(pom gopom.Project, parent *gopom.Parent) (result *pkg.JavaPomParent) { 174 if parent == nil { 175 return nil 176 } 177 178 artifactID := safeString(parent.ArtifactID) 179 result = &pkg.JavaPomParent{ 180 GroupID: resolveProperty(pom, parent.GroupID, "groupId"), 181 ArtifactID: artifactID, 182 Version: resolveProperty(pom, parent.Version, "version"), 183 } 184 185 if result.GroupID == "" && result.ArtifactID == "" && result.Version == "" { 186 return nil 187 } 188 return result 189 } 190 191 func cleanDescription(original *string) (cleaned string) { 192 if original == nil { 193 return "" 194 } 195 descriptionLines := strings.Split(*original, "\n") 196 for _, line := range descriptionLines { 197 line = strings.TrimSpace(line) 198 if len(line) == 0 { 199 continue 200 } 201 cleaned += line + " " 202 } 203 return strings.TrimSpace(cleaned) 204 } 205 206 // resolveProperty emulates some maven property resolution logic by looking in the project's variables 207 // as well as supporting the project expressions like ${project.parent.groupId}. 208 // If no match is found, the entire expression including ${} is returned 209 // 210 //nolint:gocognit 211 func resolveProperty(pom gopom.Project, property *string, propertyName string) string { 212 propertyCase := safeString(property) 213 log.WithFields("existingPropertyValue", propertyCase, "propertyName", propertyName).Trace("resolving property") 214 return propertyMatcher.ReplaceAllStringFunc(propertyCase, func(match string) string { 215 propertyName := strings.TrimSpace(match[2 : len(match)-1]) // remove leading ${ and trailing } 216 entries := pomProperties(pom) 217 if value, ok := entries[propertyName]; ok { 218 return value 219 } 220 221 // if we don't find anything directly in the pom properties, 222 // see if we have a project.x expression and process this based 223 // on the xml tags in gopom 224 parts := strings.Split(propertyName, ".") 225 numParts := len(parts) 226 if numParts > 1 && strings.TrimSpace(parts[0]) == "project" { 227 pomValue := reflect.ValueOf(pom) 228 pomValueType := pomValue.Type() 229 for partNum := 1; partNum < numParts; partNum++ { 230 if pomValueType.Kind() != reflect.Struct { 231 break 232 } 233 part := parts[partNum] 234 for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ { 235 f := pomValueType.Field(fieldNum) 236 tag := f.Tag.Get("xml") 237 tag = strings.Split(tag, ",")[0] 238 // a segment of the property name matches the xml tag for the field, 239 // so we need to recurse down the nested structs or return a match 240 // if we're done. 241 if part == tag { 242 pomValue = pomValue.Field(fieldNum) 243 pomValueType = pomValue.Type() 244 if pomValueType.Kind() == reflect.Ptr { 245 // we were recursing down the nested structs, but one of the steps 246 // we need to take is a nil pointer, so give up and return the original match 247 if pomValue.IsNil() { 248 return match 249 } 250 pomValue = pomValue.Elem() 251 if !pomValue.IsZero() { 252 // we found a non-zero value whose tag matches this part of the property name 253 pomValueType = pomValue.Type() 254 } 255 } 256 // If this was the last part of the property name, return the value 257 if partNum == numParts-1 { 258 return fmt.Sprintf("%v", pomValue.Interface()) 259 } 260 break 261 } 262 } 263 } 264 } 265 return match 266 }) 267 } 268 269 func pomProperties(p gopom.Project) map[string]string { 270 if p.Properties != nil { 271 return p.Properties.Entries 272 } 273 return map[string]string{} 274 } 275 276 func safeString(s *string) string { 277 if s == nil { 278 return "" 279 } 280 return *s 281 }