github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/java/parse_pom_xml.go (about) 1 package java 2 3 import ( 4 "encoding/xml" 5 "fmt" 6 "io" 7 "reflect" 8 "regexp" 9 "strings" 10 11 "github.com/nextlinux/gosbom/gosbom/artifact" 12 "github.com/nextlinux/gosbom/gosbom/file" 13 "github.com/nextlinux/gosbom/gosbom/pkg" 14 "github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic" 15 "github.com/vifraa/gopom" 16 "golang.org/x/net/html/charset" 17 ) 18 19 const pomXMLGlob = "*pom.xml" 20 21 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]") 22 23 func parserPomXML(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 24 pom, err := decodePomXML(reader) 25 if err != nil { 26 return nil, nil, err 27 } 28 29 var pkgs []pkg.Package 30 for _, dep := range pom.Dependencies { 31 p := newPackageFromPom( 32 pom, 33 dep, 34 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 35 ) 36 if p.Name == "" { 37 continue 38 } 39 40 pkgs = append(pkgs, p) 41 } 42 43 return pkgs, nil, nil 44 } 45 46 func parsePomXMLProject(path string, reader io.Reader) (*pkg.PomProject, error) { 47 project, err := decodePomXML(reader) 48 if err != nil { 49 return nil, err 50 } 51 return newPomProject(path, project), nil 52 } 53 54 func newPomProject(path string, p gopom.Project) *pkg.PomProject { 55 return &pkg.PomProject{ 56 Path: path, 57 Parent: pomParent(p, p.Parent), 58 GroupID: resolveProperty(p, p.GroupID), 59 ArtifactID: p.ArtifactID, 60 Version: resolveProperty(p, p.Version), 61 Name: p.Name, 62 Description: cleanDescription(p.Description), 63 URL: p.URL, 64 } 65 } 66 67 func newPackageFromPom(pom gopom.Project, dep gopom.Dependency, locations ...file.Location) pkg.Package { 68 m := pkg.JavaMetadata{ 69 PomProperties: &pkg.PomProperties{ 70 GroupID: resolveProperty(pom, dep.GroupID), 71 ArtifactID: resolveProperty(pom, dep.ArtifactID), 72 Scope: resolveProperty(pom, dep.Scope), 73 }, 74 } 75 76 name := dep.ArtifactID 77 version := resolveProperty(pom, dep.Version) 78 79 p := pkg.Package{ 80 Name: name, 81 Version: version, 82 Locations: file.NewLocationSet(locations...), 83 PURL: packageURL(name, version, m), 84 Language: pkg.Java, 85 Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? 86 MetadataType: pkg.JavaMetadataType, 87 Metadata: m, 88 } 89 90 p.SetID() 91 92 return p 93 } 94 95 func decodePomXML(content io.Reader) (project gopom.Project, err error) { 96 decoder := xml.NewDecoder(content) 97 // prevent against warnings for "xml: encoding "iso-8859-1" declared but Decoder.CharsetReader is nil" 98 decoder.CharsetReader = charset.NewReaderLabel 99 if err := decoder.Decode(&project); err != nil { 100 return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err) 101 } 102 103 return project, nil 104 } 105 106 func pomParent(pom gopom.Project, parent gopom.Parent) (result *pkg.PomParent) { 107 if parent.ArtifactID != "" || parent.GroupID != "" || parent.Version != "" { 108 result = &pkg.PomParent{ 109 GroupID: resolveProperty(pom, parent.GroupID), 110 ArtifactID: parent.ArtifactID, 111 Version: resolveProperty(pom, parent.Version), 112 } 113 } 114 return result 115 } 116 117 func cleanDescription(original string) (cleaned string) { 118 descriptionLines := strings.Split(original, "\n") 119 for _, line := range descriptionLines { 120 line = strings.TrimSpace(line) 121 if len(line) == 0 { 122 continue 123 } 124 cleaned += line + " " 125 } 126 return strings.TrimSpace(cleaned) 127 } 128 129 // resolveProperty emulates some maven property resolution logic by looking in the project's variables 130 // as well as supporting the project expressions like ${project.parent.groupId}. 131 // If no match is found, the entire expression including ${} is returned 132 func resolveProperty(pom gopom.Project, property string) string { 133 return propertyMatcher.ReplaceAllStringFunc(property, func(match string) string { 134 propertyName := strings.TrimSpace(match[2 : len(match)-1]) 135 if value, ok := pom.Properties.Entries[propertyName]; ok { 136 return value 137 } 138 // if we don't find anything directly in the pom properties, 139 // see if we have a project.x expression and process this based 140 // on the xml tags in gopom 141 parts := strings.Split(propertyName, ".") 142 numParts := len(parts) 143 if numParts > 1 && strings.TrimSpace(parts[0]) == "project" { 144 pomValue := reflect.ValueOf(pom) 145 pomValueType := pomValue.Type() 146 for partNum := 1; partNum < numParts; partNum++ { 147 if pomValueType.Kind() != reflect.Struct { 148 break 149 } 150 part := parts[partNum] 151 for fieldNum := 0; fieldNum < pomValueType.NumField(); fieldNum++ { 152 f := pomValueType.Field(fieldNum) 153 if part == f.Tag.Get("xml") { 154 pomValue = pomValue.Field(fieldNum) 155 pomValueType = pomValue.Type() 156 if partNum == numParts-1 { 157 return fmt.Sprintf("%v", pomValue.Interface()) 158 } 159 break 160 } 161 } 162 } 163 } 164 return match 165 }) 166 }