github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/java/archive_filename.go (about) 1 package java 2 3 import ( 4 "path/filepath" 5 "regexp" 6 "strings" 7 8 "github.com/anchore/syft/internal/log" 9 "github.com/anchore/syft/syft/pkg" 10 ) 11 12 // nameAndVersionPattern finds the package name and version (as named capture 13 // groups) in a string. The pattern's strategy is to start at the beginning of 14 // the string, and for every next dash-delimited group, consider the group to be 15 // a continuation of the package name, unless the group begins with a number or 16 // matches any of a specified set of "version-indicating" patterns. When a given 17 // group meets this criterion, consider the group and the remainder of the 18 // string to be the package version. 19 // 20 // Regex components of note: 21 // 22 // (?Ui) ... Sets the "U" and the "i" options for this Regex —— (ungreedy, 23 // and case-insensitive, respectively). "Ungreedy" is important so that the '*' that trails the package name 24 // component doesn't consume the rest of the string. 25 // 26 // [[:alpha:]][[:word:].]* ... Matches any word, and the word can include "word" characters ( 27 // which includes numbers and underscores), and periods, but the first character of the word MUST be a letter. 28 // 29 // (?:\.[[:alpha:]][[:word:].]*)* ... This looks redundant, but it's not. It 30 // extends the previous pattern such that the net effect of both components is 31 // that words can also include a period and more words (thus, when combined, not 32 // only is "something" matched, but so is "com.prefix.thing" 33 // 34 // (?:\d.*|(?:build\d*.*)|(?:rc?\d+(?:^[[:alpha:]].*)?)) ... 35 // This match group covers the "version-indicating" patterns mentioned in the above description. Given the pipes ( 36 // '|'), this functions as a series of 'OR'-joined conditions: 37 // 38 // \d.* ... "If it starts with a numeric digit, this is a version, no matter what follows." 39 // build\d*.* ... "If it starts with "build" and then a numeric digit immediately after, this is a version." 40 // rc?\d+(?:^[[:alpha:]].*)? ... "If it starts with "r" or "rc" and then one or more numeric digits immediately 41 // after, but no alpha characters right after that (in the same word), this is a version." 42 // 43 // Match examples: 44 // 45 // some-package-4.0.1 --> name="some-package", version="4.0.1" 46 // prefix.thing-4 --> name="prefix.thing", version="4" 47 // my-http2-server-5 --> name="my-http2-server", version="5" 48 // jetpack-build235-rc5 --> name="jetpack", version="build2.0-rc5" 49 // ironman-r4-2009 --> name="ironman", version="r4-2009" 50 var nameAndVersionPattern = regexp.MustCompile(`(?Ui)^(?P<name>(?:[[:alpha:]][[:word:].]*(?:\.[[:alpha:]][[:word:].]*)*-?)+)(?:-(?P<version>(\d.*|(build\d+.*)|(rc?\d+(?:^[[:alpha:]].*)?))))?$`) 51 var secondaryVersionPattern = regexp.MustCompile(`(?:[._-](?P<version>(\d.*|(build\d+.*)|(rc?\d+(?:^[[:alpha:]].*)?))))?$`) 52 53 type archiveFilename struct { 54 raw string 55 name string 56 version string 57 } 58 59 func getSubexp(matches []string, subexpName string, re *regexp.Regexp, raw string) string { 60 if len(matches) < 1 { 61 log.Warnf("unexpectedly empty matches for archive '%s'", raw) 62 return "" 63 } 64 65 index := re.SubexpIndex(subexpName) 66 if index < 1 { 67 log.Warnf("unexpected index of '%s' capture group for Java archive '%s'", subexpName, raw) 68 return "" 69 } 70 71 // Prevent out-of-range panic 72 if len(matches) < index+1 { 73 log.Warnf("no match found for '%s' in '%s'", subexpName, matches[0]) 74 return "" 75 } 76 77 return matches[index] 78 } 79 80 func newJavaArchiveFilename(raw string) archiveFilename { 81 // trim the file extension and remove any path prefixes 82 cleanedFileName := strings.TrimSuffix(filepath.Base(raw), filepath.Ext(raw)) 83 84 matches := nameAndVersionPattern.FindStringSubmatch(cleanedFileName) 85 86 name := getSubexp(matches, "name", nameAndVersionPattern, raw) 87 version := getSubexp(matches, "version", nameAndVersionPattern, raw) 88 89 // some jars get named with different conventions, like `_<version>` or `.<version>` 90 if version == "" { 91 matches = secondaryVersionPattern.FindStringSubmatch(name) 92 version = getSubexp(matches, "version", secondaryVersionPattern, raw) 93 if version != "" { 94 name = name[0 : len(name)-len(version)-1] 95 } 96 } 97 98 return archiveFilename{ 99 raw: raw, 100 name: name, 101 version: version, 102 } 103 } 104 105 func (a archiveFilename) extension() string { 106 return strings.TrimPrefix(filepath.Ext(a.raw), ".") 107 } 108 109 func (a archiveFilename) pkgType() pkg.Type { 110 switch strings.ToLower(a.extension()) { 111 case "jar", "war", "ear", "lpkg", "par", "sar", "nar": 112 return pkg.JavaPkg 113 case "jpi", "hpi": 114 return pkg.JenkinsPluginPkg 115 default: 116 return pkg.UnknownPkg 117 } 118 }