// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE(review): these declarations belong to package archive and rely on the
// path/filepath, regexp and strings imports.

// Regexes to determine if a string is a version. Each one is anchored at the
// start of the string, so only the beginning of the candidate is inspected.
var (
	// digit matches a string that begins with an ASCII digit.
	digit = regexp.MustCompile("^[0-9]")
	// buildAndDigit matches a string that begins with "build" and a digit.
	buildAndDigit = regexp.MustCompile("^build[0-9]")
	// releaseAndDigit matches "r" or "rc" followed by one or more digits,
	// where the digits are followed by a non-letter or the end of the string.
	releaseAndDigit = regexp.MustCompile("^rc?[0-9]+([^a-zA-Z]|$)")
)

// JarProps stores the name, version, and group ID of a Java archive.
type JarProps struct {
	ArtifactID string
	Version    string
	GroupID    string
}

// ParseFilename attempts to figure out the package name, version, and group ID
// of a Java archive based on its filename.
//
// It returns nil if parsing was unsuccessful: either no version could be found
// in the filename, or nothing precedes the version (e.g. "-1.2.3.jar"), in
// which case there is no artifact ID to report.
func ParseFilename(filePath string) *JarProps {
	name, version := nameVersionFromFilename(filePath)
	if name == "" || version == "" {
		return nil
	}
	props := &JarProps{ArtifactID: name, Version: version}
	// Most JAR files only contain the artifact ID in the name, so the group ID
	// cannot usually be determined strictly from the filename. However, since
	// the format of artifact ID is arbitrarily determined by developers,
	// sometimes they are namespaced to the group ID (e.g. for
	// org.apache.felix.framework-1.2.3.jar the group ID is org.apache.felix).
	// We attempt to extract such group IDs here: everything before the last
	// dot of the name is taken as the group ID.
	if i := strings.LastIndex(name, "."); i >= 0 {
		props.GroupID = name[:i]
	}
	return props
}

// nameVersionFromFilename splits the base name of filePath, with its final
// extension removed, into a name and a version. The version is "" (and the
// name is the whole extension-less base name) if no version could be found.
func nameVersionFromFilename(filePath string) (string, string) {
	base := filepath.Base(filePath)
	filename := strings.TrimSuffix(base, filepath.Ext(base))

	// Most archive names follow the convention "some-package-name-1.2.3".
	// There might be dashes in the version too, e.g. "guava-31.1-jre", so the
	// dashes are tried from left to right and the first one that is followed
	// by something version-like wins.
	for i := 0; i < len(filename); i++ {
		if filename[i] != '-' {
			continue
		}
		if v := filename[i+1:]; isVersion(v) {
			return filename[:i], v
		}
	}

	// Also try package_version and package.version. Only the first occurrence
	// of each separator is considered.
	for _, sep := range []string{"_", "."} {
		name, v, found := strings.Cut(filename, sep)
		if found && isVersion(v) {
			return name, v
		}
	}

	// Version could not be determined.
	return filename, ""
}

// isVersion reports whether str starts like a version: with a digit, with
// "build" plus a digit, or with an "r"/"rc" release marker plus digits.
func isVersion(str string) bool {
	return digit.MatchString(str) ||
		buildAndDigit.MatchString(str) ||
		releaseAndDigit.MatchString(str)
}