github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/java/archive/filename.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package archive
    16  
    17  import (
    18  	"path/filepath"
    19  	"regexp"
    20  	"strings"
    21  )
    22  
    23  // Regexes to determine if a string is a version
    24  var (
    25  	digit           = regexp.MustCompile("^[0-9]")
    26  	buildAndDigit   = regexp.MustCompile("^build[0-9]")
    27  	releaseAndDigit = regexp.MustCompile("^rc?[0-9]+([^a-zA-Z]|$)")
    28  )
    29  
    30  // JarProps stores the name, version, and group ID of a Java archive.
    31  type JarProps struct {
    32  	ArtifactID string
    33  	Version    string
    34  	GroupID    string
    35  }
    36  
    37  // ParseFilename attempts to figure out the package name, version, and group ID of a
    38  // Java archive based on its filename. Returns nil if parsing was unsuccessful.
    39  func ParseFilename(filePath string) *JarProps {
    40  	name, version := nameVersionFromFilename(filePath)
    41  	if version == "" {
    42  		return nil
    43  	}
    44  	groupID := ""
    45  	i := strings.LastIndex(name, ".")
    46  	if i >= 0 {
    47  		// Most JAR files only contain the artifact ID in the name, so the group ID
    48  		// cannot usually be determined strictly from the filename. However, since
    49  		// the format of artifact ID is arbitrarily determined by developers,
    50  		// sometimes they are namespaced to the group ID (e.g. for
    51  		// org.apache.felix.framework-1.2.3.jar the group ID is org.apache.felix).
    52  		// We attempt to extract such group IDs here.
    53  		groupID = name[:i]
    54  	}
    55  	return &JarProps{ArtifactID: name, Version: version, GroupID: groupID}
    56  }
    57  
    58  func nameVersionFromFilename(filePath string) (string, string) {
    59  	base := filepath.Base(filePath)
    60  	filename := strings.TrimSuffix(base, filepath.Ext(base))
    61  	if strings.Contains(filename, "-") {
    62  		// Most archive names follow the convention "some-package-name-1.2.3"
    63  		// There might be dashes in the version too, e.g. "guava-31.1-jre"
    64  		for i, c := range filename {
    65  			if c != '-' {
    66  				continue
    67  			}
    68  			v := filename[i+1:]
    69  			if isVersion(v) {
    70  				return filename[:i], v
    71  			}
    72  		}
    73  	}
    74  	// Also try package_version and package.version
    75  	for _, sep := range []string{"_", "."} {
    76  		i := strings.Index(filename, sep)
    77  		if i == -1 {
    78  			continue
    79  		}
    80  		v := filename[i+1:]
    81  		if isVersion(v) {
    82  			return filename[:i], v
    83  		}
    84  	}
    85  	// Version could not be determined.
    86  	return filename, ""
    87  }
    88  
    89  func isVersion(str string) bool {
    90  	if digit.MatchString(str) {
    91  		return true
    92  	}
    93  	if buildAndDigit.MatchString(str) {
    94  		return true
    95  	}
    96  	return releaseAndDigit.MatchString(str)
    97  }